1/* $Id: sph_types.h 260 2011-07-21 01:02:38Z tp $ */
2/**
3 * Basic type definitions.
4 *
5 * This header file defines the generic integer types that will be used
6 * for the implementation of hash functions; it also contains helper
7 * functions which encode and decode multi-byte integer values, using
8 * either little-endian or big-endian conventions.
9 *
10 * This file contains a compile-time test on the size of a byte
11 * (the <code>unsigned char</code> C type). If bytes are not octets,
12 * i.e. if they do not have a size of exactly 8 bits, then compilation
13 * is aborted. Architectures where bytes are not octets are relatively
14 * rare, even in the embedded devices market. We forbid non-octet bytes
15 * because there is no clear convention on how octet streams are encoded
16 * on such systems.
17 *
18 * ==========================(LICENSE BEGIN)============================
19 *
20 * Copyright (c) 2007-2010 Projet RNRT SAPHIR
21 *
22 * Permission is hereby granted, free of charge, to any person obtaining
23 * a copy of this software and associated documentation files (the
24 * "Software"), to deal in the Software without restriction, including
25 * without limitation the rights to use, copy, modify, merge, publish,
26 * distribute, sublicense, and/or sell copies of the Software, and to
27 * permit persons to whom the Software is furnished to do so, subject to
28 * the following conditions:
29 *
30 * The above copyright notice and this permission notice shall be
31 * included in all copies or substantial portions of the Software.
32 *
33 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
34 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
35 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
36 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
37 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
38 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
39 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 *
41 * ===========================(LICENSE END)=============================
42 *
43 * @file sph_types.h
44 * @author Thomas Pornin <[email protected]>
45 */
46
47#ifndef SPH_TYPES_H__
48#define SPH_TYPES_H__
49
50#include <limits.h>
51
52/*
53 * All our I/O functions are defined over octet streams. We do not know
54 * how to handle input data if bytes are not octets.
55 */
56#if CHAR_BIT != 8
57#error This code requires 8-bit bytes
58#endif
59
60/* ============= BEGIN documentation block for Doxygen ============ */
61
62#ifdef DOXYGEN_IGNORE
63
64/** @mainpage sphlib C code documentation
65 *
66 * @section overview Overview
67 *
68 * <code>sphlib</code> is a library which contains implementations of
69 * various cryptographic hash functions. These pages have been generated
70 * with <a href="http://www.doxygen.org/index.html">doxygen</a> and
71 * document the API for the C implementations.
72 *
73 * The API is described in appropriate header files, which are available
74 * in the "Files" section. Each hash function family has its own header,
75 * whose name begins with <code>"sph_"</code> and contains the family
76 * name. For instance, the API for the RIPEMD hash functions is available
77 * in the header file <code>sph_ripemd.h</code>.
78 *
79 * @section principles API structure and conventions
80 *
81 * @subsection io Input/output conventions
82 *
83 * In all generality, hash functions operate over strings of bits.
84 * Individual bits are rarely encountered in C programming or actual
85 * communication protocols; most protocols converge on the ubiquitous
86 * "octet" which is a group of eight bits. Data is thus expressed as a
87 * stream of octets. The C programming language contains the notion of a
88 * "byte", which is a data unit managed under the type <code>"unsigned
89 * char"</code>. The C standard prescribes that a byte should hold at
90 * least eight bits, but possibly more. Most modern architectures, even
91 * in the embedded world, feature eight-bit bytes, i.e. map bytes to
92 * octets.
93 *
94 * Nevertheless, for some of the implemented hash functions, an extra
95 * API has been added, which allows the input of arbitrary sequences of
96 * bits: when the computation is about to be closed, 1 to 7 extra bits
97 * can be added. The functions for which this API is implemented include
98 * the SHA-2 functions and all SHA-3 candidates.
99 *
100 * <code>sphlib</code> defines hash functions which may hash octet streams,
101 * i.e. streams of bits where the number of bits is a multiple of eight.
102 * The data input functions in the <code>sphlib</code> API expect data
103 * as anonymous pointers (<code>"const void *"</code>) with a length
104 * (of type <code>"size_t"</code>) which gives the input data chunk length
105 * in bytes. A byte is assumed to be an octet; the <code>sph_types.h</code>
106 * header contains a compile-time test which prevents compilation on
107 * architectures where this property is not met.
108 *
109 * The hash function output is also converted into bytes. All currently
110 * implemented hash functions have an output width which is a multiple of
111 * eight, and this is likely to remain true for new designs.
112 *
113 * Most hash functions internally convert input data into 32-bit or 64-bit
114 * words, using either little-endian or big-endian conversion. The hash
115 * output also often consists of such words, which are encoded into output
116 * bytes with a similar endianness convention. Some hash functions have
117 * been only loosely specified on that subject; when necessary,
118 * <code>sphlib</code> has been tested against published "reference"
119 * implementations in order to use the same conventions.
120 *
121 * @subsection shortname Function short name
122 *
123 * Each implemented hash function has a "short name" which is used
124 * internally to derive the identifiers for the functions and context
125 * structures which the function uses. For instance, MD5 has the short
126 * name <code>"md5"</code>. Short names are listed in the next section,
127 * for the implemented hash functions. In subsequent sections, the
128 * short name will be assumed to be <code>"XXX"</code>: replace with the
129 * actual hash function name to get the C identifier.
130 *
131 * Note: some functions within the same family share the same core
132 * elements, such as update function or context structure. Correspondingly,
133 * some of the defined types or functions may actually be macros which
134 * transparently evaluate to another type or function name.
135 *
136 * @subsection context Context structure
137 *
138 * Each implemented hash function has its own context structure, available
139 * under the type name <code>"sph_XXX_context"</code> for the hash function
140 * with short name <code>"XXX"</code>. This structure holds all needed
141 * state for a running hash computation.
142 *
143 * The contents of these structures are meant to be opaque, and private
144 * to the implementation. However, these contents are specified in the
145 * header files so that application code which uses <code>sphlib</code>
146 * may access the size of those structures.
147 *
148 * The caller is responsible for allocating the context structure,
149 * whether by dynamic allocation (<code>malloc()</code> or equivalent),
150 * static allocation (a global permanent variable), as an automatic
151 * variable ("on the stack"), or by any other means which ensures proper
152 * structure alignment. <code>sphlib</code> code performs no dynamic
153 * allocation by itself.
154 *
155 * The context must be initialized before use, using the
156 * <code>sph_XXX_init()</code> function. This function sets the context
157 * state to proper initial values for hashing.
158 *
159 * Since all state data is contained within the context structure,
160 * <code>sphlib</code> is thread-safe and reentrant: several hash
161 * computations may be performed in parallel, provided that they do not
162 * operate on the same context. Moreover, a running computation can be
163 * cloned by copying the context (with a simple <code>memcpy()</code>):
164 * the context and its clone are then independent and may be updated
165 * with new data and/or closed without interfering with each other.
166 * Similarly, a context structure can be moved in memory at will:
167 * context structures contain no pointer, in particular no pointer to
168 * themselves.
169 *
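 * As a minimal, illustrative sketch (using <code>sph_sha1</code> purely
 * as an example; the data input calls are described in the next
 * subsection), a running computation could be forked like this:
 *
 * @code
 * #include <string.h>
 * #include "sph_sha1.h"
 *
 * sph_sha1_context cc, cc2;
 *
 * sph_sha1_init(&cc);
 * sph_sha1(&cc, "common prefix", 13);
 * memcpy(&cc2, &cc, sizeof cc);     // clone the running state
 * sph_sha1(&cc, "suffix A", 8);     // the two computations are now
 * sph_sha1(&cc2, "suffix B", 8);    // fully independent
 * @endcode
 *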
170 * @subsection dataio Data input
171 *
172 * Hashed data is input with the <code>sph_XXX()</code> function, which
173 * takes as parameters a pointer to the context, a pointer to the data
174 * to hash, and the number of data bytes to hash. The context is updated
175 * with the new data.
176 *
177 * Data can be input in one or several calls, with arbitrary input lengths.
178 * However, it is best, performance-wise, to input data by relatively big
179 * chunks (say a few kilobytes), because this allows <code>sphlib</code> to
180 * optimize things and avoid internal copying.
181 *
182 * When all data has been input, the context can be closed with
183 * <code>sph_XXX_close()</code>. The hash output is computed and written
184 * into the provided buffer. The caller must take care to provide a
185 * buffer of appropriate length; e.g., when using SHA-1, the output is
186 * a 20-byte value, therefore the output buffer must be at least 20 bytes
187 * long.
188 *
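 * A minimal, illustrative usage sketch (assuming the SHA-1 implementation
 * from <code>sph_sha1.h</code>; the helper name is hypothetical):
 *
 * @code
 * #include "sph_sha1.h"
 *
 * static void
 * hash_message(const void *msg, size_t len, unsigned char out[20])
 * {
 *     sph_sha1_context cc;
 *
 *     sph_sha1_init(&cc);        // set the initial state
 *     sph_sha1(&cc, msg, len);   // input the data (may be repeated)
 *     sph_sha1_close(&cc, out);  // write the 20-byte (160-bit) output
 * }
 * @endcode
 *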
189 * For some hash functions, the <code>sph_XXX_addbits_and_close()</code>
190 * function can be used instead of <code>sph_XXX_close()</code>. This
191 * function can take a few extra <strong>bits</strong> to be added at
192 * the end of the input message. This allows hashing messages with a
193 * bit length which is not a multiple of 8. The extra bits are provided
194 * as an unsigned integer value, and a bit count. The bit count must be
195 * between 0 and 7, inclusive. The extra bits are provided as bits 7 to
196 * 0 (bits of numerical value 128, 64, 32... down to 1), in that order.
197 * For instance, to add three bits of value 1, 1 and 0, the unsigned
198 * integer will have value 192 (1*128 + 1*64 + 0*32) and the bit count
199 * will be 3.
200 *
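 * As an illustrative sketch (assuming the SHA-256 implementation from
 * <code>sph_sha2.h</code>, which provides this extended API), appending
 * the three extra bits 1, 1 and 0 would look like:
 *
 * @code
 * unsigned char out[32];
 * sph_sha256_context cc;
 *
 * sph_sha256_init(&cc);
 * sph_sha256(&cc, "abc", 3);
 * // extra bits 1, 1, 0: value 1*128 + 1*64 + 0*32 = 192, count 3
 * sph_sha256_addbits_and_close(&cc, 192, 3, out);
 * @endcode
 *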
201 * The <code>SPH_SIZE_XXX</code> macro is defined for each hash function;
202 * it evaluates to the function output size, expressed in bits. For instance,
203 * <code>SPH_SIZE_sha1</code> evaluates to <code>160</code>.
204 *
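 * For instance, an output buffer can be sized directly from it
 * (illustrative sketch):
 *
 * @code
 * unsigned char out[SPH_SIZE_sha1 / 8];   // 160 / 8 = 20 bytes
 * @endcode
 *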
205 * When closed, the context is automatically reinitialized and can be
206 * immediately used for another computation. It is not necessary to call
207 * <code>sph_XXX_init()</code> after a close. Note that
208 * <code>sph_XXX_init()</code> can still be called to "reset" a context,
209 * i.e. forget previously input data, and get back to the initial state.
210 *
211 * @subsection alignment Data alignment
212 *
213 * "Alignment" is a property of data, which is said to be "properly
214 * aligned" when its emplacement in memory is such that the data can
215 * be optimally read by full words. This depends on the type of access;
216 * basically, some hash functions will read data by 32-bit or 64-bit
217 * words. <code>sphlib</code> does not mandate such alignment for input
218 * data, but using aligned data can substantially improve performance.
219 *
220 * As a rule, it is best to input data by chunks whose length (in bytes)
221 * is a multiple of eight, and which begin at "generally aligned"
222 * addresses, such as the base address returned by a call to
223 * <code>malloc()</code>.
224 *
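 * For instance (illustrative sketch, assuming an initialized
 * <code>sph_sha1_context cc</code> and an open <code>FILE *f</code>),
 * feeding a file through a <code>malloc()</code>-allocated buffer in
 * 8 kB chunks follows this advice:
 *
 * @code
 * unsigned char *buf = malloc(8192);
 * size_t n;
 *
 * while ((n = fread(buf, 1, 8192, f)) > 0)
 *     sph_sha1(&cc, buf, n);
 * free(buf);
 * @endcode
 *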
225 * @section functions Implemented functions
226 *
227 * We give here the list of implemented functions. They are grouped by
228 * family; to each family corresponds a specific header file. Each
229 * individual function has its associated "short name". Please refer to
230 * the documentation for that header file to get details on the hash
231 * function denomination and provenance.
232 *
233 * Note: the functions marked with a '(64)' in the list below are
234 * available only if the C compiler provides an integer type of length
235 * 64 bits or more. Such a type is mandatory in the latest C standard
236 * (ISO 9899:1999, aka "C99") and is present in several older compilers
237 * as well, so chances are that such a type is available.
238 *
239 * - HAVAL family: file <code>sph_haval.h</code>
240 * - HAVAL-128/3 (128-bit, 3 passes): short name: <code>haval128_3</code>
241 * - HAVAL-128/4 (128-bit, 4 passes): short name: <code>haval128_4</code>
242 * - HAVAL-128/5 (128-bit, 5 passes): short name: <code>haval128_5</code>
243 * - HAVAL-160/3 (160-bit, 3 passes): short name: <code>haval160_3</code>
244 * - HAVAL-160/4 (160-bit, 4 passes): short name: <code>haval160_4</code>
245 * - HAVAL-160/5 (160-bit, 5 passes): short name: <code>haval160_5</code>
246 * - HAVAL-192/3 (192-bit, 3 passes): short name: <code>haval192_3</code>
247 * - HAVAL-192/4 (192-bit, 4 passes): short name: <code>haval192_4</code>
248 * - HAVAL-192/5 (192-bit, 5 passes): short name: <code>haval192_5</code>
249 * - HAVAL-224/3 (224-bit, 3 passes): short name: <code>haval224_3</code>
250 * - HAVAL-224/4 (224-bit, 4 passes): short name: <code>haval224_4</code>
251 * - HAVAL-224/5 (224-bit, 5 passes): short name: <code>haval224_5</code>
252 * - HAVAL-256/3 (256-bit, 3 passes): short name: <code>haval256_3</code>
253 * - HAVAL-256/4 (256-bit, 4 passes): short name: <code>haval256_4</code>
254 * - HAVAL-256/5 (256-bit, 5 passes): short name: <code>haval256_5</code>
255 * - MD2: file <code>sph_md2.h</code>, short name: <code>md2</code>
256 * - MD4: file <code>sph_md4.h</code>, short name: <code>md4</code>
257 * - MD5: file <code>sph_md5.h</code>, short name: <code>md5</code>
258 * - PANAMA: file <code>sph_panama.h</code>, short name: <code>panama</code>
259 * - RadioGatun family: file <code>sph_radiogatun.h</code>
260 * - RadioGatun[32]: short name: <code>radiogatun32</code>
261 * - RadioGatun[64]: short name: <code>radiogatun64</code> (64)
262 * - RIPEMD family: file <code>sph_ripemd.h</code>
263 * - RIPEMD: short name: <code>ripemd</code>
264 * - RIPEMD-128: short name: <code>ripemd128</code>
265 * - RIPEMD-160: short name: <code>ripemd160</code>
266 * - SHA-0: file <code>sph_sha0.h</code>, short name: <code>sha0</code>
267 * - SHA-1: file <code>sph_sha1.h</code>, short name: <code>sha1</code>
268 * - SHA-2 family: file <code>sph_sha2.h</code>
269 * - SHA-224: short name: <code>sha224</code>
270 * - SHA-256: short name: <code>sha256</code>
271 * - SHA-384: short name: <code>sha384</code> (64)
272 * - SHA-512: short name: <code>sha512</code> (64)
273 * - Tiger family: file <code>sph_tiger.h</code>
274 * - Tiger: short name: <code>tiger</code> (64)
275 * - Tiger2: short name: <code>tiger2</code> (64)
276 * - WHIRLPOOL family: file <code>sph_whirlpool.h</code>
277 * - WHIRLPOOL-0: short name: <code>whirlpool0</code> (64)
278 * - WHIRLPOOL-1: short name: <code>whirlpool1</code> (64)
279 * - WHIRLPOOL: short name: <code>whirlpool</code> (64)
280 *
281 * The fourteen second-round SHA-3 candidates are also implemented;
282 * when applicable, the implementations follow the "final" specifications
283 * as published for the third round of the SHA-3 competition (BLAKE,
284 * Groestl, JH, Keccak and Skein have been tweaked for the third round).
285 *
286 * - BLAKE family: file <code>sph_blake.h</code>
287 * - BLAKE-224: short name: <code>blake224</code>
288 * - BLAKE-256: short name: <code>blake256</code>
289 * - BLAKE-384: short name: <code>blake384</code>
290 * - BLAKE-512: short name: <code>blake512</code>
291 * - BMW (Blue Midnight Wish) family: file <code>sph_bmw.h</code>
292 * - BMW-224: short name: <code>bmw224</code>
293 * - BMW-256: short name: <code>bmw256</code>
294 * - BMW-384: short name: <code>bmw384</code> (64)
295 * - BMW-512: short name: <code>bmw512</code> (64)
296 * - CubeHash family: file <code>sph_cubehash.h</code> (specified as
297 * CubeHash16/32 in the CubeHash specification)
298 * - CubeHash-224: short name: <code>cubehash224</code>
299 * - CubeHash-256: short name: <code>cubehash256</code>
300 * - CubeHash-384: short name: <code>cubehash384</code>
301 * - CubeHash-512: short name: <code>cubehash512</code>
302 * - ECHO family: file <code>sph_echo.h</code>
303 * - ECHO-224: short name: <code>echo224</code>
304 * - ECHO-256: short name: <code>echo256</code>
305 * - ECHO-384: short name: <code>echo384</code>
306 * - ECHO-512: short name: <code>echo512</code>
307 * - Fugue family: file <code>sph_fugue.h</code>
308 * - Fugue-224: short name: <code>fugue224</code>
309 * - Fugue-256: short name: <code>fugue256</code>
310 * - Fugue-384: short name: <code>fugue384</code>
311 * - Fugue-512: short name: <code>fugue512</code>
312 * - Groestl family: file <code>sph_groestl.h</code>
313 * - Groestl-224: short name: <code>groestl224</code>
314 * - Groestl-256: short name: <code>groestl256</code>
315 * - Groestl-384: short name: <code>groestl384</code>
316 * - Groestl-512: short name: <code>groestl512</code>
317 * - Hamsi family: file <code>sph_hamsi.h</code>
318 * - Hamsi-224: short name: <code>hamsi224</code>
319 * - Hamsi-256: short name: <code>hamsi256</code>
320 * - Hamsi-384: short name: <code>hamsi384</code>
321 * - Hamsi-512: short name: <code>hamsi512</code>
322 * - JH family: file <code>sph_jh.h</code>
323 * - JH-224: short name: <code>jh224</code>
324 * - JH-256: short name: <code>jh256</code>
325 * - JH-384: short name: <code>jh384</code>
326 * - JH-512: short name: <code>jh512</code>
327 * - Keccak family: file <code>sph_keccak.h</code>
328 * - Keccak-224: short name: <code>keccak224</code>
329 * - Keccak-256: short name: <code>keccak256</code>
330 * - Keccak-384: short name: <code>keccak384</code>
331 * - Keccak-512: short name: <code>keccak512</code>
332 * - Luffa family: file <code>sph_luffa.h</code>
333 * - Luffa-224: short name: <code>luffa224</code>
334 * - Luffa-256: short name: <code>luffa256</code>
335 * - Luffa-384: short name: <code>luffa384</code>
336 * - Luffa-512: short name: <code>luffa512</code>
337 * - Shabal family: file <code>sph_shabal.h</code>
338 * - Shabal-192: short name: <code>shabal192</code>
339 * - Shabal-224: short name: <code>shabal224</code>
340 * - Shabal-256: short name: <code>shabal256</code>
341 * - Shabal-384: short name: <code>shabal384</code>
342 * - Shabal-512: short name: <code>shabal512</code>
343 * - SHAvite-3 family: file <code>sph_shavite.h</code>
344 * - SHAvite-224 (nominally "SHAvite-3 with 224-bit output"):
345 * short name: <code>shavite224</code>
346 * - SHAvite-256 (nominally "SHAvite-3 with 256-bit output"):
347 * short name: <code>shavite256</code>
348 * - SHAvite-384 (nominally "SHAvite-3 with 384-bit output"):
349 * short name: <code>shavite384</code>
350 * - SHAvite-512 (nominally "SHAvite-3 with 512-bit output"):
351 * short name: <code>shavite512</code>
352 * - SIMD family: file <code>sph_simd.h</code>
353 * - SIMD-224: short name: <code>simd224</code>
354 * - SIMD-256: short name: <code>simd256</code>
355 * - SIMD-384: short name: <code>simd384</code>
356 * - SIMD-512: short name: <code>simd512</code>
357 * - Skein family: file <code>sph_skein.h</code>
358 * - Skein-224 (nominally specified as Skein-512-224): short name:
359 * <code>skein224</code> (64)
360 * - Skein-256 (nominally specified as Skein-512-256): short name:
361 * <code>skein256</code> (64)
362 * - Skein-384 (nominally specified as Skein-512-384): short name:
363 * <code>skein384</code> (64)
364 * - Skein-512 (nominally specified as Skein-512-512): short name:
365 * <code>skein512</code> (64)
366 *
367 * For the second-round SHA-3 candidates, the functions are as specified
368 * for round 2, i.e. with the "tweaks" that some candidates added
369 * between round 1 and round 2. Also, some of the submitted packages for
370 * round 2 contained errors, in the specification, reference code, or
371 * both. <code>sphlib</code> implements the corrected versions.
372 */
373
374/** @hideinitializer
375 * Unsigned integer type whose length is at least 32 bits; on most
376 * architectures, it will have a width of exactly 32 bits. Unsigned C
377 * types implement arithmetic modulo a power of 2; use the
378 * <code>SPH_T32()</code> macro to ensure that the value is truncated
379 * to exactly 32 bits. Unless otherwise specified, all macros and
380 * functions which accept <code>sph_u32</code> values assume that these
381 * values fit on 32 bits, i.e. do not exceed 2^32-1, even on architectures
382 * where <code>sph_u32</code> is larger than that.
383 */
384typedef __arch_dependant__ sph_u32;
385
386/** @hideinitializer
387 * Signed integer type corresponding to <code>sph_u32</code>; it has
388 * width 32 bits or more.
389 */
390typedef __arch_dependant__ sph_s32;
391
392/** @hideinitializer
393 * Unsigned integer type whose length is at least 64 bits; on most
394 * architectures which feature such a type, it will have a width of
395 * exactly 64 bits. C99-compliant platforms will have this type; it
396 * is also defined when the GNU compiler (gcc) is used, and on
397 * platforms where <code>unsigned long</code> is large enough. If this
398 * type is not available, then some hash functions which depend on
399 * a 64-bit type will not be available (most notably SHA-384, SHA-512,
400 * Tiger and WHIRLPOOL).
401 */
402typedef __arch_dependant__ sph_u64;
403
404/** @hideinitializer
405 * Signed integer type corresponding to <code>sph_u64</code>; it has
406 * width 64 bits or more.
407 */
408typedef __arch_dependant__ sph_s64;
409
410/**
411 * This macro expands the token <code>x</code> into a suitable
412 * constant expression of type <code>sph_u32</code>. Depending on
413 * how this type is defined, a suffix such as <code>UL</code> may
414 * be appended to the argument.
415 *
416 * @param x the token to expand into a suitable constant expression
417 */
418#define SPH_C32(x)
419
420/**
421 * Truncate a 32-bit value to exactly 32 bits. On most systems, this is
422 * a no-op, recognized as such by the compiler.
423 *
424 * @param x the value to truncate (of type <code>sph_u32</code>)
425 */
426#define SPH_T32(x)
427
428/**
429 * Rotate a 32-bit value by a number of bits to the left. The rotate
430 * count must reside between 1 and 31. This macro assumes that its
431 * first argument fits in 32 bits (no extra bit allowed on machines where
432 * <code>sph_u32</code> is wider); both arguments may be evaluated
433 * several times.
434 *
435 * @param x the value to rotate (of type <code>sph_u32</code>)
436 * @param n the rotation count (between 1 and 31, inclusive)
437 */
438#define SPH_ROTL32(x, n)
439
440/**
441 * Rotate a 32-bit value by a number of bits to the right. The rotate
442 * count must reside between 1 and 31. This macro assumes that its
443 * first argument fits in 32 bits (no extra bit allowed on machines where
444 * <code>sph_u32</code> is wider); both arguments may be evaluated
445 * several times.
446 *
447 * @param x the value to rotate (of type <code>sph_u32</code>)
448 * @param n the rotation count (between 1 and 31, inclusive)
449 */
450#define SPH_ROTR32(x, n)
451
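/*
 * Illustrative sketch (not part of the API): the two rotation macros are
 * inverses of each other; for instance, with v = SPH_C32(0x80000001):
 *
 *     sph_u32 a = SPH_ROTL32(v, 1);   // a == SPH_C32(0x00000003)
 *     sph_u32 b = SPH_ROTR32(v, 1);   // b == SPH_C32(0xC0000000)
 *     // and SPH_ROTR32(SPH_ROTL32(v, n), n) == v for 1 <= n <= 31
 */
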
452/**
453 * This macro is defined on systems for which a 64-bit type has been
454 * detected, and is used for <code>sph_u64</code>.
455 */
456#define SPH_64
457
458/**
459 * This macro is defined on systems for which the "native" integer size is
460 * 64 bits (64-bit values fit in one register).
461 */
462#define SPH_64_TRUE
463
464/**
465 * This macro expands the token <code>x</code> into a suitable
466 * constant expression of type <code>sph_u64</code>. Depending on
467 * how this type is defined, a suffix such as <code>ULL</code> may
468 * be appended to the argument. This macro is defined only if a
469 * 64-bit type was detected and used for <code>sph_u64</code>.
470 *
471 * @param x the token to expand into a suitable constant expression
472 */
473#define SPH_C64(x)
474
475/**
476 * Truncate a 64-bit value to exactly 64 bits. On most systems, this is
477 * a no-op, recognized as such by the compiler. This macro is defined only
478 * if a 64-bit type was detected and used for <code>sph_u64</code>.
479 *
480 * @param x the value to truncate (of type <code>sph_u64</code>)
481 */
482#define SPH_T64(x)
483
484/**
485 * Rotate a 64-bit value by a number of bits to the left. The rotate
486 * count must reside between 1 and 63. This macro assumes that its
487 * first argument fits in 64 bits (no extra bit allowed on machines where
488 * <code>sph_u64</code> is wider); both arguments may be evaluated
489 * several times. This macro is defined only if a 64-bit type was detected
490 * and used for <code>sph_u64</code>.
491 *
492 * @param x the value to rotate (of type <code>sph_u64</code>)
493 * @param n the rotation count (between 1 and 63, inclusive)
494 */
495#define SPH_ROTL64(x, n)
496
497/**
498 * Rotate a 64-bit value by a number of bits to the right. The rotate
499 * count must reside between 1 and 63. This macro assumes that its
500 * first argument fits in 64 bits (no extra bit allowed on machines where
501 * <code>sph_u64</code> is wider); both arguments may be evaluated
502 * several times. This macro is defined only if a 64-bit type was detected
503 * and used for <code>sph_u64</code>.
504 *
505 * @param x the value to rotate (of type <code>sph_u64</code>)
506 * @param n the rotation count (between 1 and 63, inclusive)
507 */
508#define SPH_ROTR64(x, n)
509
510/**
511 * This macro evaluates to <code>inline</code> or an equivalent construction,
512 * if available on the compilation platform, or to nothing otherwise. This
513 * is used to declare inline functions, for which the compiler should
514 * endeavour to include the code directly in the caller. Inline functions
515 * are typically defined in header files as replacement for macros.
516 */
517#define SPH_INLINE
518
519/**
520 * This macro is defined if the platform has been detected as using
521 * little-endian convention. This implies that the <code>sph_u32</code>
522 * type (and the <code>sph_u64</code> type also, if it is defined) has
523 * an exact width (i.e. exactly 32-bit, respectively 64-bit).
524 */
525#define SPH_LITTLE_ENDIAN
526
527/**
528 * This macro is defined if the platform has been detected as using
529 * big-endian convention. This implies that the <code>sph_u32</code>
530 * type (and the <code>sph_u64</code> type also, if it is defined) has
531 * an exact width (i.e. exactly 32-bit, respectively 64-bit).
532 */
533#define SPH_BIG_ENDIAN
534
535/**
536 * This macro is defined if 32-bit words (and 64-bit words, if defined)
537 * can be read from and written to memory efficiently in little-endian
538 * convention. This is the case for little-endian platforms, and also
539 * for the big-endian platforms which have special little-endian access
540 * opcodes (e.g. Ultrasparc).
541 */
542#define SPH_LITTLE_FAST
543
544/**
545 * This macro is defined if 32-bit words (and 64-bit words, if defined)
546 * can be read from and written to memory efficiently in big-endian
547 * convention. This is the case for big-endian platforms, and also
548 * for the little-endian platforms which have special big-endian access
549 * opcodes.
550 */
551#define SPH_BIG_FAST
552
553/**
554 * On some platforms, this macro is defined to an unsigned integer type
555 * into which pointer values may be cast. The resulting value can then
556 * be tested for being a multiple of 2, 4 or 8, indicating an aligned
557 * pointer for, respectively, 16-bit, 32-bit or 64-bit memory accesses.
558 */
559#define SPH_UPTR
560
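/*
 * Illustrative sketch (not part of the API): when SPH_UPTR is defined,
 * pointer alignment can be tested with a simple cast, e.g.:
 *
 *     if (((SPH_UPTR)ptr & 3) == 0) {
 *         // ptr is 32-bit aligned; a direct word access is possible
 *     }
 */
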
561/**
562 * When defined, this macro indicates that unaligned memory accesses
563 * are possible with only a minor penalty, and thus should be preferred
564 * over strategies which first copy data to an aligned buffer.
565 */
566#define SPH_UNALIGNED
567
568/**
569 * Byte-swap a 32-bit word (i.e. <code>0x12345678</code> becomes
570 * <code>0x78563412</code>). This is an inline function which resorts
571 * to inline assembly on some platforms, for better performance.
572 *
573 * @param x the 32-bit value to byte-swap
574 * @return the byte-swapped value
575 */
576static inline sph_u32 sph_bswap32(sph_u32 x);
577
578/**
579 * Byte-swap a 64-bit word. This is an inline function which resorts
580 * to inline assembly on some platforms, for better performance. This
581 * function is defined only if a suitable 64-bit type was found for
582 * <code>sph_u64</code>.
583 *
584 * @param x the 64-bit value to byte-swap
585 * @return the byte-swapped value
586 */
587static inline sph_u64 sph_bswap64(sph_u64 x);
588
589/**
590 * Decode a 16-bit unsigned value from memory, in little-endian convention
591 * (least significant byte comes first).
592 *
593 * @param src the source address
594 * @return the decoded value
595 */
596static inline unsigned sph_dec16le(const void *src);
597
598/**
599 * Encode a 16-bit unsigned value into memory, in little-endian convention
600 * (least significant byte comes first).
601 *
602 * @param dst the destination buffer
603 * @param val the value to encode
604 */
605static inline void sph_enc16le(void *dst, unsigned val);
606
607/**
608 * Decode a 16-bit unsigned value from memory, in big-endian convention
609 * (most significant byte comes first).
610 *
611 * @param src the source address
612 * @return the decoded value
613 */
614static inline unsigned sph_dec16be(const void *src);
615
616/**
617 * Encode a 16-bit unsigned value into memory, in big-endian convention
618 * (most significant byte comes first).
619 *
620 * @param dst the destination buffer
621 * @param val the value to encode
622 */
623static inline void sph_enc16be(void *dst, unsigned val);
624
625/**
626 * Decode a 32-bit unsigned value from memory, in little-endian convention
627 * (least significant byte comes first).
628 *
629 * @param src the source address
630 * @return the decoded value
631 */
632static inline sph_u32 sph_dec32le(const void *src);
633
634/**
635 * Decode a 32-bit unsigned value from memory, in little-endian convention
636 * (least significant byte comes first). This function assumes that the
637 * source address is suitably aligned for a direct access, if the platform
638 * supports such things; it can thus be marginally faster than the generic
639 * <code>sph_dec32le()</code> function.
640 *
641 * @param src the source address
642 * @return the decoded value
643 */
644static inline sph_u32 sph_dec32le_aligned(const void *src);
645
646/**
647 * Encode a 32-bit unsigned value into memory, in little-endian convention
648 * (least significant byte comes first).
649 *
650 * @param dst the destination buffer
651 * @param val the value to encode
652 */
653static inline void sph_enc32le(void *dst, sph_u32 val);
654
655/**
656 * Encode a 32-bit unsigned value into memory, in little-endian convention
657 * (least significant byte comes first). This function assumes that the
658 * destination address is suitably aligned for a direct access, if the
659 * platform supports such things; it can thus be marginally faster than
660 * the generic <code>sph_enc32le()</code> function.
661 *
662 * @param dst the destination buffer
663 * @param val the value to encode
664 */
665static inline void sph_enc32le_aligned(void *dst, sph_u32 val);
666
667/**
668 * Decode a 32-bit unsigned value from memory, in big-endian convention
669 * (most significant byte comes first).
670 *
671 * @param src the source address
672 * @return the decoded value
673 */
674static inline sph_u32 sph_dec32be(const void *src);
675
676/**
677 * Decode a 32-bit unsigned value from memory, in big-endian convention
678 * (most significant byte comes first). This function assumes that the
679 * source address is suitably aligned for a direct access, if the platform
680 * supports such things; it can thus be marginally faster than the generic
681 * <code>sph_dec32be()</code> function.
682 *
683 * @param src the source address
684 * @return the decoded value
685 */
686static inline sph_u32 sph_dec32be_aligned(const void *src);
687
688/**
689 * Encode a 32-bit unsigned value into memory, in big-endian convention
690 * (most significant byte comes first).
691 *
692 * @param dst the destination buffer
693 * @param val the value to encode
694 */
695static inline void sph_enc32be(void *dst, sph_u32 val);
696
697/**
698 * Encode a 32-bit unsigned value into memory, in big-endian convention
699 * (most significant byte comes first). This function assumes that the
700 * destination address is suitably aligned for a direct access, if the
701 * platform supports such things; it can thus be marginally faster than
702 * the generic <code>sph_enc32be()</code> function.
703 *
704 * @param dst the destination buffer
705 * @param val the value to encode
706 */
707static inline void sph_enc32be_aligned(void *dst, sph_u32 val);
708
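/*
 * Illustrative sketch (not part of the API): the 32-bit helpers above can
 * be combined freely, e.g.:
 *
 *     unsigned char buf[4];
 *     sph_u32 v;
 *
 *     sph_enc32be(buf, SPH_C32(0x12345678));
 *     // buf now holds { 0x12, 0x34, 0x56, 0x78 }
 *     v = sph_dec32le(buf);
 *     // v == SPH_C32(0x78563412): same bytes, read as little-endian
 */
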
709/**
710 * Decode a 64-bit unsigned value from memory, in little-endian convention
711 * (least significant byte comes first). This function is defined only
712 * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
713 *
714 * @param src the source address
715 * @return the decoded value
716 */
717static inline sph_u64 sph_dec64le(const void *src);
718
719/**
720 * Decode a 64-bit unsigned value from memory, in little-endian convention
721 * (least significant byte comes first). This function assumes that the
722 * source address is suitably aligned for a direct access, if the platform
723 * supports such things; it can thus be marginally faster than the generic
724 * <code>sph_dec64le()</code> function. This function is defined only
725 * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
726 *
727 * @param src the source address
728 * @return the decoded value
729 */
730static inline sph_u64 sph_dec64le_aligned(const void *src);
731
732/**
733 * Encode a 64-bit unsigned value into memory, in little-endian convention
734 * (least significant byte comes first). This function is defined only
735 * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
736 *
737 * @param dst the destination buffer
738 * @param val the value to encode
739 */
740static inline void sph_enc64le(void *dst, sph_u64 val);
741
742/**
743 * Encode a 64-bit unsigned value into memory, in little-endian convention
744 * (least significant byte comes first). This function assumes that the
745 * destination address is suitably aligned for a direct access, if the
746 * platform supports such things; it can thus be marginally faster than
747 * the generic <code>sph_enc64le()</code> function. This function is defined
748 * only if a suitable 64-bit type was detected and used for
749 * <code>sph_u64</code>.
750 *
751 * @param dst the destination buffer
752 * @param val the value to encode
753 */
754static inline void sph_enc64le_aligned(void *dst, sph_u64 val);
755
756/**
757 * Decode a 64-bit unsigned value from memory, in big-endian convention
758 * (most significant byte comes first). This function is defined only
759 * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
760 *
761 * @param src the source address
762 * @return the decoded value
763 */
764static inline sph_u64 sph_dec64be(const void *src);
765
766/**
767 * Decode a 64-bit unsigned value from memory, in big-endian convention
768 * (most significant byte comes first). This function assumes that the
769 * source address is suitably aligned for a direct access, if the platform
770 * supports such things; it can thus be marginally faster than the generic
771 * <code>sph_dec64be()</code> function. This function is defined only
772 * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
773 *
774 * @param src the source address
775 * @return the decoded value
776 */
777static inline sph_u64 sph_dec64be_aligned(const void *src);
778
779/**
780 * Encode a 64-bit unsigned value into memory, in big-endian convention
781 * (most significant byte comes first). This function is defined only
782 * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
783 *
784 * @param dst the destination buffer
785 * @param val the value to encode
786 */
787static inline void sph_enc64be(void *dst, sph_u64 val);
788
789/**
790 * Encode a 64-bit unsigned value into memory, in big-endian convention
791 * (most significant byte comes first). This function assumes that the
792 * destination address is suitably aligned for a direct access, if the
793 * platform supports such things; it can thus be marginally faster than
794 * the generic <code>sph_enc64be()</code> function. This function is defined
795 * only if a suitable 64-bit type was detected and used for
796 * <code>sph_u64</code>.
797 *
798 * @param dst the destination buffer
799 * @param val the value to encode
800 */
801static inline void sph_enc64be_aligned(void *dst, sph_u64 val);
802
803#endif
804
805/* ============== END documentation block for Doxygen ============= */
806
807#ifndef DOXYGEN_IGNORE
808
809/*
810 * We want to define the types "sph_u32" and "sph_u64" which hold
811 * unsigned values of at least, respectively, 32 and 64 bits. These
812 * tests should select appropriate types for most platforms. The
813 * macro "SPH_64" is defined if a 64-bit type is supported.
814 */
815
816#undef SPH_64
817#undef SPH_64_TRUE
818
819#if defined __STDC__ && __STDC_VERSION__ >= 199901L
820
821/*
822 * On C99 implementations, we can use <stdint.h> to get exact 32-bit and
823 * 64-bit types, if available, or otherwise use wider types (which must
824 * exist, for C99 conformance).
825 */
826
827#include <stdint.h>
828
829#ifdef UINT32_MAX
830typedef uint32_t sph_u32;
831typedef int32_t sph_s32;
832#else
833typedef uint_fast32_t sph_u32;
834typedef int_fast32_t sph_s32;
835#endif
836#if !SPH_NO_64
837#ifdef UINT64_MAX
838typedef uint64_t sph_u64;
839typedef int64_t sph_s64;
840#else
841typedef uint_fast64_t sph_u64;
842typedef int_fast64_t sph_s64;
843#endif
844#endif
845
846#define SPH_C32(x) ((sph_u32)(x))
847#if !SPH_NO_64
848#define SPH_C64(x) ((sph_u64)(x))
849#define SPH_64 1
850#endif
851
852#else
853
854/*
855 * On non-C99 systems, we use "unsigned int" if it is wide enough,
856 * "unsigned long" otherwise. This supports all "reasonable" architectures.
857 * We have to be cautious: pre-C99 preprocessors handle constants
858 * differently in '#if' expressions. Hence the shifts to test UINT_MAX.
859 */
860
861#if ((UINT_MAX >> 11) >> 11) >= 0x3FF
862
863typedef unsigned int sph_u32;
864typedef int sph_s32;
865
866#define SPH_C32(x) ((sph_u32)(x ## U))
867
868#else
869
870typedef unsigned long sph_u32;
871typedef long sph_s32;
872
873#define SPH_C32(x) ((sph_u32)(x ## UL))
874
875#endif
876
877#if !SPH_NO_64
878
879/*
880 * We want a 64-bit type. We use "unsigned long" if it is wide enough (as
881 * is common on 64-bit architectures such as AMD64, Alpha or Sparcv9),
882 * "unsigned long long" otherwise, if available. We use ULLONG_MAX to
883 * test whether "unsigned long long" is available; we also know that
884 * gcc features this type, even if the libc headers do not know it.
885 */
886
887#if ((ULONG_MAX >> 31) >> 31) >= 3
888
889typedef unsigned long sph_u64;
890typedef long sph_s64;
891
892#define SPH_C64(x) ((sph_u64)(x ## UL))
893
894#define SPH_64 1
895
896#elif ((ULLONG_MAX >> 31) >> 31) >= 3 || defined __GNUC__
897
898typedef unsigned long long sph_u64;
899typedef long long sph_s64;
900
901#define SPH_C64(x) ((sph_u64)(x ## ULL))
902
903#define SPH_64 1
904
905#else
906
907/*
908 * No 64-bit type...
909 */
910
911#endif
912
913#endif
914
915#endif
916
917/*
918 * If the "unsigned long" type has length 64 bits or more, then this is
919 * a "true" 64-bit architectures. This is also true with Visual C on
920 * amd64, even though the "long" type is limited to 32 bits.
921 */
922#if SPH_64 && (((ULONG_MAX >> 31) >> 31) >= 3 || defined _M_X64)
923#define SPH_64_TRUE 1
924#endif
925
926/*
927 * Implementation note: some processors have specific opcodes to perform
928 * a rotation. Recent versions of gcc recognize the expressions used in the
929 * rotation macros below and use the relevant opcodes, when appropriate.
930 */
931
932#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
933#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
934#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))
935
936#if SPH_64
937
938#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
939#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
940#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n)))
941
942#endif
943
944#ifndef DOXYGEN_IGNORE
945/*
946 * Define SPH_INLINE to be an "inline" qualifier, if available. We define
947 * some small macro-like functions which benefit greatly from being inlined.
948 */
949#if (defined __STDC__ && __STDC_VERSION__ >= 199901L) || defined __GNUC__
950#define SPH_INLINE inline
951#elif defined _MSC_VER
952#define SPH_INLINE __inline
953#else
954#define SPH_INLINE
955#endif
956#endif
957
958/*
959 * We define some macros which qualify the architecture. These macros
960 * may be explicitly set externally (e.g. as compiler parameters). The
961 * code below sets those macros if they are not already defined.
962 *
963 * Most macros are boolean, thus evaluate to either zero or non-zero.
964 * The SPH_UPTR macro is special, in that it evaluates to a C type,
965 * or is not defined.
966 *
967 * SPH_UPTR if defined: unsigned type to cast pointers into
968 *
969 * SPH_UNALIGNED non-zero if unaligned accesses are efficient
970 * SPH_LITTLE_ENDIAN non-zero if architecture is known to be little-endian
971 * SPH_BIG_ENDIAN non-zero if architecture is known to be big-endian
972 * SPH_LITTLE_FAST non-zero if little-endian decoding is fast
973 * SPH_BIG_FAST non-zero if big-endian decoding is fast
974 *
975 * If SPH_UPTR is defined, then encoding and decoding of 32-bit and 64-bit
976 * values will try to be "smart". Either SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN
977 * _must_ be non-zero in those situations. The 32-bit and 64-bit types
978 * _must_ also have an exact width.
979 *
980 * SPH_SPARCV9_GCC_32 UltraSPARC-compatible with gcc, 32-bit mode
981 * SPH_SPARCV9_GCC_64 UltraSPARC-compatible with gcc, 64-bit mode
982 * SPH_SPARCV9_GCC UltraSPARC-compatible with gcc
983 * SPH_I386_GCC x86-compatible (32-bit) with gcc
984 * SPH_I386_MSVC x86-compatible (32-bit) with Microsoft Visual C
985 * SPH_AMD64_GCC x86-compatible (64-bit) with gcc
986 * SPH_AMD64_MSVC x86-compatible (64-bit) with Microsoft Visual C
987 * SPH_PPC32_GCC PowerPC, 32-bit, with gcc
988 * SPH_PPC64_GCC PowerPC, 64-bit, with gcc
989 *
990 * TODO: enhance automatic detection, for more architectures and compilers.
991 * Endianness is the most important. SPH_UNALIGNED and SPH_UPTR help with
992 * some very fast functions (e.g. MD4) when using unaligned input data.
993 * The CPU-specific-with-GCC macros are useful only for inline assembly,
994 * normally restricted to this header file.
995 */
996
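/*
 * Illustrative sketch (hypothetical command line, not taken from any
 * build script): the detection macros can be forced externally, e.g.:
 *
 *     cc -DSPH_LITTLE_ENDIAN=1 -DSPH_UNALIGNED=1 -DSPH_UPTR=sph_u32 -c foo.c
 *
 * Values defined this way take precedence, since the code below only
 * fills in macros that are not already defined.
 */
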
997/*
998 * 32-bit x86, aka "i386 compatible".
999 */
1000#if defined __i386__ || defined _M_IX86
1001
1002#define SPH_DETECT_UNALIGNED 1
1003#define SPH_DETECT_LITTLE_ENDIAN 1
1004#define SPH_DETECT_UPTR sph_u32
1005#ifdef __GNUC__
1006#define SPH_DETECT_I386_GCC 1
1007#endif
1008#ifdef _MSC_VER
1009#define SPH_DETECT_I386_MSVC 1
1010#endif
1011
1012/*
1013 * 64-bit x86, hereafter known as "amd64".
1014 */
1015#elif defined __x86_64 || defined _M_X64
1016
1017#define SPH_DETECT_UNALIGNED 1
1018#define SPH_DETECT_LITTLE_ENDIAN 1
1019#define SPH_DETECT_UPTR sph_u64
1020#ifdef __GNUC__
1021#define SPH_DETECT_AMD64_GCC 1
1022#endif
1023#ifdef _MSC_VER
1024#define SPH_DETECT_AMD64_MSVC 1
1025#endif
1026
1027/*
1028 * 64-bit Sparc architecture (implies v9).
1029 */
1030#elif ((defined __sparc__ || defined __sparc) && defined __arch64__) \
1031 || defined __sparcv9
1032
1033#define SPH_DETECT_BIG_ENDIAN 1
1034#define SPH_DETECT_UPTR sph_u64
1035#ifdef __GNUC__
1036#define SPH_DETECT_SPARCV9_GCC_64 1
1037#define SPH_DETECT_LITTLE_FAST 1
1038#endif
1039
1040/*
1041 * 32-bit Sparc.
1042 */
1043#elif (defined __sparc__ || defined __sparc) \
1044 && !(defined __sparcv9 || defined __arch64__)
1045
1046#define SPH_DETECT_BIG_ENDIAN 1
1047#define SPH_DETECT_UPTR sph_u32
1048#if defined __GNUC__ && defined __sparc_v9__
1049#define SPH_DETECT_SPARCV9_GCC_32 1
1050#define SPH_DETECT_LITTLE_FAST 1
1051#endif
1052
1053/*
1054 * ARM, little-endian.
1055 */
1056#elif defined __arm__ && __ARMEL__
1057
1058#define SPH_DETECT_LITTLE_ENDIAN 1
1059
1060/*
1061 * MIPS, little-endian.
1062 */
1063#elif MIPSEL || _MIPSEL || __MIPSEL || __MIPSEL__
1064
1065#define SPH_DETECT_LITTLE_ENDIAN 1
1066
1067/*
1068 * MIPS, big-endian.
1069 */
1070#elif MIPSEB || _MIPSEB || __MIPSEB || __MIPSEB__
1071
1072#define SPH_DETECT_BIG_ENDIAN 1
1073
1074/*
1075 * PowerPC.
1076 */
1077#elif defined __powerpc__ || defined __POWERPC__ || defined __ppc__ \
1078 || defined _ARCH_PPC
1079
1080/*
1081 * Note: we do not declare cross-endian access to be "fast": even if
1082 * using inline assembly, implementations should still assume that
1083 * keeping the decoded word in a temporary is faster than decoding
1084 * it again.
1085 */
1086#if defined __GNUC__
1087#if SPH_64_TRUE
1088#define SPH_DETECT_PPC64_GCC 1
1089#else
1090#define SPH_DETECT_PPC32_GCC 1
1091#endif
1092#endif
1093
1094#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
1095#define SPH_DETECT_BIG_ENDIAN 1
1096#elif defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
1097#define SPH_DETECT_LITTLE_ENDIAN 1
1098#endif
1099
1100/*
1101 * Itanium, 64-bit.
1102 */
1103#elif defined __ia64 || defined __ia64__ \
1104 || defined __itanium__ || defined _M_IA64
1105
1106#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
1107#define SPH_DETECT_BIG_ENDIAN 1
1108#else
1109#define SPH_DETECT_LITTLE_ENDIAN 1
1110#endif
1111#if defined __LP64__ || defined _LP64
1112#define SPH_DETECT_UPTR sph_u64
1113#else
1114#define SPH_DETECT_UPTR sph_u32
1115#endif
1116
1117#endif
1118
1119#if defined SPH_DETECT_SPARCV9_GCC_32 || defined SPH_DETECT_SPARCV9_GCC_64
1120#define SPH_DETECT_SPARCV9_GCC 1
1121#endif
1122
1123#if defined SPH_DETECT_UNALIGNED && !defined SPH_UNALIGNED
1124#define SPH_UNALIGNED SPH_DETECT_UNALIGNED
1125#endif
1126#if defined SPH_DETECT_UPTR && !defined SPH_UPTR
1127#define SPH_UPTR SPH_DETECT_UPTR
1128#endif
1129#if defined SPH_DETECT_LITTLE_ENDIAN && !defined SPH_LITTLE_ENDIAN
1130#define SPH_LITTLE_ENDIAN SPH_DETECT_LITTLE_ENDIAN
1131#endif
1132#if defined SPH_DETECT_BIG_ENDIAN && !defined SPH_BIG_ENDIAN
1133#define SPH_BIG_ENDIAN SPH_DETECT_BIG_ENDIAN
1134#endif
1135#if defined SPH_DETECT_LITTLE_FAST && !defined SPH_LITTLE_FAST
1136#define SPH_LITTLE_FAST SPH_DETECT_LITTLE_FAST
1137#endif
1138#if defined SPH_DETECT_BIG_FAST && !defined SPH_BIG_FAST
1139#define SPH_BIG_FAST SPH_DETECT_BIG_FAST
1140#endif
1141#if defined SPH_DETECT_SPARCV9_GCC_32 && !defined SPH_SPARCV9_GCC_32
1142#define SPH_SPARCV9_GCC_32 SPH_DETECT_SPARCV9_GCC_32
1143#endif
1144#if defined SPH_DETECT_SPARCV9_GCC_64 && !defined SPH_SPARCV9_GCC_64
1145#define SPH_SPARCV9_GCC_64 SPH_DETECT_SPARCV9_GCC_64
1146#endif
1147#if defined SPH_DETECT_SPARCV9_GCC && !defined SPH_SPARCV9_GCC
1148#define SPH_SPARCV9_GCC SPH_DETECT_SPARCV9_GCC
1149#endif
1150#if defined SPH_DETECT_I386_GCC && !defined SPH_I386_GCC
1151#define SPH_I386_GCC SPH_DETECT_I386_GCC
1152#endif
1153#if defined SPH_DETECT_I386_MSVC && !defined SPH_I386_MSVC
1154#define SPH_I386_MSVC SPH_DETECT_I386_MSVC
1155#endif
1156#if defined SPH_DETECT_AMD64_GCC && !defined SPH_AMD64_GCC
1157#define SPH_AMD64_GCC SPH_DETECT_AMD64_GCC
1158#endif
1159#if defined SPH_DETECT_AMD64_MSVC && !defined SPH_AMD64_MSVC
1160#define SPH_AMD64_MSVC SPH_DETECT_AMD64_MSVC
1161#endif
1162#if defined SPH_DETECT_PPC32_GCC && !defined SPH_PPC32_GCC
1163#define SPH_PPC32_GCC SPH_DETECT_PPC32_GCC
1164#endif
1165#if defined SPH_DETECT_PPC64_GCC && !defined SPH_PPC64_GCC
1166#define SPH_PPC64_GCC SPH_DETECT_PPC64_GCC
1167#endif
1168
1169#if SPH_LITTLE_ENDIAN && !defined SPH_LITTLE_FAST
1170#define SPH_LITTLE_FAST 1
1171#endif
1172#if SPH_BIG_ENDIAN && !defined SPH_BIG_FAST
1173#define SPH_BIG_FAST 1
1174#endif
1175
1176#if defined SPH_UPTR && !(SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN)
1177#error SPH_UPTR defined, but endianness is not known.
1178#endif
1179
1180#if SPH_I386_GCC && !SPH_NO_ASM
1181
1182/*
1183 * On x86 32-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit
1184 * values.
1185 */
1186
1187static SPH_INLINE sph_u32
1188sph_bswap32(sph_u32 x)
1189{
1190 __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
1191 return x;
1192}
1193
1194#if SPH_64
1195
1196static SPH_INLINE sph_u64
1197sph_bswap64(sph_u64 x)
1198{
1199 return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
1200 | (sph_u64)sph_bswap32((sph_u32)(x >> 32));
1201}
1202
1203#endif
1204
1205#elif SPH_AMD64_GCC && !SPH_NO_ASM
1206
1207/*
1208 * On x86 64-bit, with gcc, we use the bswapl and bswapq opcodes to
1209 * byte-swap 32-bit and 64-bit values, respectively.
1210 */
1211
1212static SPH_INLINE sph_u32
1213sph_bswap32(sph_u32 x)
1214{
1215 __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
1216 return x;
1217}
1218
1219#if SPH_64
1220
1221static SPH_INLINE sph_u64
1222sph_bswap64(sph_u64 x)
1223{
1224 __asm__ __volatile__ ("bswapq %0" : "=r" (x) : "0" (x));
1225 return x;
1226}
1227
1228#endif
1229
1230/*
1231 * Disabled code. Apparently, Microsoft Visual C 2005 is smart enough
1232 * to generate proper opcodes for endianness swapping with the pure C
1233 * implementation below.
1234 *
1235
1236#elif SPH_I386_MSVC && !SPH_NO_ASM
1237
1238static __inline sph_u32 __declspec(naked) __fastcall
1239sph_bswap32(sph_u32 x)
1240{
1241 __asm {
1242 bswap ecx
1243 mov eax,ecx
1244 ret
1245 }
1246}
1247
1248#if SPH_64
1249
1250static SPH_INLINE sph_u64
1251sph_bswap64(sph_u64 x)
1252{
1253 return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
1254 | (sph_u64)sph_bswap32((sph_u32)(x >> 32));
1255}
1256
1257#endif
1258
1259 *
1260 * [end of disabled code]
1261 */
1262
1263#else
1264
1265static SPH_INLINE sph_u32
1266sph_bswap32(sph_u32 x)
1267{
1268 x = SPH_T32((x << 16) | (x >> 16));
1269 x = ((x & SPH_C32(0xFF00FF00)) >> 8)
1270 | ((x & SPH_C32(0x00FF00FF)) << 8);
1271 return x;
1272}
1273
1274#if SPH_64
1275
1276/**
1277 * Byte-swap a 64-bit value.
1278 *
1279 * @param x the input value
1280 * @return the byte-swapped value
1281 */
1282static SPH_INLINE sph_u64
1283sph_bswap64(sph_u64 x)
1284{
1285 x = SPH_T64((x << 32) | (x >> 32));
1286 x = ((x & SPH_C64(0xFFFF0000FFFF0000)) >> 16)
1287 | ((x & SPH_C64(0x0000FFFF0000FFFF)) << 16);
1288 x = ((x & SPH_C64(0xFF00FF00FF00FF00)) >> 8)
1289 | ((x & SPH_C64(0x00FF00FF00FF00FF)) << 8);
1290 return x;
1291}
1292
1293#endif
1294
1295#endif
1296
1297#if SPH_SPARCV9_GCC && !SPH_NO_ASM
1298
1299/*
1300 * On UltraSPARC systems, native ordering is big-endian, but it is
1301 * possible to perform little-endian read accesses by specifying the
1302 * address space 0x88 (ASI_PRIMARY_LITTLE). Basically, either we use
1303 * the opcode "lda [%reg]0x88,%dst", where %reg is the register which
1304 * contains the source address and %dst is the destination register,
1305 * or we use "lda [%reg+imm]%asi,%dst", which uses the %asi register
1306 * to get the address space name. The latter format is better since it
1307 * combines an addition and the actual access in a single opcode; but
1308 * it requires the setting (and subsequent resetting) of %asi, which is
1309 * slow. Some operations (e.g. the MD5 compression function) combine many
1310 * successive little-endian read accesses, which may share the same
1311 * %asi setting. The macros below contain the appropriate inline
1312 * assembly.
1313 */
1314
1315#define SPH_SPARCV9_SET_ASI \
1316 sph_u32 sph_sparcv9_asi; \
1317 __asm__ __volatile__ ( \
1318 "rd %%asi,%0\n\twr %%g0,0x88,%%asi" : "=r" (sph_sparcv9_asi));
1319
1320#define SPH_SPARCV9_RESET_ASI \
1321 __asm__ __volatile__ ("wr %%g0,%0,%%asi" : : "r" (sph_sparcv9_asi));
1322
1323#define SPH_SPARCV9_DEC32LE(base, idx) ({ \
1324 sph_u32 sph_sparcv9_tmp; \
1325 __asm__ __volatile__ ("lda [%1+" #idx "*4]%%asi,%0" \
1326 : "=r" (sph_sparcv9_tmp) : "r" (base)); \
1327 sph_sparcv9_tmp; \
1328 })
1329
1330#endif
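
/*
 * Illustrative usage sketch (not taken from the hash implementations):
 * several successive little-endian reads can share a single %asi setting.
 *
 *     SPH_SPARCV9_SET_ASI
 *     x0 = SPH_SPARCV9_DEC32LE(buf, 0);
 *     x1 = SPH_SPARCV9_DEC32LE(buf, 1);
 *     SPH_SPARCV9_RESET_ASI
 */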
1331
1332static SPH_INLINE void
1333sph_enc16be(void *dst, unsigned val)
1334{
1335 ((unsigned char *)dst)[0] = (val >> 8);
1336 ((unsigned char *)dst)[1] = val;
1337}
1338
1339static SPH_INLINE unsigned
1340sph_dec16be(const void *src)
1341{
1342 return ((unsigned)(((const unsigned char *)src)[0]) << 8)
1343 | (unsigned)(((const unsigned char *)src)[1]);
1344}
1345
1346static SPH_INLINE void
1347sph_enc16le(void *dst, unsigned val)
1348{
1349 ((unsigned char *)dst)[0] = val;
1350 ((unsigned char *)dst)[1] = val >> 8;
1351}
1352
1353static SPH_INLINE unsigned
1354sph_dec16le(const void *src)
1355{
1356 return (unsigned)(((const unsigned char *)src)[0])
1357 | ((unsigned)(((const unsigned char *)src)[1]) << 8);
1358}
1359
1360/**
1361 * Encode a 32-bit value into the provided buffer (big endian convention).
1362 *
1363 * @param dst the destination buffer
1364 * @param val the 32-bit value to encode
1365 */
1366static SPH_INLINE void
1367sph_enc32be(void *dst, sph_u32 val)
1368{
1369#if defined SPH_UPTR
1370#if SPH_UNALIGNED
1371#if SPH_LITTLE_ENDIAN
1372 val = sph_bswap32(val);
1373#endif
1374 *(sph_u32 *)dst = val;
1375#else
1376 if (((SPH_UPTR)dst & 3) == 0) {
1377#if SPH_LITTLE_ENDIAN
1378 val = sph_bswap32(val);
1379#endif
1380 *(sph_u32 *)dst = val;
1381 } else {
1382 ((unsigned char *)dst)[0] = (val >> 24);
1383 ((unsigned char *)dst)[1] = (val >> 16);
1384 ((unsigned char *)dst)[2] = (val >> 8);
1385 ((unsigned char *)dst)[3] = val;
1386 }
1387#endif
1388#else
1389 ((unsigned char *)dst)[0] = (val >> 24);
1390 ((unsigned char *)dst)[1] = (val >> 16);
1391 ((unsigned char *)dst)[2] = (val >> 8);
1392 ((unsigned char *)dst)[3] = val;
1393#endif
1394}
1395
1396/**
1397 * Encode a 32-bit value into the provided buffer (big endian convention).
1398 * The destination buffer must be properly aligned.
1399 *
1400 * @param dst the destination buffer (32-bit aligned)
1401 * @param val the value to encode
1402 */
1403static SPH_INLINE void
1404sph_enc32be_aligned(void *dst, sph_u32 val)
1405{
1406#if SPH_LITTLE_ENDIAN
1407 *(sph_u32 *)dst = sph_bswap32(val);
1408#elif SPH_BIG_ENDIAN
1409 *(sph_u32 *)dst = val;
1410#else
1411 ((unsigned char *)dst)[0] = (val >> 24);
1412 ((unsigned char *)dst)[1] = (val >> 16);
1413 ((unsigned char *)dst)[2] = (val >> 8);
1414 ((unsigned char *)dst)[3] = val;
1415#endif
1416}
1417
1418/**
1419 * Decode a 32-bit value from the provided buffer (big endian convention).
1420 *
1421 * @param src the source buffer
1422 * @return the decoded value
1423 */
1424static SPH_INLINE sph_u32
1425sph_dec32be(const void *src)
1426{
1427#if defined SPH_UPTR
1428#if SPH_UNALIGNED
1429#if SPH_LITTLE_ENDIAN
1430 return sph_bswap32(*(const sph_u32 *)src);
1431#else
1432 return *(const sph_u32 *)src;
1433#endif
1434#else
1435 if (((SPH_UPTR)src & 3) == 0) {
1436#if SPH_LITTLE_ENDIAN
1437 return sph_bswap32(*(const sph_u32 *)src);
1438#else
1439 return *(const sph_u32 *)src;
1440#endif
1441 } else {
1442 return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
1443 | ((sph_u32)(((const unsigned char *)src)[1]) << 16)
1444 | ((sph_u32)(((const unsigned char *)src)[2]) << 8)
1445 | (sph_u32)(((const unsigned char *)src)[3]);
1446 }
1447#endif
1448#else
1449 return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
1450 | ((sph_u32)(((const unsigned char *)src)[1]) << 16)
1451 | ((sph_u32)(((const unsigned char *)src)[2]) << 8)
1452 | (sph_u32)(((const unsigned char *)src)[3]);
1453#endif
1454}
1455
1456/**
1457 * Decode a 32-bit value from the provided buffer (big endian convention).
1458 * The source buffer must be properly aligned.
1459 *
1460 * @param src the source buffer (32-bit aligned)
1461 * @return the decoded value
1462 */
1463static SPH_INLINE sph_u32
1464sph_dec32be_aligned(const void *src)
1465{
1466#if SPH_LITTLE_ENDIAN
1467 return sph_bswap32(*(const sph_u32 *)src);
1468#elif SPH_BIG_ENDIAN
1469 return *(const sph_u32 *)src;
1470#else
1471 return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
1472 | ((sph_u32)(((const unsigned char *)src)[1]) << 16)
1473 | ((sph_u32)(((const unsigned char *)src)[2]) << 8)
1474 | (sph_u32)(((const unsigned char *)src)[3]);
1475#endif
1476}
1477
1478/**
1479 * Encode a 32-bit value into the provided buffer (little endian convention).
1480 *
1481 * @param dst the destination buffer
1482 * @param val the 32-bit value to encode
1483 */
1484static SPH_INLINE void
1485sph_enc32le(void *dst, sph_u32 val)
1486{
1487#if defined SPH_UPTR
1488#if SPH_UNALIGNED
1489#if SPH_BIG_ENDIAN
1490 val = sph_bswap32(val);
1491#endif
1492 *(sph_u32 *)dst = val;
1493#else
1494 if (((SPH_UPTR)dst & 3) == 0) {
1495#if SPH_BIG_ENDIAN
1496 val = sph_bswap32(val);
1497#endif
1498 *(sph_u32 *)dst = val;
1499 } else {
1500 ((unsigned char *)dst)[0] = val;
1501 ((unsigned char *)dst)[1] = (val >> 8);
1502 ((unsigned char *)dst)[2] = (val >> 16);
1503 ((unsigned char *)dst)[3] = (val >> 24);
1504 }
1505#endif
1506#else
1507 ((unsigned char *)dst)[0] = val;
1508 ((unsigned char *)dst)[1] = (val >> 8);
1509 ((unsigned char *)dst)[2] = (val >> 16);
1510 ((unsigned char *)dst)[3] = (val >> 24);
1511#endif
1512}
1513
1514/**
1515 * Encode a 32-bit value into the provided buffer (little endian convention).
1516 * The destination buffer must be properly aligned.
1517 *
1518 * @param dst the destination buffer (32-bit aligned)
1519 * @param val the value to encode
1520 */
1521static SPH_INLINE void
1522sph_enc32le_aligned(void *dst, sph_u32 val)
1523{
1524#if SPH_LITTLE_ENDIAN
1525 *(sph_u32 *)dst = val;
1526#elif SPH_BIG_ENDIAN
1527 *(sph_u32 *)dst = sph_bswap32(val);
1528#else
1529 ((unsigned char *)dst)[0] = val;
1530 ((unsigned char *)dst)[1] = (val >> 8);
1531 ((unsigned char *)dst)[2] = (val >> 16);
1532 ((unsigned char *)dst)[3] = (val >> 24);
1533#endif
1534}
1535
1536/**
1537 * Decode a 32-bit value from the provided buffer (little endian convention).
1538 *
1539 * @param src the source buffer
1540 * @return the decoded value
1541 */
1542static SPH_INLINE sph_u32
1543sph_dec32le(const void *src)
1544{
1545#if defined SPH_UPTR
1546#if SPH_UNALIGNED
1547#if SPH_BIG_ENDIAN
1548 return sph_bswap32(*(const sph_u32 *)src);
1549#else
1550 return *(const sph_u32 *)src;
1551#endif
1552#else
1553 if (((SPH_UPTR)src & 3) == 0) {
1554#if SPH_BIG_ENDIAN
1555#if SPH_SPARCV9_GCC && !SPH_NO_ASM
1556 sph_u32 tmp;
1557
1558 /*
1559 * "__volatile__" is needed here because without it,
1560 * gcc-3.4.3 miscompiles the code and performs the
1561 * access before the test on the address, thus triggering
1562 * a bus error...
1563 */
1564 __asm__ __volatile__ (
1565 "lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1566 return tmp;
1567/*
1568 * On PowerPC, this turns out not to be worth the effort: the inline
1569 * assembly makes the GCC optimizer uncomfortable, which tends to nullify
1570 * the decoding gains.
1571 *
1572 * For most hash functions, using this inline assembly trick changes
1573 * hashing speed by less than 5% and often _reduces_ it. The biggest
1574 * gains are for MD4 (+11%) and CubeHash (+30%). For all others, it is
1575 * less than 10%. The speed gain on CubeHash is probably due to the
1576 * chronic shortage of registers that CubeHash endures; for the other
1577 * functions, the generic code appears to be efficient enough already.
1578 *
1579#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM
1580 sph_u32 tmp;
1581
1582 __asm__ __volatile__ (
1583 "lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1584 return tmp;
1585 */
1586#else
1587 return sph_bswap32(*(const sph_u32 *)src);
1588#endif
1589#else
1590 return *(const sph_u32 *)src;
1591#endif
1592 } else {
1593 return (sph_u32)(((const unsigned char *)src)[0])
1594 | ((sph_u32)(((const unsigned char *)src)[1]) << 8)
1595 | ((sph_u32)(((const unsigned char *)src)[2]) << 16)
1596 | ((sph_u32)(((const unsigned char *)src)[3]) << 24);
1597 }
1598#endif
1599#else
1600 return (sph_u32)(((const unsigned char *)src)[0])
1601 | ((sph_u32)(((const unsigned char *)src)[1]) << 8)
1602 | ((sph_u32)(((const unsigned char *)src)[2]) << 16)
1603 | ((sph_u32)(((const unsigned char *)src)[3]) << 24);
1604#endif
1605}
1606
1607/**
1608 * Decode a 32-bit value from the provided buffer (little endian convention).
1609 * The source buffer must be properly aligned.
1610 *
1611 * @param src the source buffer (32-bit aligned)
1612 * @return the decoded value
1613 */
1614static SPH_INLINE sph_u32
1615sph_dec32le_aligned(const void *src)
1616{
1617#if SPH_LITTLE_ENDIAN
1618 return *(const sph_u32 *)src;
1619#elif SPH_BIG_ENDIAN
1620#if SPH_SPARCV9_GCC && !SPH_NO_ASM
1621 sph_u32 tmp;
1622
1623 __asm__ __volatile__ ("lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1624 return tmp;
1625/*
1626 * Not worth it generally.
1627 *
1628#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM
1629 sph_u32 tmp;
1630
1631 __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1632 return tmp;
1633 */
1634#else
1635 return sph_bswap32(*(const sph_u32 *)src);
1636#endif
1637#else
1638 return (sph_u32)(((const unsigned char *)src)[0])
1639 | ((sph_u32)(((const unsigned char *)src)[1]) << 8)
1640 | ((sph_u32)(((const unsigned char *)src)[2]) << 16)
1641 | ((sph_u32)(((const unsigned char *)src)[3]) << 24);
1642#endif
1643}
1644
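/*
 * Illustrative sketch: the little-endian decoders treat the first byte
 * as the least significant one, so the same four bytes yield different
 * values under the two conventions:
 *
 *     static const unsigned char b[4] = { 0x78, 0x56, 0x34, 0x12 };
 *
 *     sph_dec32le(b)   yields 0x12345678
 *     sph_dec32be(b)   yields 0x78563412
 */
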
1645#if SPH_64
1646
1647/**
1648 * Encode a 64-bit value into the provided buffer (big endian convention).
1649 *
1650 * @param dst the destination buffer
1651 * @param val the 64-bit value to encode
1652 */
1653static SPH_INLINE void
1654sph_enc64be(void *dst, sph_u64 val)
1655{
1656#if defined SPH_UPTR
1657#if SPH_UNALIGNED
1658#if SPH_LITTLE_ENDIAN
1659 val = sph_bswap64(val);
1660#endif
1661 *(sph_u64 *)dst = val;
1662#else
1663 if (((SPH_UPTR)dst & 7) == 0) {
1664#if SPH_LITTLE_ENDIAN
1665 val = sph_bswap64(val);
1666#endif
1667 *(sph_u64 *)dst = val;
1668 } else {
1669 ((unsigned char *)dst)[0] = (val >> 56);
1670 ((unsigned char *)dst)[1] = (val >> 48);
1671 ((unsigned char *)dst)[2] = (val >> 40);
1672 ((unsigned char *)dst)[3] = (val >> 32);
1673 ((unsigned char *)dst)[4] = (val >> 24);
1674 ((unsigned char *)dst)[5] = (val >> 16);
1675 ((unsigned char *)dst)[6] = (val >> 8);
1676 ((unsigned char *)dst)[7] = val;
1677 }
1678#endif
1679#else
1680 ((unsigned char *)dst)[0] = (val >> 56);
1681 ((unsigned char *)dst)[1] = (val >> 48);
1682 ((unsigned char *)dst)[2] = (val >> 40);
1683 ((unsigned char *)dst)[3] = (val >> 32);
1684 ((unsigned char *)dst)[4] = (val >> 24);
1685 ((unsigned char *)dst)[5] = (val >> 16);
1686 ((unsigned char *)dst)[6] = (val >> 8);
1687 ((unsigned char *)dst)[7] = val;
1688#endif
1689}
1690
1691/**
1692 * Encode a 64-bit value into the provided buffer (big endian convention).
1693 * The destination buffer must be properly aligned.
1694 *
1695 * @param dst the destination buffer (64-bit aligned)
1696 * @param val the value to encode
1697 */
1698static SPH_INLINE void
1699sph_enc64be_aligned(void *dst, sph_u64 val)
1700{
1701#if SPH_LITTLE_ENDIAN
1702 *(sph_u64 *)dst = sph_bswap64(val);
1703#elif SPH_BIG_ENDIAN
1704 *(sph_u64 *)dst = val;
1705#else
1706 ((unsigned char *)dst)[0] = (val >> 56);
1707 ((unsigned char *)dst)[1] = (val >> 48);
1708 ((unsigned char *)dst)[2] = (val >> 40);
1709 ((unsigned char *)dst)[3] = (val >> 32);
1710 ((unsigned char *)dst)[4] = (val >> 24);
1711 ((unsigned char *)dst)[5] = (val >> 16);
1712 ((unsigned char *)dst)[6] = (val >> 8);
1713 ((unsigned char *)dst)[7] = val;
1714#endif
1715}
1716
1717/**
1718 * Decode a 64-bit value from the provided buffer (big endian convention).
1719 *
1720 * @param src the source buffer
1721 * @return the decoded value
1722 */
1723static SPH_INLINE sph_u64
1724sph_dec64be(const void *src)
1725{
1726#if defined SPH_UPTR
1727#if SPH_UNALIGNED
1728#if SPH_LITTLE_ENDIAN
1729 return sph_bswap64(*(const sph_u64 *)src);
1730#else
1731 return *(const sph_u64 *)src;
1732#endif
1733#else
1734 if (((SPH_UPTR)src & 7) == 0) {
1735#if SPH_LITTLE_ENDIAN
1736 return sph_bswap64(*(const sph_u64 *)src);
1737#else
1738 return *(const sph_u64 *)src;
1739#endif
1740 } else {
1741 return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
1742 | ((sph_u64)(((const unsigned char *)src)[1]) << 48)
1743 | ((sph_u64)(((const unsigned char *)src)[2]) << 40)
1744 | ((sph_u64)(((const unsigned char *)src)[3]) << 32)
1745 | ((sph_u64)(((const unsigned char *)src)[4]) << 24)
1746 | ((sph_u64)(((const unsigned char *)src)[5]) << 16)
1747 | ((sph_u64)(((const unsigned char *)src)[6]) << 8)
1748 | (sph_u64)(((const unsigned char *)src)[7]);
1749 }
1750#endif
1751#else
1752 return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
1753 | ((sph_u64)(((const unsigned char *)src)[1]) << 48)
1754 | ((sph_u64)(((const unsigned char *)src)[2]) << 40)
1755 | ((sph_u64)(((const unsigned char *)src)[3]) << 32)
1756 | ((sph_u64)(((const unsigned char *)src)[4]) << 24)
1757 | ((sph_u64)(((const unsigned char *)src)[5]) << 16)
1758 | ((sph_u64)(((const unsigned char *)src)[6]) << 8)
1759 | (sph_u64)(((const unsigned char *)src)[7]);
1760#endif
1761}
1762
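/*
 * Illustrative sketch (hypothetical variable names): the 64-bit helpers
 * follow the same pattern as the 32-bit ones, but work on eight bytes
 * and, in sph_enc64be() and sph_dec64be(), test for 8-byte alignment
 * before taking the direct word-access path:
 *
 *     static const unsigned char msg[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
 *     unsigned char out[8];
 *
 *     v = sph_dec64be(msg);     sets v (an sph_u64) to 0x0102030405060708
 *     sph_enc64be(out, v);      reproduces msg in out, byte for byte
 */
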
1763/**
1764 * Decode a 64-bit value from the provided buffer (big endian convention).
1765 * The source buffer must be properly aligned.
1766 *
1767 * @param src the source buffer (64-bit aligned)
1768 * @return the decoded value
1769 */
1770static SPH_INLINE sph_u64
1771sph_dec64be_aligned(const void *src)
1772{
1773#if SPH_LITTLE_ENDIAN
1774 return sph_bswap64(*(const sph_u64 *)src);
1775#elif SPH_BIG_ENDIAN
1776 return *(const sph_u64 *)src;
1777#else
1778 return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
1779 | ((sph_u64)(((const unsigned char *)src)[1]) << 48)
1780 | ((sph_u64)(((const unsigned char *)src)[2]) << 40)
1781 | ((sph_u64)(((const unsigned char *)src)[3]) << 32)
1782 | ((sph_u64)(((const unsigned char *)src)[4]) << 24)
1783 | ((sph_u64)(((const unsigned char *)src)[5]) << 16)
1784 | ((sph_u64)(((const unsigned char *)src)[6]) << 8)
1785 | (sph_u64)(((const unsigned char *)src)[7]);
1786#endif
1787}
1788
1789/**
1790 * Encode a 64-bit value into the provided buffer (little endian convention).
1791 *
1792 * @param dst the destination buffer
1793 * @param val the 64-bit value to encode
1794 */
1795static SPH_INLINE void
1796sph_enc64le(void *dst, sph_u64 val)
1797{
1798#if defined SPH_UPTR
1799#if SPH_UNALIGNED
1800#if SPH_BIG_ENDIAN
1801 val = sph_bswap64(val);
1802#endif
1803 *(sph_u64 *)dst = val;
1804#else
1805 if (((SPH_UPTR)dst & 7) == 0) {
1806#if SPH_BIG_ENDIAN
1807 val = sph_bswap64(val);
1808#endif
1809 *(sph_u64 *)dst = val;
1810 } else {
1811 ((unsigned char *)dst)[0] = val;
1812 ((unsigned char *)dst)[1] = (val >> 8);
1813 ((unsigned char *)dst)[2] = (val >> 16);
1814 ((unsigned char *)dst)[3] = (val >> 24);
1815 ((unsigned char *)dst)[4] = (val >> 32);
1816 ((unsigned char *)dst)[5] = (val >> 40);
1817 ((unsigned char *)dst)[6] = (val >> 48);
1818 ((unsigned char *)dst)[7] = (val >> 56);
1819 }
1820#endif
1821#else
1822 ((unsigned char *)dst)[0] = val;
1823 ((unsigned char *)dst)[1] = (val >> 8);
1824 ((unsigned char *)dst)[2] = (val >> 16);
1825 ((unsigned char *)dst)[3] = (val >> 24);
1826 ((unsigned char *)dst)[4] = (val >> 32);
1827 ((unsigned char *)dst)[5] = (val >> 40);
1828 ((unsigned char *)dst)[6] = (val >> 48);
1829 ((unsigned char *)dst)[7] = (val >> 56);
1830#endif
1831}
1832
1833/**
1834 * Encode a 64-bit value into the provided buffer (little endian convention).
1835 * The destination buffer must be properly aligned.
1836 *
1837 * @param dst the destination buffer (64-bit aligned)
1838 * @param val the value to encode
1839 */
1840static SPH_INLINE void
1841sph_enc64le_aligned(void *dst, sph_u64 val)
1842{
1843#if SPH_LITTLE_ENDIAN
1844 *(sph_u64 *)dst = val;
1845#elif SPH_BIG_ENDIAN
1846 *(sph_u64 *)dst = sph_bswap64(val);
1847#else
1848 ((unsigned char *)dst)[0] = val;
1849 ((unsigned char *)dst)[1] = (val >> 8);
1850 ((unsigned char *)dst)[2] = (val >> 16);
1851 ((unsigned char *)dst)[3] = (val >> 24);
1852 ((unsigned char *)dst)[4] = (val >> 32);
1853 ((unsigned char *)dst)[5] = (val >> 40);
1854 ((unsigned char *)dst)[6] = (val >> 48);
1855 ((unsigned char *)dst)[7] = (val >> 56);
1856#endif
1857}
1858
1859/**
1860 * Decode a 64-bit value from the provided buffer (little endian convention).
1861 *
1862 * @param src the source buffer
1863 * @return the decoded value
1864 */
1865static SPH_INLINE sph_u64
1866sph_dec64le(const void *src)
1867{
1868#if defined SPH_UPTR
1869#if SPH_UNALIGNED
1870#if SPH_BIG_ENDIAN
1871 return sph_bswap64(*(const sph_u64 *)src);
1872#else
1873 return *(const sph_u64 *)src;
1874#endif
1875#else
1876 if (((SPH_UPTR)src & 7) == 0) {
1877#if SPH_BIG_ENDIAN
1878#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM
1879 sph_u64 tmp;
1880
1881 __asm__ __volatile__ (
1882 "ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1883 return tmp;
1884/*
1885 * Not worth it generally.
1886 *
1887#elif SPH_PPC32_GCC && !SPH_NO_ASM
1888 return (sph_u64)sph_dec32le_aligned(src)
1889 | ((sph_u64)sph_dec32le_aligned(
1890 (const char *)src + 4) << 32);
1891#elif SPH_PPC64_GCC && !SPH_NO_ASM
1892 sph_u64 tmp;
1893
1894 __asm__ __volatile__ (
1895 "ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1896 return tmp;
1897 */
1898#else
1899 return sph_bswap64(*(const sph_u64 *)src);
1900#endif
1901#else
1902 return *(const sph_u64 *)src;
1903#endif
1904 } else {
1905 return (sph_u64)(((const unsigned char *)src)[0])
1906 | ((sph_u64)(((const unsigned char *)src)[1]) << 8)
1907 | ((sph_u64)(((const unsigned char *)src)[2]) << 16)
1908 | ((sph_u64)(((const unsigned char *)src)[3]) << 24)
1909 | ((sph_u64)(((const unsigned char *)src)[4]) << 32)
1910 | ((sph_u64)(((const unsigned char *)src)[5]) << 40)
1911 | ((sph_u64)(((const unsigned char *)src)[6]) << 48)
1912 | ((sph_u64)(((const unsigned char *)src)[7]) << 56);
1913 }
1914#endif
1915#else
1916 return (sph_u64)(((const unsigned char *)src)[0])
1917 | ((sph_u64)(((const unsigned char *)src)[1]) << 8)
1918 | ((sph_u64)(((const unsigned char *)src)[2]) << 16)
1919 | ((sph_u64)(((const unsigned char *)src)[3]) << 24)
1920 | ((sph_u64)(((const unsigned char *)src)[4]) << 32)
1921 | ((sph_u64)(((const unsigned char *)src)[5]) << 40)
1922 | ((sph_u64)(((const unsigned char *)src)[6]) << 48)
1923 | ((sph_u64)(((const unsigned char *)src)[7]) << 56);
1924#endif
1925}
1926
1927/**
1928 * Decode a 64-bit value from the provided buffer (little endian convention).
1929 * The source buffer must be properly aligned.
1930 *
1931 * @param src the source buffer (64-bit aligned)
1932 * @return the decoded value
1933 */
1934static SPH_INLINE sph_u64
1935sph_dec64le_aligned(const void *src)
1936{
1937#if SPH_LITTLE_ENDIAN
1938 return *(const sph_u64 *)src;
1939#elif SPH_BIG_ENDIAN
1940#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM
1941 sph_u64 tmp;
1942
1943 __asm__ __volatile__ ("ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
1944 return tmp;
1945/*
1946 * Not worth it generally.
1947 *
1948#elif SPH_PPC32_GCC && !SPH_NO_ASM
1949 return (sph_u64)sph_dec32le_aligned(src)
1950 | ((sph_u64)sph_dec32le_aligned((const char *)src + 4) << 32);
1951#elif SPH_PPC64_GCC && !SPH_NO_ASM
1952 sph_u64 tmp;
1953
1954 __asm__ __volatile__ ("ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
1955 return tmp;
1956 */
1957#else
1958 return sph_bswap64(*(const sph_u64 *)src);
1959#endif
1960#else
1961 return (sph_u64)(((const unsigned char *)src)[0])
1962 | ((sph_u64)(((const unsigned char *)src)[1]) << 8)
1963 | ((sph_u64)(((const unsigned char *)src)[2]) << 16)
1964 | ((sph_u64)(((const unsigned char *)src)[3]) << 24)
1965 | ((sph_u64)(((const unsigned char *)src)[4]) << 32)
1966 | ((sph_u64)(((const unsigned char *)src)[5]) << 40)
1967 | ((sph_u64)(((const unsigned char *)src)[6]) << 48)
1968 | ((sph_u64)(((const unsigned char *)src)[7]) << 56);
1969#endif
1970}
1971
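/*
 * Illustrative sketch: a typical caller decodes a whole message block
 * word by word; the aligned variant is appropriate when the block
 * buffer is known to be 8-byte aligned (e.g. when it is declared as an
 * array of sph_u64):
 *
 *     sph_u64 m[16];
 *     int i;
 *
 *     for (i = 0; i < 16; i ++)
 *             m[i] = sph_dec64le_aligned(block + 8 * i);
 *
 * where block is a hypothetical pointer to a 128-byte, 8-byte aligned
 * message block.
 */
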
1972#endif
1973
1974#endif /* Doxygen excluded block */
1975
1976#endif