]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Utility compute operations used by translated code. | |
3 | * | |
4 | * Copyright (c) 2007 Thiemo Seufer | |
5 | * Copyright (c) 2007 Jocelyn Mayer | |
6 | * | |
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
8 | * of this software and associated documentation files (the "Software"), to deal | |
9 | * in the Software without restriction, including without limitation the rights | |
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
11 | * copies of the Software, and to permit persons to whom the Software is | |
12 | * furnished to do so, subject to the following conditions: | |
13 | * | |
14 | * The above copyright notice and this permission notice shall be included in | |
15 | * all copies or substantial portions of the Software. | |
16 | * | |
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
23 | * THE SOFTWARE. | |
24 | */ | |
25 | #ifndef HOST_UTILS_H | |
26 | #define HOST_UTILS_H 1 | |
27 | ||
28 | #include "qemu/compiler.h" /* QEMU_GNUC_PREREQ */ | |
29 | #include "qemu/bswap.h" | |
30 | #include <limits.h> | |
31 | #include <stdbool.h> | |
32 | ||
33 | #ifdef CONFIG_INT128 | |
/**
 * mulu64 - unsigned 64 x 64 -> 128 bit multiplication.
 * @plow: receives the low 64 bits of the product
 * @phigh: receives the high 64 bits of the product
 * @a: first factor
 * @b: second factor
 */
static inline void mulu64(uint64_t *plow, uint64_t *phigh,
                          uint64_t a, uint64_t b)
{
    const __uint128_t product = (__uint128_t)a * (__uint128_t)b;

    *plow = (uint64_t)product;
    *phigh = (uint64_t)(product >> 64);
}
41 | ||
/**
 * muls64 - signed 64 x 64 -> 128 bit multiplication.
 * @plow: receives the low 64 bits of the product
 * @phigh: receives the high 64 bits of the product (two's complement)
 * @a: first factor
 * @b: second factor
 */
static inline void muls64(uint64_t *plow, uint64_t *phigh,
                          int64_t a, int64_t b)
{
    const __int128_t product = (__int128_t)a * (__int128_t)b;

    *plow = (uint64_t)product;
    *phigh = (uint64_t)(product >> 64);
}
49 | ||
/**
 * muldiv64 - compute (a * b) / c using a 96-bit intermediate product.
 * @a: 64-bit multiplicand
 * @b: 32-bit multiplier
 * @c: 32-bit divisor (no check for zero is performed here)
 *
 * Returns the quotient truncated to 64 bits.
 */
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    const __int128_t product = (__int128_t)a * b;
    const __int128_t quotient = product / c;

    return quotient;
}
55 | ||
/**
 * divu128 - unsigned 128-bit by 64-bit division.
 * @plow: in: low 64 bits of the dividend; out: low 64 bits of the quotient
 * @phigh: in: high 64 bits of the dividend; out: the remainder
 * @divisor: 64-bit divisor
 *
 * Returns 1 if @divisor is zero (outputs untouched) or if the quotient
 * does not fit in 64 bits; returns 0 otherwise.
 */
static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
{
    __uint128_t numerator;
    __uint128_t quotient;

    if (divisor == 0) {
        return 1;
    }

    numerator = ((__uint128_t)*phigh << 64) | *plow;
    quotient = numerator / divisor;

    *plow = (uint64_t)quotient;
    *phigh = (uint64_t)(numerator % divisor);

    return quotient > UINT64_MAX;
}
68 | ||
/**
 * divs128 - signed 128-bit by 64-bit division.
 * @plow: in: low 64 bits of the dividend; out: low 64 bits of the quotient
 * @phigh: in: high 64 bits of the dividend; out: the remainder
 * @divisor: 64-bit signed divisor
 *
 * Returns 1 if @divisor is zero (outputs untouched) or if the quotient
 * does not fit in a signed 64-bit value; returns 0 otherwise.
 */
static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
{
    if (divisor == 0) {
        return 1;
    } else {
        /*
         * Assemble the dividend from the raw bit patterns of both halves.
         * The low half must be zero-extended: OR-ing it in as a signed
         * value would sign-extend it and overwrite the high half with
         * ones whenever bit 63 of *plow is set.  Building the value as
         * unsigned also avoids left-shifting a negative *phigh.
         */
        __uint128_t bits = ((__uint128_t)(uint64_t)*phigh << 64)
                           | (uint64_t)*plow;
        __int128_t dividend = (__int128_t)bits;
        __int128_t result = dividend / divisor;

        *plow = result;
        *phigh = dividend % divisor;
        /* Overflow iff truncating the quotient to 64 bits changed it. */
        return result != *plow;
    }
}
81 | #else | |
/*
 * Out-of-line variants used when CONFIG_INT128 is not defined; only the
 * declarations live in this header.  Parameter names follow the inline
 * CONFIG_INT128 variants: the low half comes first, then the high half.
 * NOTE(review): confirm the out-of-line definitions use the same order.
 */
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
int divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
86 | ||
/**
 * muldiv64 - compute (a * b) / c using a 96-bit intermediate product.
 * @a: 64-bit multiplicand
 * @b: 32-bit multiplier
 * @c: 32-bit divisor (no check for zero is performed here)
 *
 * Variant for hosts without a 128-bit integer type: @a is split into
 * 32-bit halves, each half is multiplied by @b, and the quotient is
 * produced 32 bits at a time by long division.  Each 32-bit half of the
 * quotient is truncated independently.
 */
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    const uint64_t factor = b;
    /* Partial products of the low and high 32-bit halves of @a. */
    const uint64_t low_prod = (a & 0xffffffffu) * factor;
    /* Fold the carry out of the low partial product into the high one. */
    const uint64_t high_prod = (a >> 32) * factor + (low_prod >> 32);

    const uint32_t quot_high = (uint32_t)(high_prod / c);
    const uint32_t quot_low =
        (uint32_t)((((high_prod % c) << 32) + (low_prod & 0xffffffff)) / c);

    return ((uint64_t)quot_high << 32) | quot_low;
}
109 | #endif | |
110 | ||
111 | /** | |
112 | * clz32 - count leading zeros in a 32-bit value. | |
113 | * @val: The value to search | |
114 | * | |
115 | * Returns 32 if the value is zero. Note that the GCC builtin is | |
116 | * undefined if the value is zero. | |
117 | */ | |
118 | static inline int clz32(uint32_t val) | |
119 | { | |
120 | #if QEMU_GNUC_PREREQ(3, 4) | |
121 | return val ? __builtin_clz(val) : 32; | |
122 | #else | |
123 | /* Binary search for the leading one bit. */ | |
124 | int cnt = 0; | |
125 | ||
126 | if (!(val & 0xFFFF0000U)) { | |
127 | cnt += 16; | |
128 | val <<= 16; | |
129 | } | |
130 | if (!(val & 0xFF000000U)) { | |
131 | cnt += 8; | |
132 | val <<= 8; | |
133 | } | |
134 | if (!(val & 0xF0000000U)) { | |
135 | cnt += 4; | |
136 | val <<= 4; | |
137 | } | |
138 | if (!(val & 0xC0000000U)) { | |
139 | cnt += 2; | |
140 | val <<= 2; | |
141 | } | |
142 | if (!(val & 0x80000000U)) { | |
143 | cnt++; | |
144 | val <<= 1; | |
145 | } | |
146 | if (!(val & 0x80000000U)) { | |
147 | cnt++; | |
148 | } | |
149 | return cnt; | |
150 | #endif | |
151 | } | |
152 | ||
/**
 * clo32 - count leading ones in a 32-bit value.
 * @val: The value to search
 *
 * Implemented as the leading-zero count of the complement.
 * Returns 32 if the value is -1 (all bits set).
 */
static inline int clo32(uint32_t val)
{
    const uint32_t inverted = ~val;

    return clz32(inverted);
}
163 | ||
164 | /** | |
165 | * clz64 - count leading zeros in a 64-bit value. | |
166 | * @val: The value to search | |
167 | * | |
168 | * Returns 64 if the value is zero. Note that the GCC builtin is | |
169 | * undefined if the value is zero. | |
170 | */ | |
171 | static inline int clz64(uint64_t val) | |
172 | { | |
173 | #if QEMU_GNUC_PREREQ(3, 4) | |
174 | return val ? __builtin_clzll(val) : 64; | |
175 | #else | |
176 | int cnt = 0; | |
177 | ||
178 | if (!(val >> 32)) { | |
179 | cnt += 32; | |
180 | } else { | |
181 | val >>= 32; | |
182 | } | |
183 | ||
184 | return cnt + clz32(val); | |
185 | #endif | |
186 | } | |
187 | ||
/**
 * clo64 - count leading ones in a 64-bit value.
 * @val: The value to search
 *
 * Implemented as the leading-zero count of the complement.
 * Returns 64 if the value is -1 (all bits set).
 */
static inline int clo64(uint64_t val)
{
    const uint64_t inverted = ~val;

    return clz64(inverted);
}
198 | ||
199 | /** | |
200 | * ctz32 - count trailing zeros in a 32-bit value. | |
201 | * @val: The value to search | |
202 | * | |
203 | * Returns 32 if the value is zero. Note that the GCC builtin is | |
204 | * undefined if the value is zero. | |
205 | */ | |
206 | static inline int ctz32(uint32_t val) | |
207 | { | |
208 | #if QEMU_GNUC_PREREQ(3, 4) | |
209 | return val ? __builtin_ctz(val) : 32; | |
210 | #else | |
211 | /* Binary search for the trailing one bit. */ | |
212 | int cnt; | |
213 | ||
214 | cnt = 0; | |
215 | if (!(val & 0x0000FFFFUL)) { | |
216 | cnt += 16; | |
217 | val >>= 16; | |
218 | } | |
219 | if (!(val & 0x000000FFUL)) { | |
220 | cnt += 8; | |
221 | val >>= 8; | |
222 | } | |
223 | if (!(val & 0x0000000FUL)) { | |
224 | cnt += 4; | |
225 | val >>= 4; | |
226 | } | |
227 | if (!(val & 0x00000003UL)) { | |
228 | cnt += 2; | |
229 | val >>= 2; | |
230 | } | |
231 | if (!(val & 0x00000001UL)) { | |
232 | cnt++; | |
233 | val >>= 1; | |
234 | } | |
235 | if (!(val & 0x00000001UL)) { | |
236 | cnt++; | |
237 | } | |
238 | ||
239 | return cnt; | |
240 | #endif | |
241 | } | |
242 | ||
/**
 * cto32 - count trailing ones in a 32-bit value.
 * @val: The value to search
 *
 * Implemented as the trailing-zero count of the complement.
 * Returns 32 if the value is -1 (all bits set).
 */
static inline int cto32(uint32_t val)
{
    const uint32_t inverted = ~val;

    return ctz32(inverted);
}
253 | ||
254 | /** | |
255 | * ctz64 - count trailing zeros in a 64-bit value. | |
256 | * @val: The value to search | |
257 | * | |
258 | * Returns 64 if the value is zero. Note that the GCC builtin is | |
259 | * undefined if the value is zero. | |
260 | */ | |
261 | static inline int ctz64(uint64_t val) | |
262 | { | |
263 | #if QEMU_GNUC_PREREQ(3, 4) | |
264 | return val ? __builtin_ctzll(val) : 64; | |
265 | #else | |
266 | int cnt; | |
267 | ||
268 | cnt = 0; | |
269 | if (!((uint32_t)val)) { | |
270 | cnt += 32; | |
271 | val >>= 32; | |
272 | } | |
273 | ||
274 | return cnt + ctz32(val); | |
275 | #endif | |
276 | } | |
277 | ||
/**
 * cto64 - count trailing ones in a 64-bit value.
 * @val: The value to search
 *
 * Implemented as the trailing-zero count of the complement.
 * Returns 64 if the value is -1 (all bits set).
 */
static inline int cto64(uint64_t val)
{
    const uint64_t inverted = ~val;

    return ctz64(inverted);
}
288 | ||
289 | /** | |
290 | * clrsb32 - count leading redundant sign bits in a 32-bit value. | |
291 | * @val: The value to search | |
292 | * | |
293 | * Returns the number of bits following the sign bit that are equal to it. | |
294 | * No special cases; output range is [0-31]. | |
295 | */ | |
296 | static inline int clrsb32(uint32_t val) | |
297 | { | |
298 | #if QEMU_GNUC_PREREQ(4, 7) | |
299 | return __builtin_clrsb(val); | |
300 | #else | |
301 | return clz32(val ^ ((int32_t)val >> 1)) - 1; | |
302 | #endif | |
303 | } | |
304 | ||
305 | /** | |
306 | * clrsb64 - count leading redundant sign bits in a 64-bit value. | |
307 | * @val: The value to search | |
308 | * | |
309 | * Returns the number of bits following the sign bit that are equal to it. | |
310 | * No special cases; output range is [0-63]. | |
311 | */ | |
312 | static inline int clrsb64(uint64_t val) | |
313 | { | |
314 | #if QEMU_GNUC_PREREQ(4, 7) | |
315 | return __builtin_clrsbll(val); | |
316 | #else | |
317 | return clz64(val ^ ((int64_t)val >> 1)) - 1; | |
318 | #endif | |
319 | } | |
320 | ||
321 | /** | |
322 | * ctpop8 - count the population of one bits in an 8-bit value. | |
323 | * @val: The value to search | |
324 | */ | |
325 | static inline int ctpop8(uint8_t val) | |
326 | { | |
327 | #if QEMU_GNUC_PREREQ(3, 4) | |
328 | return __builtin_popcount(val); | |
329 | #else | |
330 | val = (val & 0x55) + ((val >> 1) & 0x55); | |
331 | val = (val & 0x33) + ((val >> 2) & 0x33); | |
332 | val = (val & 0x0f) + ((val >> 4) & 0x0f); | |
333 | ||
334 | return val; | |
335 | #endif | |
336 | } | |
337 | ||
338 | /** | |
339 | * ctpop16 - count the population of one bits in a 16-bit value. | |
340 | * @val: The value to search | |
341 | */ | |
342 | static inline int ctpop16(uint16_t val) | |
343 | { | |
344 | #if QEMU_GNUC_PREREQ(3, 4) | |
345 | return __builtin_popcount(val); | |
346 | #else | |
347 | val = (val & 0x5555) + ((val >> 1) & 0x5555); | |
348 | val = (val & 0x3333) + ((val >> 2) & 0x3333); | |
349 | val = (val & 0x0f0f) + ((val >> 4) & 0x0f0f); | |
350 | val = (val & 0x00ff) + ((val >> 8) & 0x00ff); | |
351 | ||
352 | return val; | |
353 | #endif | |
354 | } | |
355 | ||
356 | /** | |
357 | * ctpop32 - count the population of one bits in a 32-bit value. | |
358 | * @val: The value to search | |
359 | */ | |
360 | static inline int ctpop32(uint32_t val) | |
361 | { | |
362 | #if QEMU_GNUC_PREREQ(3, 4) | |
363 | return __builtin_popcount(val); | |
364 | #else | |
365 | val = (val & 0x55555555) + ((val >> 1) & 0x55555555); | |
366 | val = (val & 0x33333333) + ((val >> 2) & 0x33333333); | |
367 | val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); | |
368 | val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff); | |
369 | val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff); | |
370 | ||
371 | return val; | |
372 | #endif | |
373 | } | |
374 | ||
375 | /** | |
376 | * ctpop64 - count the population of one bits in a 64-bit value. | |
377 | * @val: The value to search | |
378 | */ | |
379 | static inline int ctpop64(uint64_t val) | |
380 | { | |
381 | #if QEMU_GNUC_PREREQ(3, 4) | |
382 | return __builtin_popcountll(val); | |
383 | #else | |
384 | val = (val & 0x5555555555555555ULL) + ((val >> 1) & 0x5555555555555555ULL); | |
385 | val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL); | |
386 | val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 0x0f0f0f0f0f0f0f0fULL); | |
387 | val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 0x00ff00ff00ff00ffULL); | |
388 | val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 0x0000ffff0000ffffULL); | |
389 | val = (val & 0x00000000ffffffffULL) + ((val >> 32) & 0x00000000ffffffffULL); | |
390 | ||
391 | return val; | |
392 | #endif | |
393 | } | |
394 | ||
/**
 * revbit8 - reverse the bits in an 8-bit value.
 * @x: The value to modify.
 *
 * Returns @x with bit 0 exchanged with bit 7, bit 1 with bit 6, and so on.
 */
static inline uint8_t revbit8(uint8_t x)
{
    /* Step 1: exchange the two nibbles. */
    const unsigned int nibbles = ((x & 0xf0u) >> 4) | ((x & 0x0fu) << 4);
    /* Step 2: mirror the four bits inside each nibble. */
    const unsigned int mirrored = ((nibbles & 0x88u) >> 3)
                                | ((nibbles & 0x44u) >> 1)
                                | ((nibbles & 0x22u) << 1)
                                | ((nibbles & 0x11u) << 3);

    return (uint8_t)mirrored;
}
411 | ||
/**
 * revbit16 - reverse the bits in a 16-bit value.
 * @x: The value to modify.
 *
 * Returns @x with its bit order mirrored (bit 0 exchanged with bit 15, ...).
 */
static inline uint16_t revbit16(uint16_t x)
{
    /* Step 1: put each byte in its final position. */
    const uint16_t bytes = bswap16(x);
    /* Step 2: exchange the nibbles within every byte. */
    const unsigned int nibbles = ((bytes & 0xf0f0u) >> 4)
                               | ((bytes & 0x0f0fu) << 4);
    /* Step 3: mirror the four bits inside each nibble. */
    const unsigned int mirrored = ((nibbles & 0x8888u) >> 3)
                                | ((nibbles & 0x4444u) >> 1)
                                | ((nibbles & 0x2222u) << 1)
                                | ((nibbles & 0x1111u) << 3);

    return (uint16_t)mirrored;
}
430 | ||
/**
 * revbit32 - reverse the bits in a 32-bit value.
 * @x: The value to modify.
 *
 * Returns @x with its bit order mirrored (bit 0 exchanged with bit 31, ...).
 */
static inline uint32_t revbit32(uint32_t x)
{
    /* Step 1: put each byte in its final position. */
    const uint32_t bytes = bswap32(x);
    /* Step 2: exchange the nibbles within every byte. */
    const uint32_t nibbles = ((bytes & 0xf0f0f0f0u) >> 4)
                           | ((bytes & 0x0f0f0f0fu) << 4);
    /* Step 3: mirror the four bits inside each nibble. */
    const uint32_t mirrored = ((nibbles & 0x88888888u) >> 3)
                            | ((nibbles & 0x44444444u) >> 1)
                            | ((nibbles & 0x22222222u) << 1)
                            | ((nibbles & 0x11111111u) << 3);

    return mirrored;
}
449 | ||
/**
 * revbit64 - reverse the bits in a 64-bit value.
 * @x: The value to modify.
 *
 * Returns @x with its bit order mirrored (bit 0 exchanged with bit 63, ...).
 */
static inline uint64_t revbit64(uint64_t x)
{
    /* Step 1: put each byte in its final position. */
    const uint64_t bytes = bswap64(x);
    /* Step 2: exchange the nibbles within every byte. */
    const uint64_t nibbles = ((bytes & 0xf0f0f0f0f0f0f0f0ull) >> 4)
                           | ((bytes & 0x0f0f0f0f0f0f0f0full) << 4);
    /* Step 3: mirror the four bits inside each nibble. */
    const uint64_t mirrored = ((nibbles & 0x8888888888888888ull) >> 3)
                            | ((nibbles & 0x4444444444444444ull) >> 1)
                            | ((nibbles & 0x2222222222222222ull) << 1)
                            | ((nibbles & 0x1111111111111111ull) << 3);

    return mirrored;
}
468 | ||
/*
 * Aliases sized to the host's 'unsigned long': each *l name maps to the
 * 32-bit or 64-bit routine depending on ULONG_MAX.  Compilation fails if
 * 'unsigned long' is neither 32 nor 64 bits wide.
 */
#if ULONG_MAX == UINT64_MAX
# define clzl    clz64
# define ctzl    ctz64
# define clol    clo64
# define ctol    cto64
# define ctpopl  ctpop64
# define revbitl revbit64
#elif ULONG_MAX == UINT32_MAX
# define clzl    clz32
# define ctzl    ctz32
# define clol    clo32
# define ctol    cto32
# define ctpopl  ctpop32
# define revbitl revbit32
#else
# error Unknown sizeof long
#endif
488 | ||
/**
 * is_power_of_2 - test whether a value has exactly one bit set.
 * @value: The value to test
 *
 * Returns true if @value is a power of two; zero is not a power of two.
 */
static inline bool is_power_of_2(uint64_t value)
{
    /* Clearing the lowest set bit leaves zero only for a single-bit value. */
    return value != 0 && (value & (value - 1)) == 0;
}
497 | ||
/**
 * pow2floor - round down to the nearest power of 2.
 * @value: The value to round
 *
 * Returns @value unchanged if it is already a power of 2, otherwise the
 * largest power of 2 below it.  Returns 0 if @value is 0.  The helpers
 * take uint64_t, so a negative @value is examined as its unsigned 64-bit
 * bit pattern.
 */
static inline int64_t pow2floor(int64_t value)
{
    if (value == 0) {
        /*
         * clz64(0) is 64, and shifting a 64-bit constant right by 64 is
         * undefined behaviour, so zero must not reach the shift below.
         */
        return 0;
    }
    if (!is_power_of_2(value)) {
        /* Keep only the most significant set bit. */
        value = 0x8000000000000000ULL >> clz64(value);
    }
    return value;
}
506 | ||
/**
 * pow2ceil - round up to the nearest power of 2.
 * @value: The value to round
 *
 * Returns @value unchanged if it is already a power of 2.  Returns 0 if
 * the result would not fit in 64 bits (the top bit is set and @value is
 * not a power of 2).  A @value of 0 yields 1, because clz64(0) is 64.
 */
static inline uint64_t pow2ceil(uint64_t value)
{
    const uint8_t leading_zeros = clz64(value);

    if (is_power_of_2(value)) {
        return value;
    }
    if (leading_zeros == 0) {
        /* Top bit already set: the next power of 2 overflows. */
        return 0;
    }
    return 1ULL << (64 - leading_zeros);
}
520 | ||
521 | #endif |