/*
 * Hierarchical Bitmap Data Type
 *
 * Copyright Red Hat, Inc., 2012
 *
 * Author: Paolo Bonzini <[email protected]>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */

#include <string.h>
#include <glib.h>
#include <assert.h>
#include "qemu/osdep.h"
#include "qemu/hbitmap.h"
#include "qemu/host-utils.h"
#include "trace.h"

/* HBitmap provides an array of bits.  The bits are stored as usual in an
 * array of unsigned longs, but HBitmap is also optimized to provide fast
 * iteration over set bits; going from one bit to the next is O(logB n)
 * worst case, with B = sizeof(long) * CHAR_BIT: the result is low enough
 * that the number of levels is in fact fixed.
 *
 * In order to do this, it stacks multiple bitmaps with progressively coarser
 * granularity; in all levels except the last, bit N is set iff the N-th
 * unsigned long is nonzero in the immediately next level.  When iteration
 * completes on the last level it can examine the 2nd-last level to quickly
 * skip entire words, and even do so recursively to skip blocks of 64 words or
 * powers thereof (32 on 32-bit machines).
 *
 * Given an index in the bitmap, it can be split into groups of bits like
 * this (for the 64-bit case):
 *
 *     bits 0-57 => word in the last bitmap     | bits 58-63 => bit in the word
 *     bits 0-51 => word in the 2nd-last bitmap | bits 52-57 => bit in the word
 *     bits 0-45 => word in the 3rd-last bitmap | bits 46-51 => bit in the word
 *
 * So it is easy to move up simply by shifting the index right by
 * log2(BITS_PER_LONG) bits.  To move down, you shift the index left
 * similarly, and add the word index within the group.  Iteration uses
 * ffs (find first set bit) to find the next word to examine; this
 * operation can be done in constant time on most current architectures.
 *
 * Setting or clearing a range of m bits on all levels, the work to perform
 * is O(m + m/W + m/W^2 + ...), which is O(m) like on a regular bitmap.
 *
 * When iterating on a bitmap, each bit (on any level) is only visited
 * once.  Hence, the total cost of visiting a bitmap with m bits in it is
 * the number of bits that are set in all bitmaps.  Unless the bitmap is
 * extremely sparse, this is also O(m + m/W + m/W^2 + ...), so the amortized
 * cost of advancing from one bit to the next is usually constant (worst case
 * O(logB n) as in the non-amortized complexity).
 */
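
/* As a concrete illustration (the index below is arbitrary; 64-bit build,
 * so BITS_PER_LEVEL == 6), take index 123456 in the last level:
 *
 *     size_t word    = 123456 >> BITS_PER_LEVEL;      // word 1929
 *     unsigned bit   = 123456 & (BITS_PER_LONG - 1);  // bit 0 of that word
 *     size_t parent  = word >> BITS_PER_LEVEL;        // word 30, one level up
 *
 * Moving down reverses this: shift the parent position left by
 * BITS_PER_LEVEL and add ctzl() of the word's value, exactly as
 * hbitmap_iter_skip_words() does below.
 */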

struct HBitmap {
    /* Number of total bits in the bottom level.  */
    uint64_t size;

    /* Number of set bits in the bottom level.  */
    uint64_t count;

    /* A scaling factor.  Given a granularity of G, each bit in the bitmap
     * actually represents a group of 2^G elements.  Each operation on a
     * range of bits first rounds the bits to determine which group they land
     * in, and then affects the entire group; iteration will only visit the
     * first bit of each group.  Here is an example of operations in a
     * size-16, granularity-1 HBitmap:
     *
     *    initial state            00000000
     *    set(start=0, count=9)    11111000 (iter: 0, 2, 4, 6, 8)
     *    reset(start=1, count=3)  00111000 (iter: 4, 6, 8)
     *    set(start=9, count=2)    00111100 (iter: 4, 6, 8, 10)
     *    reset(start=5, count=5)  00000100 (iter: 10)
     *
     * From an implementation point of view, when setting or resetting bits,
     * the bitmap will scale bit numbers right by this amount of bits.  When
     * iterating, the bitmap will scale bit numbers left by this amount of
     * bits.
     */
    int granularity;

    /* A number of progressively less coarse bitmaps (i.e. level 0 is the
     * coarsest).  Each bit in level N represents a word in level N+1 that
     * has a set bit, except the last level where each bit represents the
     * actual bitmap.
     *
     * Note that all bitmaps have the same number of levels.  Even a 1-bit
     * bitmap will still allocate HBITMAP_LEVELS arrays.
     */
    unsigned long *levels[HBITMAP_LEVELS];
};
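
/* For example (illustrative sizes only): on a 64-bit host, a bitmap of
 * 2^20 bits stores 16384 unsigned longs in the bottom level, and the
 * levels above it hold 256, 4, 1, 1, ... words respectively;
 * hbitmap_alloc() below never allocates fewer than one word per level.
 */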

/* Advance hbi to the next nonzero word and return it.  hbi->pos
 * is updated.  Returns zero if we reach the end of the bitmap.
 */
unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
{
    size_t pos = hbi->pos;
    const HBitmap *hb = hbi->hb;
    unsigned i = HBITMAP_LEVELS - 1;

    unsigned long cur;
    do {
        cur = hbi->cur[--i];
        pos >>= BITS_PER_LEVEL;
    } while (cur == 0);

    /* Check for end of iteration.  We always use fewer than BITS_PER_LONG
     * bits in the level 0 bitmap; thus we can repurpose the most significant
     * bit as a sentinel.  The sentinel is set in hbitmap_alloc and ensures
     * that the above loop ends even without an explicit check on i.
     */

    if (i == 0 && cur == (1UL << (BITS_PER_LONG - 1))) {
        return 0;
    }
    for (; i < HBITMAP_LEVELS - 1; i++) {
        /* Shift back pos to the left, matching the right shifts above.
         * The index of this word's least significant set bit provides
         * the low-order bits.
         */
        assert(cur);
        pos = (pos << BITS_PER_LEVEL) + ctzl(cur);
        hbi->cur[i] = cur & (cur - 1);

        /* Set up next level for iteration.  */
        cur = hb->levels[i + 1][pos];
    }

    hbi->pos = pos;
    trace_hbitmap_iter_skip_words(hbi->hb, hbi, pos, cur);

    assert(cur);
    return cur;
}

void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first)
{
    unsigned i, bit;
    uint64_t pos;

    hbi->hb = hb;
    pos = first >> hb->granularity;
    assert(pos < hb->size);
    hbi->pos = pos >> BITS_PER_LEVEL;
    hbi->granularity = hb->granularity;

    for (i = HBITMAP_LEVELS; i-- > 0; ) {
        bit = pos & (BITS_PER_LONG - 1);
        pos >>= BITS_PER_LEVEL;

        /* Drop bits representing items before first.  */
        hbi->cur[i] = hb->levels[i][pos] & ~((1UL << bit) - 1);

        /* We have already added level i+1, so the lowest set bit has
         * been processed.  Clear it.
         */
        if (i != HBITMAP_LEVELS - 1) {
            hbi->cur[i] &= ~(1UL << bit);
        }
    }
}
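
/* Typical iteration, as a sketch (hbitmap_iter_next() is the inline helper
 * declared in "qemu/hbitmap.h"; it returns -1 once the bitmap is
 * exhausted; process() is a hypothetical per-item callback):
 *
 *     HBitmapIter hbi;
 *     int64_t item;
 *
 *     hbitmap_iter_init(&hbi, hb, 0);
 *     while ((item = hbitmap_iter_next(&hbi)) >= 0) {
 *         process(item);
 *     }
 */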

bool hbitmap_empty(const HBitmap *hb)
{
    return hb->count == 0;
}

int hbitmap_granularity(const HBitmap *hb)
{
    return hb->granularity;
}

uint64_t hbitmap_count(const HBitmap *hb)
{
    return hb->count << hb->granularity;
}

/* Count the number of set bits between start and last (inclusive), not
 * accounting for the granularity.  Also an example of how to use
 * hbitmap_iter_next_word.
 */
static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last)
{
    HBitmapIter hbi;
    uint64_t count = 0;
    uint64_t end = last + 1;
    unsigned long cur;
    size_t pos;

    hbitmap_iter_init(&hbi, hb, start << hb->granularity);
    for (;;) {
        pos = hbitmap_iter_next_word(&hbi, &cur);
        if (pos >= (end >> BITS_PER_LEVEL)) {
            break;
        }
        count += ctpopl(cur);
    }

    if (pos == (end >> BITS_PER_LEVEL)) {
        /* Drop bits representing the END-th and subsequent items.  */
        int bit = end & (BITS_PER_LONG - 1);
        cur &= (1UL << bit) - 1;
        count += ctpopl(cur);
    }

    return count;
}

/* Setting starts at the last layer and propagates up if an element
 * changes from zero to non-zero.
 */
static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last)
{
    unsigned long mask;
    bool changed;

    assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
    assert(start <= last);

    mask = 2UL << (last & (BITS_PER_LONG - 1));
    mask -= 1UL << (start & (BITS_PER_LONG - 1));
    changed = (*elem == 0);
    *elem |= mask;
    return changed;
}
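
/* For example (illustrative values): with start == 2 and last == 5,
 * mask = (2UL << 5) - (1UL << 2) = 0x40 - 0x4 = 0x3c, i.e. bits 2..5.
 * When last falls on bit BITS_PER_LONG - 1, the first shift wraps to 0
 * and the unsigned subtraction still yields a mask covering
 * start..BITS_PER_LONG-1.
 */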

/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)...  */
static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
{
    size_t pos = start >> BITS_PER_LEVEL;
    size_t lastpos = last >> BITS_PER_LEVEL;
    bool changed = false;
    size_t i;

    i = pos;
    if (i < lastpos) {
        uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
        changed |= hb_set_elem(&hb->levels[level][i], start, next - 1);
        for (;;) {
            start = next;
            next += BITS_PER_LONG;
            if (++i == lastpos) {
                break;
            }
            changed |= (hb->levels[level][i] == 0);
            hb->levels[level][i] = ~0UL;
        }
    }
    changed |= hb_set_elem(&hb->levels[level][i], start, last);

    /* If there was any change in this layer, we may have to update
     * the one above.
     */
    if (level > 0 && changed) {
        hb_set_between(hb, level - 1, pos, lastpos);
    }
}
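
/* Illustrative walk-through (64-bit words, arbitrary range): setting bits
 * 60..130 at some level sets bits 60..63 of word 0, fills word 1 with
 * ~0UL, and sets bits 0..2 of word 2.  "changed" becomes true only if at
 * least one touched word was entirely zero before the update, in which
 * case the range [pos, lastpos] is set one level up, for at most one
 * recursive call per level.
 */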

void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count)
{
    /* Compute range in the last layer.  */
    uint64_t last = start + count - 1;

    trace_hbitmap_set(hb, start, count,
                      start >> hb->granularity, last >> hb->granularity);

    start >>= hb->granularity;
    last >>= hb->granularity;
    count = last - start + 1;

    hb->count += count - hb_count_between(hb, start, last);
    hb_set_between(hb, HBITMAP_LEVELS - 1, start, last);
}

/* Resetting works the other way round: propagate up if the new
 * value is zero.
 */
static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t last)
{
    unsigned long mask;
    bool blanked;

    assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
    assert(start <= last);

    mask = 2UL << (last & (BITS_PER_LONG - 1));
    mask -= 1UL << (start & (BITS_PER_LONG - 1));
    blanked = *elem != 0 && ((*elem & ~mask) == 0);
    *elem &= ~mask;
    return blanked;
}
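
/* For example (illustrative values): if *elem == 0xf0 and the range covers
 * bits 4..7, the mask is also 0xf0, the word becomes zero, and the
 * function returns true so that the caller clears the matching bit one
 * level up.
 */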

/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)...  */
static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
{
    size_t pos = start >> BITS_PER_LEVEL;
    size_t lastpos = last >> BITS_PER_LEVEL;
    bool changed = false;
    size_t i;

    i = pos;
    if (i < lastpos) {
        uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;

        /* Here we need a more complex test than when setting bits.  Even if
         * something was changed, we must not blank bits in the upper level
         * unless the lower-level word became entirely zero.  So, remove pos
         * from the upper-level range if bits remain set.
         */
        if (hb_reset_elem(&hb->levels[level][i], start, next - 1)) {
            changed = true;
        } else {
            pos++;
        }

        for (;;) {
            start = next;
            next += BITS_PER_LONG;
            if (++i == lastpos) {
                break;
            }
            changed |= (hb->levels[level][i] != 0);
            hb->levels[level][i] = 0UL;
        }
    }

    /* Same as above, this time for lastpos.  */
    if (hb_reset_elem(&hb->levels[level][i], start, last)) {
        changed = true;
    } else {
        lastpos--;
    }

    if (level > 0 && changed) {
        hb_reset_between(hb, level - 1, pos, lastpos);
    }
}

void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count)
{
    /* Compute range in the last layer.  */
    uint64_t last = start + count - 1;

    trace_hbitmap_reset(hb, start, count,
                        start >> hb->granularity, last >> hb->granularity);

    start >>= hb->granularity;
    last >>= hb->granularity;

    hb->count -= hb_count_between(hb, start, last);
    hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last);
}

bool hbitmap_get(const HBitmap *hb, uint64_t item)
{
    /* Compute position and bit in the last layer.  */
    uint64_t pos = item >> hb->granularity;
    unsigned long bit = 1UL << (pos & (BITS_PER_LONG - 1));

    return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0;
}

void hbitmap_free(HBitmap *hb)
{
    unsigned i;
    for (i = HBITMAP_LEVELS; i-- > 0; ) {
        g_free(hb->levels[i]);
    }
    g_free(hb);
}

HBitmap *hbitmap_alloc(uint64_t size, int granularity)
{
    HBitmap *hb = g_new0(struct HBitmap, 1);
    unsigned i;

    assert(granularity >= 0 && granularity < 64);
    size = (size + (1ULL << granularity) - 1) >> granularity;
    assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE));

    hb->size = size;
    hb->granularity = granularity;
    for (i = HBITMAP_LEVELS; i-- > 0; ) {
        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
        hb->levels[i] = g_new0(unsigned long, size);
    }

    /* We necessarily have free bits in level 0 due to the definition
     * of HBITMAP_LEVELS, so use one for a sentinel.  This speeds up
     * hbitmap_iter_skip_words.
     */
    assert(size == 1);
    hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
    return hb;
}
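
/* End-to-end usage, as a sketch (illustrative only; sizes and indices are
 * arbitrary):
 *
 *     HBitmap *hb = hbitmap_alloc(1 << 16, 0);  // 65536 items, granularity 0
 *
 *     hbitmap_set(hb, 100, 50);                 // mark items 100..149
 *     assert(hbitmap_get(hb, 120));
 *     assert(hbitmap_count(hb) == 50);
 *
 *     hbitmap_reset(hb, 100, 25);               // clear items 100..124
 *     assert(hbitmap_count(hb) == 25);
 *
 *     hbitmap_free(hb);
 */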