/*
 * Hierarchical Bitmap Data Type
 *
 * Copyright Red Hat, Inc., 2012
 *
 * Author: Paolo Bonzini <[email protected]>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/hbitmap.h"
#include "qemu/host-utils.h"
#include "trace.h"
#include "crypto/hash.h"

/* HBitmap provides an array of bits.  The bits are stored as usual in an
 * array of unsigned longs, but HBitmap is also optimized to provide fast
 * iteration over set bits; going from one bit to the next is O(logB n)
 * worst case, with B = sizeof(long) * CHAR_BIT: the result is low enough
 * that the number of levels is in fact fixed.
 *
 * In order to do this, it stacks multiple bitmaps with progressively coarser
 * granularity; in all levels except the last, bit N is set iff the N-th
 * unsigned long is nonzero in the immediately next level.  When iteration
 * completes on the last level it can examine the 2nd-last level to quickly
 * skip entire words, and even do so recursively to skip blocks of 64 words or
 * powers thereof (32 on 32-bit machines).
 *
 * Given an index in the bitmap, it can be split into groups of bits like
 * this (for the 64-bit case):
 *
 *   bits 0-57 => word in the last bitmap     | bits 58-63 => bit in the word
 *   bits 0-51 => word in the 2nd-last bitmap | bits 52-57 => bit in the word
 *   bits 0-45 => word in the 3rd-last bitmap | bits 46-51 => bit in the word
 *
 * So it is easy to move up simply by shifting the index right by
 * log2(BITS_PER_LONG) bits.  To move down, you shift the index left
 * similarly, and add the word index within the group.  Iteration uses
 * ffs (find first set bit) to find the next word to examine; this
 * operation can be done in constant time on most current architectures.
 *
 * When setting or clearing a range of m bits on all levels, the work to
 * perform is O(m + m/W + m/W^2 + ...), which is O(m) like on a regular
 * bitmap.
 *
 * When iterating on a bitmap, each bit (on any level) is only visited
 * once.  Hence, the total cost of visiting a bitmap with m bits in it is
 * the number of bits that are set in all bitmaps.  Unless the bitmap is
 * extremely sparse, this is also O(m + m/W + m/W^2 + ...), so the amortized
 * cost of advancing from one bit to the next is usually constant (worst case
 * O(logB n) as in the non-amortized complexity).
 */

struct HBitmap {
    /*
     * Size of the bitmap, as requested in hbitmap_alloc or in hbitmap_truncate.
     */
    uint64_t orig_size;

    /* Number of total bits in the bottom level. */
    uint64_t size;

    /* Number of set bits in the bottom level. */
    uint64_t count;

    /* A scaling factor.  Given a granularity of G, each bit in the bitmap
     * actually represents a group of 2^G elements.  Each operation on a
     * range of bits first rounds the bits to determine which group they land
     * in, and then affects the entire group; iteration will only visit the
     * first bit of each group.  Here is an example of operations in a
     * size-16, granularity-1 HBitmap (a code sketch of this example follows
     * this struct definition):
     *
     *    initial state            00000000
     *    set(start=0, count=9)    11111000 (iter: 0, 2, 4, 6, 8)
     *    reset(start=1, count=3)  00111000 (iter: 4, 6, 8)
     *    set(start=9, count=2)    00111100 (iter: 4, 6, 8, 10)
     *    reset(start=5, count=5)  00000100 (iter: 10)
     *
     * From an implementation point of view, when setting or resetting bits,
     * the bitmap will scale bit numbers right by this amount of bits.  When
     * iterating, the bitmap will scale bit numbers left by this amount of
     * bits.
     */
    int granularity;

    /* A meta dirty bitmap to track the dirtiness of bits in this HBitmap. */
    HBitmap *meta;

    /* A number of progressively less coarse bitmaps (i.e. level 0 is the
     * coarsest).  Each bit in level N represents a word in level N+1 that
     * has a set bit, except the last level where each bit represents the
     * actual bitmap.
     *
     * Note that all bitmaps have the same number of levels.  Even a 1-bit
     * bitmap will still allocate HBITMAP_LEVELS arrays.
     */
    unsigned long *levels[HBITMAP_LEVELS];

    /* The length of each levels[] array. */
    uint64_t sizes[HBITMAP_LEVELS];
};
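
/*
 * Editor's sketch (not part of the original file): a variant of the
 * size-16, granularity-1 example above, written against the public hbitmap
 * API implemented below, with reset ranges aligned to the granularity as
 * hbitmap_reset() requires.  The function name is hypothetical and the
 * function is unused by this file.
 */
G_GNUC_UNUSED static void hbitmap_example_granularity(void)
{
    HBitmap *bm = hbitmap_alloc(16, 1);   /* 16 items, groups of 2^1 = 2 */

    hbitmap_set(bm, 0, 9);                /* items 0..8 -> groups 0..4 set */
    assert(hbitmap_count(bm) == 10);      /* counts whole groups: 5 * 2 */

    hbitmap_reset(bm, 0, 4);              /* items 0..3 -> groups 0..1 clear */
    assert(hbitmap_count(bm) == 6);       /* groups 2..4 remain */

    hbitmap_free(bm);
}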
103 | ||
e7c033c3 PB |
104 | /* Advance hbi to the next nonzero word and return it. hbi->pos |
105 | * is updated. Returns zero if we reach the end of the bitmap. | |
106 | */ | |
30b8346c | 107 | static unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi) |
e7c033c3 PB |
108 | { |
109 | size_t pos = hbi->pos; | |
110 | const HBitmap *hb = hbi->hb; | |
111 | unsigned i = HBITMAP_LEVELS - 1; | |
112 | ||
113 | unsigned long cur; | |
114 | do { | |
f63ea4e9 | 115 | i--; |
e7c033c3 | 116 | pos >>= BITS_PER_LEVEL; |
f63ea4e9 | 117 | cur = hbi->cur[i] & hb->levels[i][pos]; |
e7c033c3 PB |
118 | } while (cur == 0); |
119 | ||
120 | /* Check for end of iteration. We always use fewer than BITS_PER_LONG | |
121 | * bits in the level 0 bitmap; thus we can repurpose the most significant | |
122 | * bit as a sentinel. The sentinel is set in hbitmap_alloc and ensures | |
123 | * that the above loop ends even without an explicit check on i. | |
124 | */ | |
125 | ||
126 | if (i == 0 && cur == (1UL << (BITS_PER_LONG - 1))) { | |
127 | return 0; | |
128 | } | |
129 | for (; i < HBITMAP_LEVELS - 1; i++) { | |
130 | /* Shift back pos to the left, matching the right shifts above. | |
131 | * The index of this word's least significant set bit provides | |
132 | * the low-order bits. | |
133 | */ | |
18331e7c RH |
134 | assert(cur); |
135 | pos = (pos << BITS_PER_LEVEL) + ctzl(cur); | |
e7c033c3 PB |
136 | hbi->cur[i] = cur & (cur - 1); |
137 | ||
138 | /* Set up next level for iteration. */ | |
139 | cur = hb->levels[i + 1][pos]; | |
140 | } | |
141 | ||
142 | hbi->pos = pos; | |
143 | trace_hbitmap_iter_skip_words(hbi->hb, hbi, pos, cur); | |
144 | ||
145 | assert(cur); | |
146 | return cur; | |
147 | } | |
148 | ||
19c021e1 | 149 | int64_t hbitmap_iter_next(HBitmapIter *hbi) |
f63ea4e9 VSO |
150 | { |
151 | unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1] & | |
152 | hbi->hb->levels[HBITMAP_LEVELS - 1][hbi->pos]; | |
153 | int64_t item; | |
154 | ||
155 | if (cur == 0) { | |
156 | cur = hbitmap_iter_skip_words(hbi); | |
157 | if (cur == 0) { | |
158 | return -1; | |
159 | } | |
160 | } | |
161 | ||
19c021e1 VSO |
162 | /* The next call will resume work from the next bit. */ |
163 | hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1); | |
f63ea4e9 VSO |
164 | item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ctzl(cur); |
165 | ||
166 | return item << hbi->granularity; | |
167 | } | |
168 | ||
e7c033c3 PB |
169 | void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first) |
170 | { | |
171 | unsigned i, bit; | |
172 | uint64_t pos; | |
173 | ||
174 | hbi->hb = hb; | |
175 | pos = first >> hb->granularity; | |
1b095244 | 176 | assert(pos < hb->size); |
e7c033c3 PB |
177 | hbi->pos = pos >> BITS_PER_LEVEL; |
178 | hbi->granularity = hb->granularity; | |
179 | ||
180 | for (i = HBITMAP_LEVELS; i-- > 0; ) { | |
181 | bit = pos & (BITS_PER_LONG - 1); | |
182 | pos >>= BITS_PER_LEVEL; | |
183 | ||
184 | /* Drop bits representing items before first. */ | |
185 | hbi->cur[i] = hb->levels[i][pos] & ~((1UL << bit) - 1); | |
186 | ||
187 | /* We have already added level i+1, so the lowest set bit has | |
188 | * been processed. Clear it. | |
189 | */ | |
190 | if (i != HBITMAP_LEVELS - 1) { | |
191 | hbi->cur[i] &= ~(1UL << bit); | |
192 | } | |
193 | } | |
194 | } | |
195 | ||
int64_t hbitmap_next_dirty(const HBitmap *hb, int64_t start, int64_t count)
{
    HBitmapIter hbi;
    int64_t first_dirty_off;
    uint64_t end;

    assert(start >= 0 && count >= 0);

    if (start >= hb->orig_size || count == 0) {
        return -1;
    }

    end = count > hb->orig_size - start ? hb->orig_size : start + count;

    hbitmap_iter_init(&hbi, hb, start);
    first_dirty_off = hbitmap_iter_next(&hbi);

    if (first_dirty_off < 0 || first_dirty_off >= end) {
        return -1;
    }

    return MAX(start, first_dirty_off);
}

int64_t hbitmap_next_zero(const HBitmap *hb, int64_t start, int64_t count)
{
    size_t pos = (start >> hb->granularity) >> BITS_PER_LEVEL;
    unsigned long *last_lev = hb->levels[HBITMAP_LEVELS - 1];
    unsigned long cur = last_lev[pos];
    unsigned start_bit_offset;
    uint64_t end_bit, sz;
    int64_t res;

    assert(start >= 0 && count >= 0);

    if (start >= hb->orig_size || count == 0) {
        return -1;
    }

    end_bit = count > hb->orig_size - start ?
                hb->size :
                ((start + count - 1) >> hb->granularity) + 1;
    sz = (end_bit + BITS_PER_LONG - 1) >> BITS_PER_LEVEL;

    /* There may be some zero bits in @cur before @start; we are not
     * interested in them, so set them.
     */
    start_bit_offset = (start >> hb->granularity) & (BITS_PER_LONG - 1);
    cur |= (1UL << start_bit_offset) - 1;
    assert((start >> hb->granularity) < hb->size);

    if (cur == (unsigned long)-1) {
        do {
            pos++;
        } while (pos < sz && last_lev[pos] == (unsigned long)-1);

        if (pos >= sz) {
            return -1;
        }

        cur = last_lev[pos];
    }

    res = (pos << BITS_PER_LEVEL) + ctol(cur);
    if (res >= end_bit) {
        return -1;
    }

    res = res << hb->granularity;
    if (res < start) {
        assert(((start - res) >> hb->granularity) == 0);
        return start;
    }

    return res;
}

bool hbitmap_next_dirty_area(const HBitmap *hb, int64_t start, int64_t end,
                             int64_t max_dirty_count,
                             int64_t *dirty_start, int64_t *dirty_count)
{
    int64_t next_zero;

    assert(start >= 0 && end >= 0 && max_dirty_count > 0);

    end = MIN(end, hb->orig_size);
    if (start >= end) {
        return false;
    }

    start = hbitmap_next_dirty(hb, start, end - start);
    if (start < 0) {
        return false;
    }

    end = start + MIN(end - start, max_dirty_count);

    next_zero = hbitmap_next_zero(hb, start, end - start);
    if (next_zero >= 0) {
        end = next_zero;
    }

    *dirty_start = start;
    *dirty_count = end - start;

    return true;
}

bool hbitmap_empty(const HBitmap *hb)
{
    return hb->count == 0;
}

int hbitmap_granularity(const HBitmap *hb)
{
    return hb->granularity;
}

uint64_t hbitmap_count(const HBitmap *hb)
{
    return hb->count << hb->granularity;
}

/**
 * hbitmap_iter_next_word:
 * @hbi: HBitmapIter to operate on.
 * @p_cur: Location where to store the next non-zero word.
 *
 * Return the index of the next nonzero word that is set in @hbi's
 * associated HBitmap, and set *p_cur to the content of that word
 * (bits before the index that was passed to hbitmap_iter_init are
 * trimmed on the first call).  Return -1, and set *p_cur to zero,
 * if all remaining words are zero.
 */
static size_t hbitmap_iter_next_word(HBitmapIter *hbi, unsigned long *p_cur)
{
    unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1];

    if (cur == 0) {
        cur = hbitmap_iter_skip_words(hbi);
        if (cur == 0) {
            *p_cur = 0;
            return -1;
        }
    }

    /* The next call will resume work from the next word. */
    hbi->cur[HBITMAP_LEVELS - 1] = 0;
    *p_cur = cur;
    return hbi->pos;
}

/* Count the number of set bits between start and end, not accounting for
 * the granularity.  Also an example of how to use hbitmap_iter_next_word.
 */
static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last)
{
    HBitmapIter hbi;
    uint64_t count = 0;
    uint64_t end = last + 1;
    unsigned long cur;
    size_t pos;

    hbitmap_iter_init(&hbi, hb, start << hb->granularity);
    for (;;) {
        pos = hbitmap_iter_next_word(&hbi, &cur);
        if (pos >= (end >> BITS_PER_LEVEL)) {
            break;
        }
        count += ctpopl(cur);
    }

    if (pos == (end >> BITS_PER_LEVEL)) {
        /* Drop bits representing the END-th and subsequent items. */
        int bit = end & (BITS_PER_LONG - 1);
        cur &= (1UL << bit) - 1;
        count += ctpopl(cur);
    }

    return count;
}

/* Setting starts at the last layer and propagates up if an element
 * changes.
 */
static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last)
{
    unsigned long mask;
    unsigned long old;

    assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
    assert(start <= last);

    mask = 2UL << (last & (BITS_PER_LONG - 1));
    mask -= 1UL << (start & (BITS_PER_LONG - 1));
    old = *elem;
    *elem |= mask;
    return old != *elem;
}

/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)...
 * Returns true if at least one bit is changed. */
static bool hb_set_between(HBitmap *hb, int level, uint64_t start,
                           uint64_t last)
{
    size_t pos = start >> BITS_PER_LEVEL;
    size_t lastpos = last >> BITS_PER_LEVEL;
    bool changed = false;
    size_t i;

    i = pos;
    if (i < lastpos) {
        uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
        changed |= hb_set_elem(&hb->levels[level][i], start, next - 1);
        for (;;) {
            start = next;
            next += BITS_PER_LONG;
            if (++i == lastpos) {
                break;
            }
            changed |= (hb->levels[level][i] == 0);
            hb->levels[level][i] = ~0UL;
        }
    }
    changed |= hb_set_elem(&hb->levels[level][i], start, last);

    /* If there was any change in this layer, we may have to update
     * the one above.
     */
    if (level > 0 && changed) {
        hb_set_between(hb, level - 1, pos, lastpos);
    }
    return changed;
}

void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count)
{
    /* Compute range in the last layer. */
    uint64_t first, n;
    uint64_t last = start + count - 1;

    if (count == 0) {
        return;
    }

    trace_hbitmap_set(hb, start, count,
                      start >> hb->granularity, last >> hb->granularity);

    first = start >> hb->granularity;
    last >>= hb->granularity;
    assert(last < hb->size);
    n = last - first + 1;

    hb->count += n - hb_count_between(hb, first, last);
    if (hb_set_between(hb, HBITMAP_LEVELS - 1, first, last) &&
        hb->meta) {
        hbitmap_set(hb->meta, start, count);
    }
}

/* Resetting works the other way round: propagate up if the new
 * value is zero.
 */
static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t last)
{
    unsigned long mask;
    bool blanked;

    assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
    assert(start <= last);

    mask = 2UL << (last & (BITS_PER_LONG - 1));
    mask -= 1UL << (start & (BITS_PER_LONG - 1));
    blanked = *elem != 0 && ((*elem & ~mask) == 0);
    *elem &= ~mask;
    return blanked;
}

/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)...
 * Returns true if at least one bit is changed. */
static bool hb_reset_between(HBitmap *hb, int level, uint64_t start,
                             uint64_t last)
{
    size_t pos = start >> BITS_PER_LEVEL;
    size_t lastpos = last >> BITS_PER_LEVEL;
    bool changed = false;
    size_t i;

    i = pos;
    if (i < lastpos) {
        uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;

        /* Here we need a more complex test than when setting bits.  Even if
         * something was changed, we must not blank bits in the upper level
         * unless the lower-level word became entirely zero.  So, remove pos
         * from the upper-level range if bits remain set.
         */
        if (hb_reset_elem(&hb->levels[level][i], start, next - 1)) {
            changed = true;
        } else {
            pos++;
        }

        for (;;) {
            start = next;
            next += BITS_PER_LONG;
            if (++i == lastpos) {
                break;
            }
            changed |= (hb->levels[level][i] != 0);
            hb->levels[level][i] = 0UL;
        }
    }

    /* Same as above, this time for lastpos. */
    if (hb_reset_elem(&hb->levels[level][i], start, last)) {
        changed = true;
    } else {
        lastpos--;
    }

    if (level > 0 && changed) {
        hb_reset_between(hb, level - 1, pos, lastpos);
    }

    return changed;
}

void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count)
{
    /* Compute range in the last layer. */
    uint64_t first;
    uint64_t last = start + count - 1;
    uint64_t gran = 1ULL << hb->granularity;

    if (count == 0) {
        return;
    }

    assert(QEMU_IS_ALIGNED(start, gran));
    assert(QEMU_IS_ALIGNED(count, gran) || (start + count == hb->orig_size));

    trace_hbitmap_reset(hb, start, count,
                        start >> hb->granularity, last >> hb->granularity);

    first = start >> hb->granularity;
    last >>= hb->granularity;
    assert(last < hb->size);

    hb->count -= hb_count_between(hb, first, last);
    if (hb_reset_between(hb, HBITMAP_LEVELS - 1, first, last) &&
        hb->meta) {
        hbitmap_set(hb->meta, start, count);
    }
}

void hbitmap_reset_all(HBitmap *hb)
{
    unsigned int i;

    /* Same as hbitmap_alloc() except for memset() instead of malloc() */
    for (i = HBITMAP_LEVELS; --i >= 1; ) {
        memset(hb->levels[i], 0, hb->sizes[i] * sizeof(unsigned long));
    }

    hb->levels[0][0] = 1UL << (BITS_PER_LONG - 1);
    hb->count = 0;
}

bool hbitmap_is_serializable(const HBitmap *hb)
{
    /* Every serialized chunk must be aligned to 64 bits so that endianness
     * requirements can be fulfilled on both 64 bit and 32 bit hosts.
     * We have hbitmap_serialization_align() which converts this
     * alignment requirement from bitmap bits to items covered (e.g. sectors).
     * That value is:
     *    64 << hb->granularity
     * Since this value must not exceed UINT64_MAX, hb->granularity must be
     * less than 58 (== 64 - 6, where 6 is ld(64), i.e. 1 << 6 == 64).
     *
     * In order for hbitmap_serialization_align() to always return a
     * meaningful value, bitmaps that are to be serialized must have a
     * granularity of less than 58. */

    return hb->granularity < 58;
}

bool hbitmap_get(const HBitmap *hb, uint64_t item)
{
    /* Compute position and bit in the last layer. */
    uint64_t pos = item >> hb->granularity;
    unsigned long bit = 1UL << (pos & (BITS_PER_LONG - 1));
    assert(pos < hb->size);

    return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0;
}

uint64_t hbitmap_serialization_align(const HBitmap *hb)
{
    assert(hbitmap_is_serializable(hb));

    /* Require at least 64 bit granularity to be safe on both 64 bit and 32 bit
     * hosts. */
    return UINT64_C(64) << hb->granularity;
}

/* Start should be aligned to the serialization granularity; the chunk size
 * should be aligned too, except for the last chunk.
 */
static void serialization_chunk(const HBitmap *hb,
                                uint64_t start, uint64_t count,
                                unsigned long **first_el, uint64_t *el_count)
{
    uint64_t last = start + count - 1;
    uint64_t gran = hbitmap_serialization_align(hb);

    assert((start & (gran - 1)) == 0);
    assert((last >> hb->granularity) < hb->size);
    if ((last >> hb->granularity) != hb->size - 1) {
        assert((count & (gran - 1)) == 0);
    }

    start = (start >> hb->granularity) >> BITS_PER_LEVEL;
    last = (last >> hb->granularity) >> BITS_PER_LEVEL;

    *first_el = &hb->levels[HBITMAP_LEVELS - 1][start];
    *el_count = last - start + 1;
}

uint64_t hbitmap_serialization_size(const HBitmap *hb,
                                    uint64_t start, uint64_t count)
{
    uint64_t el_count;
    unsigned long *cur;

    if (!count) {
        return 0;
    }
    serialization_chunk(hb, start, count, &cur, &el_count);

    return el_count * sizeof(unsigned long);
}

void hbitmap_serialize_part(const HBitmap *hb, uint8_t *buf,
                            uint64_t start, uint64_t count)
{
    uint64_t el_count;
    unsigned long *cur, *end;

    if (!count) {
        return;
    }
    serialization_chunk(hb, start, count, &cur, &el_count);
    end = cur + el_count;

    while (cur != end) {
        unsigned long el =
            (BITS_PER_LONG == 32 ? cpu_to_le32(*cur) : cpu_to_le64(*cur));

        memcpy(buf, &el, sizeof(el));
        buf += sizeof(el);
        cur++;
    }
}

void hbitmap_deserialize_part(HBitmap *hb, uint8_t *buf,
                              uint64_t start, uint64_t count,
                              bool finish)
{
    uint64_t el_count;
    unsigned long *cur, *end;

    if (!count) {
        return;
    }
    serialization_chunk(hb, start, count, &cur, &el_count);
    end = cur + el_count;

    while (cur != end) {
        memcpy(cur, buf, sizeof(*cur));

        if (BITS_PER_LONG == 32) {
            le32_to_cpus((uint32_t *)cur);
        } else {
            le64_to_cpus((uint64_t *)cur);
        }

        buf += sizeof(unsigned long);
        cur++;
    }
    if (finish) {
        hbitmap_deserialize_finish(hb);
    }
}

void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t start, uint64_t count,
                                bool finish)
{
    uint64_t el_count;
    unsigned long *first;

    if (!count) {
        return;
    }
    serialization_chunk(hb, start, count, &first, &el_count);

    memset(first, 0, el_count * sizeof(unsigned long));
    if (finish) {
        hbitmap_deserialize_finish(hb);
    }
}

void hbitmap_deserialize_ones(HBitmap *hb, uint64_t start, uint64_t count,
                              bool finish)
{
    uint64_t el_count;
    unsigned long *first;

    if (!count) {
        return;
    }
    serialization_chunk(hb, start, count, &first, &el_count);

    memset(first, 0xff, el_count * sizeof(unsigned long));
    if (finish) {
        hbitmap_deserialize_finish(hb);
    }
}

void hbitmap_deserialize_finish(HBitmap *bitmap)
{
    int64_t i, size, prev_size;
    int lev;

    /* Restore levels starting from the penultimate level down to level 0,
     * assuming that the last level is ok. */
    size = MAX((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
    for (lev = HBITMAP_LEVELS - 1; lev-- > 0; ) {
        prev_size = size;
        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
        memset(bitmap->levels[lev], 0, size * sizeof(unsigned long));

        for (i = 0; i < prev_size; ++i) {
            if (bitmap->levels[lev + 1][i]) {
                bitmap->levels[lev][i >> BITS_PER_LEVEL] |=
                    1UL << (i & (BITS_PER_LONG - 1));
            }
        }
    }

    bitmap->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
    bitmap->count = hb_count_between(bitmap, 0, bitmap->size - 1);
}

void hbitmap_free(HBitmap *hb)
{
    unsigned i;
    assert(!hb->meta);
    for (i = HBITMAP_LEVELS; i-- > 0; ) {
        g_free(hb->levels[i]);
    }
    g_free(hb);
}

HBitmap *hbitmap_alloc(uint64_t size, int granularity)
{
    HBitmap *hb = g_new0(struct HBitmap, 1);
    unsigned i;

    assert(size <= INT64_MAX);
    hb->orig_size = size;

    assert(granularity >= 0 && granularity < 64);
    size = (size + (1ULL << granularity) - 1) >> granularity;
    assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE));

    hb->size = size;
    hb->granularity = granularity;
    for (i = HBITMAP_LEVELS; i-- > 0; ) {
        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
        hb->sizes[i] = size;
        hb->levels[i] = g_new0(unsigned long, size);
    }

    /* We necessarily have free bits in level 0 due to the definition
     * of HBITMAP_LEVELS, so use one for a sentinel.  This speeds up
     * hbitmap_iter_skip_words.
     */
    assert(size == 1);
    hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
    return hb;
}

void hbitmap_truncate(HBitmap *hb, uint64_t size)
{
    bool shrink;
    unsigned i;
    uint64_t num_elements = size;
    uint64_t old;

    assert(size <= INT64_MAX);
    hb->orig_size = size;

    /* Size comes in as logical elements, adjust for granularity. */
    size = (size + (1ULL << hb->granularity) - 1) >> hb->granularity;
    assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE));
    shrink = size < hb->size;

    /* Bit sizes are identical; nothing to do. */
    if (size == hb->size) {
        return;
    }

    /* If we're losing bits, let's clear those bits before we invalidate all of
     * our invariants.  This helps keep the bitcount consistent, and will prevent
     * us from carrying around garbage bits beyond the end of the map.
     */
    if (shrink) {
        /* Don't clear partial granularity groups;
         * start at the first full one. */
        uint64_t start = ROUND_UP(num_elements, UINT64_C(1) << hb->granularity);
        uint64_t fix_count = (hb->size << hb->granularity) - start;

        assert(fix_count);
        hbitmap_reset(hb, start, fix_count);
    }

    hb->size = size;
    for (i = HBITMAP_LEVELS; i-- > 0; ) {
        size = MAX(BITS_TO_LONGS(size), 1);
        if (hb->sizes[i] == size) {
            break;
        }
        old = hb->sizes[i];
        hb->sizes[i] = size;
        hb->levels[i] = g_realloc(hb->levels[i], size * sizeof(unsigned long));
        if (!shrink) {
            memset(&hb->levels[i][old], 0x00,
                   (size - old) * sizeof(*hb->levels[i]));
        }
    }
    if (hb->meta) {
        hbitmap_truncate(hb->meta, hb->size << hb->granularity);
    }
}

bool hbitmap_can_merge(const HBitmap *a, const HBitmap *b)
{
    return (a->orig_size == b->orig_size);
}

/**
 * hbitmap_sparse_merge: performs dst = dst | src.
 * Works with differing granularities; best used when src is sparsely
 * populated.
 */
static void hbitmap_sparse_merge(HBitmap *dst, const HBitmap *src)
{
    int64_t offset;
    int64_t count;

    for (offset = 0;
         hbitmap_next_dirty_area(src, offset, src->orig_size, INT64_MAX,
                                 &offset, &count);
         offset += count)
    {
        hbitmap_set(dst, offset, count);
    }
}

/**
 * Given HBitmaps A and B, let R := A (BITOR) B.
 * Bitmaps A and B will not be modified,
 * except when bitmap R is an alias of A or B.
 *
 * @return true if the merge was successful,
 *         false if it was not attempted.
 */
bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result)
{
    int i;
    uint64_t j;

    if (!hbitmap_can_merge(a, b) || !hbitmap_can_merge(a, result)) {
        return false;
    }
    assert(hbitmap_can_merge(b, result));

    if ((!hbitmap_count(a) && result == b) ||
        (!hbitmap_count(b) && result == a)) {
        return true;
    }

    if (!hbitmap_count(a) && !hbitmap_count(b)) {
        hbitmap_reset_all(result);
        return true;
    }

    if (a->granularity != b->granularity) {
        if ((a != result) && (b != result)) {
            hbitmap_reset_all(result);
        }
        if (a != result) {
            hbitmap_sparse_merge(result, a);
        }
        if (b != result) {
            hbitmap_sparse_merge(result, b);
        }
        return true;
    }

    /* This merge is O(size), as BITS_PER_LONG and HBITMAP_LEVELS are constant.
     * It may be possible to improve running times for sparsely populated maps
     * by using hbitmap_iter_next, but this is suboptimal for dense maps.
     */
    assert(a->size == b->size);
    for (i = HBITMAP_LEVELS - 1; i >= 0; i--) {
        for (j = 0; j < a->sizes[i]; j++) {
            result->levels[i][j] = a->levels[i][j] | b->levels[i][j];
        }
    }

    /* Recompute the dirty count. */
    result->count = hb_count_between(result, 0, result->size - 1);

    return true;
}

char *hbitmap_sha256(const HBitmap *bitmap, Error **errp)
{
    size_t size = bitmap->sizes[HBITMAP_LEVELS - 1] * sizeof(unsigned long);
    char *data = (char *)bitmap->levels[HBITMAP_LEVELS - 1];
    char *hash = NULL;
    qcrypto_hash_digest(QCRYPTO_HASH_ALG_SHA256, data, size, &hash, errp);

    return hash;
}