2 * L2/refcount table cache for the QCOW2 format
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed for CONFIG_MADVISE */
26 #include "config-host.h"
28 #if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
32 #include "block/block_int.h"
33 #include "qemu-common.h"
34 #include "qemu/osdep.h"
38 typedef struct Qcow2CachedTable {
46 Qcow2CachedTable *entries;
47 struct Qcow2Cache *depends;
49 bool depends_on_flush;
52 uint64_t cache_clean_lru_counter;
55 static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
56 Qcow2Cache *c, int table)
58 BDRVQcow2State *s = bs->opaque;
59 return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
62 static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
63 Qcow2Cache *c, void *table)
65 BDRVQcow2State *s = bs->opaque;
66 ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
67 int idx = table_offset / s->cluster_size;
68 assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
72 static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
73 int i, int num_tables)
75 #if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID
76 BDRVQcow2State *s = bs->opaque;
77 void *t = qcow2_cache_get_table_addr(bs, c, i);
78 int align = getpagesize();
79 size_t mem_size = (size_t) s->cluster_size * num_tables;
80 size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
81 size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
83 qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED);
88 static inline bool can_clean_entry(Qcow2Cache *c, int i)
90 Qcow2CachedTable *t = &c->entries[i];
91 return t->ref == 0 && !t->dirty && t->offset != 0 &&
92 t->lru_counter <= c->cache_clean_lru_counter;
95 void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
101 /* Skip the entries that we don't need to clean */
102 while (i < c->size && !can_clean_entry(c, i)) {
106 /* And count how many we can clean in a row */
107 while (i < c->size && can_clean_entry(c, i)) {
108 c->entries[i].offset = 0;
109 c->entries[i].lru_counter = 0;
115 qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
119 c->cache_clean_lru_counter = c->lru_counter;
122 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
124 BDRVQcow2State *s = bs->opaque;
127 c = g_new0(Qcow2Cache, 1);
128 c->size = num_tables;
129 c->entries = g_try_new0(Qcow2CachedTable, num_tables);
130 c->table_array = qemu_try_blockalign(bs->file,
131 (size_t) num_tables * s->cluster_size);
133 if (!c->entries || !c->table_array) {
134 qemu_vfree(c->table_array);
143 int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c)
147 for (i = 0; i < c->size; i++) {
148 assert(c->entries[i].ref == 0);
151 qemu_vfree(c->table_array);
158 static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
162 ret = qcow2_cache_flush(bs, c->depends);
168 c->depends_on_flush = false;
173 static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
175 BDRVQcow2State *s = bs->opaque;
178 if (!c->entries[i].dirty || !c->entries[i].offset) {
182 trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
183 c == s->l2_table_cache, i);
186 ret = qcow2_cache_flush_dependency(bs, c);
187 } else if (c->depends_on_flush) {
188 ret = bdrv_flush(bs->file);
190 c->depends_on_flush = false;
198 if (c == s->refcount_block_cache) {
199 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
200 c->entries[i].offset, s->cluster_size);
201 } else if (c == s->l2_table_cache) {
202 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
203 c->entries[i].offset, s->cluster_size);
205 ret = qcow2_pre_write_overlap_check(bs, 0,
206 c->entries[i].offset, s->cluster_size);
213 if (c == s->refcount_block_cache) {
214 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
215 } else if (c == s->l2_table_cache) {
216 BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
219 ret = bdrv_pwrite(bs->file, c->entries[i].offset,
220 qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
225 c->entries[i].dirty = false;
230 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
232 BDRVQcow2State *s = bs->opaque;
237 trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
239 for (i = 0; i < c->size; i++) {
240 ret = qcow2_cache_entry_flush(bs, c, i);
241 if (ret < 0 && result != -ENOSPC) {
247 ret = bdrv_flush(bs->file);
256 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
257 Qcow2Cache *dependency)
261 if (dependency->depends) {
262 ret = qcow2_cache_flush_dependency(bs, dependency);
268 if (c->depends && (c->depends != dependency)) {
269 ret = qcow2_cache_flush_dependency(bs, c);
275 c->depends = dependency;
279 void qcow2_cache_depends_on_flush(Qcow2Cache *c)
281 c->depends_on_flush = true;
284 int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
288 ret = qcow2_cache_flush(bs, c);
293 for (i = 0; i < c->size; i++) {
294 assert(c->entries[i].ref == 0);
295 c->entries[i].offset = 0;
296 c->entries[i].lru_counter = 0;
299 qcow2_cache_table_release(bs, c, 0, c->size);
306 static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
307 uint64_t offset, void **table, bool read_from_disk)
309 BDRVQcow2State *s = bs->opaque;
313 uint64_t min_lru_counter = UINT64_MAX;
314 int min_lru_index = -1;
316 trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
317 offset, read_from_disk);
319 /* Check if the table is already cached */
320 i = lookup_index = (offset / s->cluster_size * 4) % c->size;
322 const Qcow2CachedTable *t = &c->entries[i];
323 if (t->offset == offset) {
326 if (t->ref == 0 && t->lru_counter < min_lru_counter) {
327 min_lru_counter = t->lru_counter;
330 if (++i == c->size) {
333 } while (i != lookup_index);
335 if (min_lru_index == -1) {
336 /* This can't happen in current synchronous code, but leave the check
337 * here as a reminder for whoever starts using AIO with the cache */
341 /* Cache miss: write a table back and replace it */
343 trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
344 c == s->l2_table_cache, i);
346 ret = qcow2_cache_entry_flush(bs, c, i);
351 trace_qcow2_cache_get_read(qemu_coroutine_self(),
352 c == s->l2_table_cache, i);
353 c->entries[i].offset = 0;
354 if (read_from_disk) {
355 if (c == s->l2_table_cache) {
356 BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
359 ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i),
366 c->entries[i].offset = offset;
368 /* And return the right table */
371 *table = qcow2_cache_get_table_addr(bs, c, i);
373 trace_qcow2_cache_get_done(qemu_coroutine_self(),
374 c == s->l2_table_cache, i);
379 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
382 return qcow2_cache_do_get(bs, c, offset, table, true);
385 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
388 return qcow2_cache_do_get(bs, c, offset, table, false);
391 void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
393 int i = qcow2_cache_get_table_idx(bs, c, *table);
398 if (c->entries[i].ref == 0) {
399 c->entries[i].lru_counter = ++c->lru_counter;
402 assert(c->entries[i].ref >= 0);
405 void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
408 int i = qcow2_cache_get_table_idx(bs, c, table);
409 assert(c->entries[i].offset != 0);
410 c->entries[i].dirty = true;