X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/3dc10613c313a042a111e46a977733411495ea8c..ff99129ab89a532f0ca0a0b89b9aa004c09d9b9d:/block/qcow2.c diff --git a/block/qcow2.c b/block/qcow2.c index 2ed8d95b1f..9b09e01e22 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -25,7 +25,6 @@ #include "block/block_int.h" #include "qemu/module.h" #include -#include "qemu/aes.h" #include "block/qcow2.h" #include "qemu/error-report.h" #include "qapi/qmp/qerror.h" @@ -86,7 +85,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, uint64_t end_offset, void **p_feature_table, Error **errp) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; QCowExtension ext; uint64_t offset; int ret; @@ -140,6 +139,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, return 3; } bs->backing_format[ext.len] = '\0'; + s->image_backing_format = g_strdup(bs->backing_format); #ifdef DEBUG_EXT printf("Qcow2: Got format extension %s\n", bs->backing_format); #endif @@ -187,7 +187,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, static void cleanup_unknown_header_ext(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; Qcow2UnknownHeaderExtension *uext, *next; QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) { @@ -206,8 +206,8 @@ static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs, vsnprintf(msg, sizeof(msg), fmt, ap); va_end(ap); - error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, - bdrv_get_device_name(bs), "qcow2", msg); + error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, + bdrv_get_device_or_node_name(bs), "qcow2", msg); } static void report_unsupported_feature(BlockDriverState *bs, @@ -249,7 +249,7 @@ static void report_unsupported_feature(BlockDriverState *bs, */ int qcow2_mark_dirty(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t val; int ret; @@ -282,7 +282,7 @@ int qcow2_mark_dirty(BlockDriverState *bs) */ static int qcow2_mark_clean(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { int ret; @@ -304,7 +304,7 @@ static int qcow2_mark_clean(BlockDriverState *bs) */ int qcow2_mark_corrupt(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT; return qcow2_update_header(bs); @@ -316,7 +316,7 @@ int qcow2_mark_corrupt(BlockDriverState *bs) */ int qcow2_mark_consistent(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { int ret = bdrv_flush(bs); @@ -351,7 +351,7 @@ static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result, static int validate_table_offset(BlockDriverState *bs, uint64_t offset, uint64_t entries, size_t entry_len) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t size; /* Use signed INT64_MAX as the maximum even for uint64_t header fields, @@ -467,6 +467,11 @@ static QemuOptsList qcow2_runtime_opts = { .type = QEMU_OPT_SIZE, .help = "Maximum refcount block cache size", }, + { + .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL, + .type = QEMU_OPT_NUMBER, + .help = "Clean unused cache entries after this time (in seconds)", + }, { /* end of list */ } }, }; @@ -482,9 +487,54 @@ static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2, }; -static void read_cache_sizes(QemuOpts *opts, uint64_t *l2_cache_size, +static void cache_clean_timer_cb(void *opaque) +{ + BlockDriverState *bs = opaque; + BDRVQcow2State *s = bs->opaque; + qcow2_cache_clean_unused(bs, s->l2_table_cache); + qcow2_cache_clean_unused(bs, s->refcount_block_cache); + timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + (int64_t) s->cache_clean_interval * 1000); +} + +static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context) +{ + BDRVQcow2State *s = bs->opaque; + if (s->cache_clean_interval > 0) { + s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL, + SCALE_MS, cache_clean_timer_cb, + bs); + timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + (int64_t) s->cache_clean_interval * 1000); + } +} + +static void cache_clean_timer_del(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + if (s->cache_clean_timer) { + timer_del(s->cache_clean_timer); + timer_free(s->cache_clean_timer); + s->cache_clean_timer = NULL; + } +} + +static void qcow2_detach_aio_context(BlockDriverState *bs) +{ + cache_clean_timer_del(bs); +} + +static void qcow2_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + cache_clean_timer_init(bs, new_context); +} + +static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, + uint64_t *l2_cache_size, uint64_t *refcount_cache_size, Error **errp) { + BDRVQcow2State *s = bs->opaque; uint64_t combined_cache_size; bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set; @@ -524,7 +574,9 @@ static void read_cache_sizes(QemuOpts *opts, uint64_t *l2_cache_size, } } else { if (!l2_cache_size_set && !refcount_cache_size_set) { - *l2_cache_size = DEFAULT_L2_CACHE_BYTE_SIZE; + *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE, + (uint64_t)DEFAULT_L2_CACHE_CLUSTERS + * s->cluster_size); *refcount_cache_size = *l2_cache_size / DEFAULT_L2_REFCOUNT_SIZE_RATIO; } else if (!l2_cache_size_set) { @@ -540,7 +592,7 @@ static void read_cache_sizes(QemuOpts *opts, uint64_t *l2_cache_size, static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; unsigned int len, i; int ret = 0; QCowHeader header; @@ -551,6 +603,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, const char *opt_overlap_check, *opt_overlap_check_template; int overlap_check_template = 0; uint64_t l2_cache_size, refcount_cache_size; + uint64_t cache_clean_interval; ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); if (ret < 0) { @@ -677,13 +730,16 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, } /* Check support for various header values */ - if (header.refcount_order != 4) { - report_unsupported(bs, errp, "%d bit reference counts", - 1 << header.refcount_order); - ret = -ENOTSUP; + if (header.refcount_order > 6) { + error_setg(errp, "Reference count entry width too large; may not " + "exceed 64 bits"); + ret = -EINVAL; goto fail; } s->refcount_order = header.refcount_order; + s->refcount_bits = 1 << s->refcount_order; + s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1); + s->refcount_max += s->refcount_max - 1; if (header.crypt_method > QCOW_CRYPT_AES) { error_setg(errp, "Unsupported encryption method: %" PRIu32, @@ -691,6 +747,11 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, ret = -EINVAL; goto fail; } + if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128)) { + error_setg(errp, "AES cipher not available"); + ret = -EINVAL; + goto fail; + } s->crypt_method_header = header.crypt_method; if (s->crypt_method_header) { bs->encrypted = 1; @@ -739,7 +800,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, } /* read the level 1 table */ - if (header.l1_size > QCOW_MAX_L1_SIZE) { + if (header.l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) { error_setg(errp, "Active L1 table too large"); ret = -EFBIG; goto fail; @@ -799,7 +860,8 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - read_cache_sizes(opts, &l2_cache_size, &refcount_cache_size, &local_err); + read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size, + &local_err); if (local_err) { error_propagate(errp, local_err); ret = -EINVAL; @@ -835,6 +897,16 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } + cache_clean_interval = + qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, 0); + if (cache_clean_interval > UINT_MAX) { + error_setg(errp, "Cache clean interval too big"); + ret = -EINVAL; + goto fail; + } + s->cache_clean_interval = cache_clean_interval; + cache_clean_timer_init(bs, bdrv_get_aio_context(bs)); + s->cluster_cache = g_malloc(s->cluster_size); /* one more sector for decompressed data alignment */ s->cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS @@ -881,6 +953,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } bs->backing_file[len] = '\0'; + s->image_backing_file = g_strdup(bs->backing_file); } /* Internal snapshots */ @@ -999,6 +1072,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, qemu_vfree(s->l1_table); /* else pre-write overlap checks in cache_destroy may crash */ s->l1_table = NULL; + cache_clean_timer_del(bs); if (s->l2_table_cache) { qcow2_cache_destroy(bs, s->l2_table_cache); } @@ -1012,16 +1086,17 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; bs->bl.write_zeroes_alignment = s->cluster_sectors; } static int qcow2_set_key(BlockDriverState *bs, const char *key) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint8_t keybuf[16]; int len, i; + Error *err = NULL; memset(keybuf, 0, 16); len = strlen(key); @@ -1032,30 +1107,22 @@ static int qcow2_set_key(BlockDriverState *bs, const char *key) for(i = 0;i < len;i++) { keybuf[i] = key[i]; } - s->crypt_method = s->crypt_method_header; + assert(bs->encrypted); - if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0) - return -1; - if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) + qcrypto_cipher_free(s->cipher); + s->cipher = qcrypto_cipher_new( + QCRYPTO_CIPHER_ALG_AES_128, + QCRYPTO_CIPHER_MODE_CBC, + keybuf, G_N_ELEMENTS(keybuf), + &err); + + if (!s->cipher) { + /* XXX would be nice if errors in this method could + * be properly propagate to the caller. Would need + * the bdrv_set_key() API signature to be fixed. */ + error_free(err); return -1; -#if 0 - /* test */ - { - uint8_t in[16]; - uint8_t out[16]; - uint8_t tmp[16]; - for(i=0;i<16;i++) - in[i] = i; - AES_encrypt(in, tmp, &s->aes_encrypt_key); - AES_decrypt(tmp, out, &s->aes_decrypt_key); - for(i = 0; i < 16; i++) - printf(" %02x", tmp[i]); - printf("\n"); - for(i = 0; i < 16; i++) - printf(" %02x", out[i]); - printf("\n"); } -#endif return 0; } @@ -1084,7 +1151,7 @@ static int qcow2_reopen_prepare(BDRVReopenState *state, static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t cluster_offset; int index_in_cluster, ret; int64_t status = 0; @@ -1098,7 +1165,7 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, } if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED && - !s->crypt_method) { + !s->cipher) { index_in_cluster = sector_num & (s->cluster_sectors - 1); cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset; @@ -1131,7 +1198,7 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, int remaining_sectors, QEMUIOVector *qiov) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int index_in_cluster, n1; int ret; int cur_nr_sectors; /* number of sectors in current iteration */ @@ -1148,7 +1215,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, /* prepare next request */ cur_nr_sectors = remaining_sectors; - if (s->crypt_method) { + if (s->cipher) { cur_nr_sectors = MIN(cur_nr_sectors, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors); } @@ -1219,7 +1286,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, goto fail; } - if (s->crypt_method) { + if (bs->encrypted) { + assert(s->cipher); + /* * For encrypted images, read everything into a temporary * contiguous buffer on which the AES functions can work. @@ -1250,9 +1319,16 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, if (ret < 0) { goto fail; } - if (s->crypt_method) { - qcow2_encrypt_sectors(s, sector_num, cluster_data, - cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key); + if (bs->encrypted) { + assert(s->cipher); + Error *err = NULL; + if (qcow2_encrypt_sectors(s, sector_num, cluster_data, + cluster_data, cur_nr_sectors, false, + &err) < 0) { + error_free(err); + ret = -EIO; + goto fail; + } qemu_iovec_from_buf(qiov, bytes_done, cluster_data, 512 * cur_nr_sectors); } @@ -1284,7 +1360,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, int remaining_sectors, QEMUIOVector *qiov) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int index_in_cluster; int ret; int cur_nr_sectors; /* number of sectors in current iteration */ @@ -1310,7 +1386,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, trace_qcow2_writev_start_part(qemu_coroutine_self()); index_in_cluster = sector_num & (s->cluster_sectors - 1); cur_nr_sectors = remaining_sectors; - if (s->crypt_method && + if (bs->encrypted && cur_nr_sectors > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster) { cur_nr_sectors = @@ -1329,7 +1405,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_nr_sectors * 512); - if (s->crypt_method) { + if (bs->encrypted) { + Error *err = NULL; + assert(s->cipher); if (!cluster_data) { cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS @@ -1344,8 +1422,13 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); - qcow2_encrypt_sectors(s, sector_num, cluster_data, - cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key); + if (qcow2_encrypt_sectors(s, sector_num, cluster_data, + cluster_data, cur_nr_sectors, + true, &err) < 0) { + error_free(err); + ret = -EIO; + goto fail; + } qemu_iovec_reset(&hd_qiov); qemu_iovec_add(&hd_qiov, cluster_data, @@ -1423,7 +1506,7 @@ fail: static void qcow2_close(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; qemu_vfree(s->l1_table); /* else pre-write overlap checks in cache_destroy may crash */ s->l1_table = NULL; @@ -1448,12 +1531,19 @@ static void qcow2_close(BlockDriverState *bs) } } + cache_clean_timer_del(bs); qcow2_cache_destroy(bs, s->l2_table_cache); qcow2_cache_destroy(bs, s->refcount_block_cache); + qcrypto_cipher_free(s->cipher); + s->cipher = NULL; + g_free(s->unknown_header_fields); cleanup_unknown_header_ext(bs); + g_free(s->image_backing_file); + g_free(s->image_backing_format); + g_free(s->cluster_cache); qemu_vfree(s->cluster_data); qcow2_refcount_close(bs); @@ -1462,11 +1552,9 @@ static void qcow2_close(BlockDriverState *bs) static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int flags = s->flags; - AES_KEY aes_encrypt_key; - AES_KEY aes_decrypt_key; - uint32_t crypt_method = 0; + QCryptoCipher *cipher = NULL; QDict *options; Error *local_err = NULL; int ret; @@ -1476,11 +1564,8 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) * that means we don't have to worry about reopening them here. */ - if (s->crypt_method) { - crypt_method = s->crypt_method; - memcpy(&aes_encrypt_key, &s->aes_encrypt_key, sizeof(aes_encrypt_key)); - memcpy(&aes_decrypt_key, &s->aes_decrypt_key, sizeof(aes_decrypt_key)); - } + cipher = s->cipher; + s->cipher = NULL; qcow2_close(bs); @@ -1490,7 +1575,7 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) return; } - memset(s, 0, sizeof(BDRVQcowState)); + memset(s, 0, sizeof(BDRVQcow2State)); options = qdict_clone_shallow(bs->options); ret = qcow2_open(bs, options, flags, &local_err); @@ -1505,11 +1590,7 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) return; } - if (crypt_method) { - s->crypt_method = crypt_method; - memcpy(&s->aes_encrypt_key, &aes_encrypt_key, sizeof(aes_encrypt_key)); - memcpy(&s->aes_decrypt_key, &aes_decrypt_key, sizeof(aes_decrypt_key)); - } + s->cipher = cipher; } static size_t header_ext_add(char *buf, uint32_t magic, const void *s, @@ -1541,7 +1622,7 @@ static size_t header_ext_add(char *buf, uint32_t magic, const void *s, */ int qcow2_update_header(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; QCowHeader *header; char *buf; size_t buflen = s->cluster_size; @@ -1619,9 +1700,10 @@ int qcow2_update_header(BlockDriverState *bs) } /* Backing file format header extension */ - if (*bs->backing_format) { + if (s->image_backing_format) { ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT, - bs->backing_format, strlen(bs->backing_format), + s->image_backing_format, + strlen(s->image_backing_format), buflen); if (ret < 0) { goto fail; @@ -1679,8 +1761,8 @@ int qcow2_update_header(BlockDriverState *bs) buflen -= ret; /* Backing file name */ - if (*bs->backing_file) { - size_t backing_file_len = strlen(bs->backing_file); + if (s->image_backing_file) { + size_t backing_file_len = strlen(s->image_backing_file); if (buflen < backing_file_len) { ret = -ENOSPC; @@ -1688,7 +1770,7 @@ int qcow2_update_header(BlockDriverState *bs) } /* Using strncpy is ok here, since buf is not NUL-terminated. */ - strncpy(buf, bs->backing_file, buflen); + strncpy(buf, s->image_backing_file, buflen); header->backing_file_offset = cpu_to_be64(buf - ((char*) header)); header->backing_file_size = cpu_to_be32(backing_file_len); @@ -1709,9 +1791,17 @@ fail: static int qcow2_change_backing_file(BlockDriverState *bs, const char *backing_file, const char *backing_fmt) { + BDRVQcow2State *s = bs->opaque; + pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); + g_free(s->image_backing_file); + g_free(s->image_backing_format); + + s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL; + s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL; + return qcow2_update_header(bs); } @@ -1780,12 +1870,14 @@ static int preallocate(BlockDriverState *bs) static int qcow2_create2(const char *filename, int64_t total_size, const char *backing_file, const char *backing_format, int flags, size_t cluster_size, PreallocMode prealloc, - QemuOpts *opts, int version, + QemuOpts *opts, int version, int refcount_order, Error **errp) { - /* Calculate cluster_bits */ int cluster_bits; - cluster_bits = ffs(cluster_size) - 1; + QDict *options; + + /* Calculate cluster_bits */ + cluster_bits = ctz32(cluster_size); if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || (1 << cluster_bits) != cluster_size) { @@ -1813,9 +1905,21 @@ static int qcow2_create2(const char *filename, int64_t total_size, int ret; if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) { + /* Note: The following calculation does not need to be exact; if it is a + * bit off, either some bytes will be "leaked" (which is fine) or we + * will need to increase the file size by some bytes (which is fine, + * too, as long as the bulk is allocated here). Therefore, using + * floating point arithmetic is fine. */ int64_t meta_size = 0; uint64_t nreftablee, nrefblocke, nl1e, nl2e; int64_t aligned_total_size = align_offset(total_size, cluster_size); + int refblock_bits, refblock_size; + /* refcount entry size in bytes */ + double rces = (1 << refcount_order) / 8.; + + /* see qcow2_open() */ + refblock_bits = cluster_bits - (refcount_order - 3); + refblock_size = 1 << refblock_bits; /* header: 1 cluster */ meta_size += cluster_size; @@ -1840,26 +1944,27 @@ static int qcow2_create2(const char *filename, int64_t total_size, * c = cluster size * y1 = number of refcount blocks entries * y2 = meta size including everything + * rces = refcount entry size in bytes * then, * y1 = (y2 + a)/c - * y2 = y1 * sizeof(u16) + y1 * sizeof(u16) * sizeof(u64) / c + m + * y2 = y1 * rces + y1 * rces * sizeof(u64) / c + m * we can get y1: - * y1 = (a + m) / (c - sizeof(u16) - sizeof(u16) * sizeof(u64) / c) + * y1 = (a + m) / (c - rces - rces * sizeof(u64) / c) */ - nrefblocke = (aligned_total_size + meta_size + cluster_size) / - (cluster_size - sizeof(uint16_t) - - 1.0 * sizeof(uint16_t) * sizeof(uint64_t) / cluster_size); - nrefblocke = align_offset(nrefblocke, cluster_size / sizeof(uint16_t)); - meta_size += nrefblocke * sizeof(uint16_t); + nrefblocke = (aligned_total_size + meta_size + cluster_size) + / (cluster_size - rces - rces * sizeof(uint64_t) + / cluster_size); + meta_size += DIV_ROUND_UP(nrefblocke, refblock_size) * cluster_size; /* total size of refcount tables */ - nreftablee = nrefblocke * sizeof(uint16_t) / cluster_size; + nreftablee = nrefblocke / refblock_size; nreftablee = align_offset(nreftablee, cluster_size / sizeof(uint64_t)); meta_size += nreftablee * sizeof(uint64_t); qemu_opt_set_number(opts, BLOCK_OPT_SIZE, - aligned_total_size + meta_size); - qemu_opt_set(opts, BLOCK_OPT_PREALLOC, PreallocMode_lookup[prealloc]); + aligned_total_size + meta_size, &error_abort); + qemu_opt_set(opts, BLOCK_OPT_PREALLOC, PreallocMode_lookup[prealloc], + &error_abort); } ret = bdrv_create_file(filename, opts, &local_err); @@ -1870,7 +1975,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, bs = NULL; ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - NULL, &local_err); + &local_err); if (ret < 0) { error_propagate(errp, local_err); return ret; @@ -1888,7 +1993,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, .l1_size = cpu_to_be32(0), .refcount_table_offset = cpu_to_be64(cluster_size), .refcount_table_clusters = cpu_to_be32(1), - .refcount_order = cpu_to_be32(4), + .refcount_order = cpu_to_be32(refcount_order), .header_length = cpu_to_be32(sizeof(*header)), }; @@ -1929,9 +2034,11 @@ static int qcow2_create2(const char *filename, int64_t total_size, * refcount of the cluster that is occupied by the header and the refcount * table) */ - ret = bdrv_open(&bs, filename, NULL, NULL, + options = qdict_new(); + qdict_put(options, "driver", qstring_from_str("qcow2")); + ret = bdrv_open(&bs, filename, NULL, options, BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, - &bdrv_qcow2, &local_err); + &local_err); if (ret < 0) { error_propagate(errp, local_err); goto out; @@ -1967,7 +2074,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* And if we're supposed to preallocate metadata, do that now */ if (prealloc != PREALLOC_MODE_OFF) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; qemu_co_mutex_lock(&s->lock); ret = preallocate(bs); qemu_co_mutex_unlock(&s->lock); @@ -1981,9 +2088,11 @@ static int qcow2_create2(const char *filename, int64_t total_size, bs = NULL; /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */ - ret = bdrv_open(&bs, filename, NULL, NULL, + options = qdict_new(); + qdict_put(options, "driver", qstring_from_str("qcow2")); + ret = bdrv_open(&bs, filename, NULL, options, BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING, - &bdrv_qcow2, &local_err); + &local_err); if (local_err) { error_propagate(errp, local_err); goto out; @@ -2007,6 +2116,8 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp) size_t cluster_size = DEFAULT_CLUSTER_SIZE; PreallocMode prealloc; int version = 3; + uint64_t refcount_bits = 16; + int refcount_order; Error *local_err = NULL; int ret; @@ -2061,8 +2172,28 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp) goto finish; } + refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, + refcount_bits); + if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) { + error_setg(errp, "Refcount width must be a power of two and may not " + "exceed 64 bits"); + ret = -EINVAL; + goto finish; + } + + if (version < 3 && refcount_bits != 16) { + error_setg(errp, "Different refcount widths than 16 bits require " + "compatibility level 1.1 or above (use compat=1.1 or " + "greater)"); + ret = -EINVAL; + goto finish; + } + + refcount_order = ctz32(refcount_bits); + ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags, - cluster_size, prealloc, opts, version, &local_err); + cluster_size, prealloc, opts, version, refcount_order, + &local_err); if (local_err) { error_propagate(errp, local_err); } @@ -2078,7 +2209,7 @@ static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { int ret; - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; /* Emulate misaligned zero writes */ if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) { @@ -2098,7 +2229,7 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { int ret; - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; qemu_co_mutex_lock(&s->lock); ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS, @@ -2109,7 +2240,7 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs, static int qcow2_truncate(BlockDriverState *bs, int64_t offset) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int64_t new_l1_size; int ret; @@ -2153,7 +2284,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset) static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; z_stream strm; int ret, out_len; uint8_t *out_buf; @@ -2244,7 +2375,7 @@ fail: static int make_completely_empty(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int ret, l1_clusters; int64_t offset; uint64_t *new_reftable = NULL; @@ -2392,7 +2523,7 @@ fail: static int qcow2_make_empty(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t start_sector; int sector_step = INT_MAX / BDRV_SECTOR_SIZE; int l1_clusters, ret = 0; @@ -2433,7 +2564,7 @@ static int qcow2_make_empty(BlockDriverState *bs) static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int ret; qemu_co_mutex_lock(&s->lock); @@ -2457,7 +2588,7 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; bdi->unallocated_blocks_are_zero = true; bdi->can_write_zeroes_with_unmap = (s->qcow_version >= 3); bdi->cluster_size = s->cluster_size; @@ -2467,7 +2598,7 @@ static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1); *spec_info = (ImageInfoSpecific){ @@ -2478,7 +2609,8 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) }; if (s->qcow_version == 2) { *spec_info->qcow2 = (ImageInfoSpecificQCow2){ - .compat = g_strdup("0.10"), + .compat = g_strdup("0.10"), + .refcount_bits = s->refcount_bits, }; } else if (s->qcow_version == 3) { *spec_info->qcow2 = (ImageInfoSpecificQCow2){ @@ -2489,6 +2621,7 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) .corrupt = s->incompatible_features & QCOW2_INCOMPAT_CORRUPT, .has_corrupt = true, + .refcount_bits = s->refcount_bits, }; } @@ -2498,7 +2631,7 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) #if 0 static void dump_refcounts(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int64_t nb_clusters, k, k1, size; int refcount; @@ -2519,7 +2652,7 @@ static void dump_refcounts(BlockDriverState *bs) static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int64_t total_sectors = bs->total_sectors; bool zero_beyond_eof = bs->zero_beyond_eof; int ret; @@ -2540,7 +2673,7 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, int64_t pos, int size) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; bool zero_beyond_eof = bs->zero_beyond_eof; int ret; @@ -2559,7 +2692,7 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, static int qcow2_downgrade(BlockDriverState *bs, int target_version, BlockDriverAmendStatusCB *status_cb) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int current_version = s->qcow_version; int ret; @@ -2623,7 +2756,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version, static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, BlockDriverAmendStatusCB *status_cb) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int old_version = s->qcow_version, new_version = old_version; uint64_t new_size = 0; const char *backing_file = NULL, *backing_format = NULL; @@ -2641,8 +2774,8 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, continue; } - if (!strcmp(desc->name, "compat")) { - compat = qemu_opt_get(opts, "compat"); + if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) { + compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL); if (!compat) { /* preserve default */ } else if (!strcmp(compat, "0.10")) { @@ -2653,33 +2786,38 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, fprintf(stderr, "Unknown compatibility level %s.\n", compat); return -EINVAL; } - } else if (!strcmp(desc->name, "preallocation")) { + } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) { fprintf(stderr, "Cannot change preallocation mode.\n"); return -ENOTSUP; - } else if (!strcmp(desc->name, "size")) { - new_size = qemu_opt_get_size(opts, "size", 0); - } else if (!strcmp(desc->name, "backing_file")) { - backing_file = qemu_opt_get(opts, "backing_file"); - } else if (!strcmp(desc->name, "backing_fmt")) { - backing_format = qemu_opt_get(opts, "backing_fmt"); - } else if (!strcmp(desc->name, "encryption")) { - encrypt = qemu_opt_get_bool(opts, "encryption", s->crypt_method); - if (encrypt != !!s->crypt_method) { + } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) { + new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); + } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) { + backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); + } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) { + backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); + } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) { + encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, + !!s->cipher); + + if (encrypt != !!s->cipher) { fprintf(stderr, "Changing the encryption flag is not " "supported.\n"); return -ENOTSUP; } - } else if (!strcmp(desc->name, "cluster_size")) { - cluster_size = qemu_opt_get_size(opts, "cluster_size", + } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) { + cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, cluster_size); if (cluster_size != s->cluster_size) { fprintf(stderr, "Changing the cluster size is not " "supported.\n"); return -ENOTSUP; } - } else if (!strcmp(desc->name, "lazy_refcounts")) { - lazy_refcounts = qemu_opt_get_bool(opts, "lazy_refcounts", + } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) { + lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS, lazy_refcounts); + } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) { + error_report("Cannot change refcount entry width"); + return -ENOTSUP; } else { /* if this assertion fails, this probably means a new option was * added without having it covered here */ @@ -2707,8 +2845,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } if (backing_file || backing_format) { - ret = qcow2_change_backing_file(bs, backing_file ?: bs->backing_file, - backing_format ?: bs->backing_format); + ret = qcow2_change_backing_file(bs, + backing_file ?: s->image_backing_file, + backing_format ?: s->image_backing_format); if (ret < 0) { return ret; } @@ -2764,7 +2903,8 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, int64_t size, const char *message_format, ...) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; + const char *node_name; char *message; va_list ap; @@ -2788,8 +2928,11 @@ void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, "corruption events will be suppressed\n", message); } - qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), message, - offset >= 0, offset, size >= 0, size, + node_name = bdrv_get_node_name(bs); + qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), + *node_name != '\0', node_name, + message, offset >= 0, offset, + size >= 0, size, fatal, &error_abort); g_free(message); @@ -2849,13 +2992,19 @@ static QemuOptsList qcow2_create_opts = { .help = "Postpone refcount updates", .def_value_str = "off" }, + { + .name = BLOCK_OPT_REFCOUNT_BITS, + .type = QEMU_OPT_NUMBER, + .help = "Width of a reference count entry in bits", + .def_value_str = "16" + }, { /* end of list */ } } }; BlockDriver bdrv_qcow2 = { .format_name = "qcow2", - .instance_size = sizeof(BDRVQcowState), + .instance_size = sizeof(BDRVQcow2State), .bdrv_probe = qcow2_probe, .bdrv_open = qcow2_open, .bdrv_close = qcow2_close, @@ -2895,6 +3044,9 @@ BlockDriver bdrv_qcow2 = { .create_opts = &qcow2_create_opts, .bdrv_check = qcow2_check, .bdrv_amend_options = qcow2_amend_options, + + .bdrv_detach_aio_context = qcow2_detach_aio_context, + .bdrv_attach_aio_context = qcow2_attach_aio_context, }; static void bdrv_qcow2_init(void)