#include "qapi/qobject-input-visitor.h"
#include "qapi/qapi-visit-block-core.h"
#include "crypto.h"
+#include "block/aio_task.h"
/*
Differences with QCOW:
bool l2_cache_entry_size_set;
int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
- uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8);
+ uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size);
+ /* An L2 table is always one cluster in size so the max cache size
+ * should be a multiple of the cluster size. */
+ uint64_t max_l2_cache = ROUND_UP(max_l2_entries * sizeof(uint64_t),
+ s->cluster_size);
combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
s->snapshots_offset = header.snapshots_offset;
s->nb_snapshots = header.nb_snapshots;
- ret = qcow2_read_snapshots(bs);
+ ret = qcow2_read_snapshots(bs, errp);
if (ret < 0) {
- error_setg_errno(errp, -ret, "Could not read snapshots");
goto fail;
}
static void qcow2_reopen_commit(BDRVReopenState *state)
{
qcow2_update_options_commit(state->bs, state->opaque);
+ if (state->flags & BDRV_O_RDWR) {
+ Error *local_err = NULL;
+
+ if (qcow2_reopen_bitmaps_rw(state->bs, &local_err) < 0) {
+ /*
+ * This is not fatal, bitmaps just left read-only, so all following
+ * writes will fail. User can remove read-only bitmaps to unblock
+ * writes or retry reopen.
+ */
+ error_reportf_err(local_err,
+ "%s: Failed to make dirty bitmaps writable: ",
+ bdrv_get_node_name(state->bs));
+ }
+ }
g_free(state->opaque);
}
unsigned int bytes;
int status = 0;
+ qemu_co_mutex_lock(&s->lock);
+
if (!s->metadata_preallocation_checked) {
ret = qcow2_detect_metadata_preallocation(bs);
s->metadata_preallocation = (ret == 1);
}
bytes = MIN(INT_MAX, count);
- qemu_co_mutex_lock(&s->lock);
ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
return ret;
}
+static coroutine_fn int
+qcow2_co_preadv_encrypted(BlockDriverState *bs,
+ uint64_t file_cluster_offset,
+ uint64_t offset,
+ uint64_t bytes,
+ QEMUIOVector *qiov,
+ uint64_t qiov_offset)
+{
+ int ret;
+ BDRVQcow2State *s = bs->opaque;
+ uint8_t *buf;
+
+ assert(bs->encrypted && s->crypto);
+ assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+
+ /*
+ * For encrypted images, read everything into a temporary
+ * contiguous buffer on which the AES functions can work.
+ * Also, decryption in a separate buffer is better as it
+ * prevents the guest from learning information about the
+ * encrypted nature of the virtual disk.
+ */
+
+ buf = qemu_try_blockalign(s->data_file->bs, bytes);
+ if (buf == NULL) {
+ return -ENOMEM;
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ ret = bdrv_co_pread(s->data_file,
+ file_cluster_offset + offset_into_cluster(s, offset),
+ bytes, buf, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
+ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
+ if (qcow2_co_decrypt(bs,
+ file_cluster_offset + offset_into_cluster(s, offset),
+ offset, buf, bytes) < 0)
+ {
+ ret = -EIO;
+ goto fail;
+ }
+ qemu_iovec_from_buf(qiov, qiov_offset, buf, bytes);
+
+fail:
+ qemu_vfree(buf);
+
+ return ret;
+}
+
+typedef struct Qcow2AioTask {
+ AioTask task;
+
+ BlockDriverState *bs;
+ QCow2ClusterType cluster_type; /* only for read */
+ uint64_t file_cluster_offset;
+ uint64_t offset;
+ uint64_t bytes;
+ QEMUIOVector *qiov;
+ uint64_t qiov_offset;
+ QCowL2Meta *l2meta; /* only for write */
+} Qcow2AioTask;
+
+static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task);
+static coroutine_fn int qcow2_add_task(BlockDriverState *bs,
+ AioTaskPool *pool,
+ AioTaskFunc func,
+ QCow2ClusterType cluster_type,
+ uint64_t file_cluster_offset,
+ uint64_t offset,
+ uint64_t bytes,
+ QEMUIOVector *qiov,
+ size_t qiov_offset,
+ QCowL2Meta *l2meta)
+{
+ Qcow2AioTask local_task;
+ Qcow2AioTask *task = pool ? g_new(Qcow2AioTask, 1) : &local_task;
+
+ *task = (Qcow2AioTask) {
+ .task.func = func,
+ .bs = bs,
+ .cluster_type = cluster_type,
+ .qiov = qiov,
+ .file_cluster_offset = file_cluster_offset,
+ .offset = offset,
+ .bytes = bytes,
+ .qiov_offset = qiov_offset,
+ .l2meta = l2meta,
+ };
+
+ trace_qcow2_add_task(qemu_coroutine_self(), bs, pool,
+ func == qcow2_co_preadv_task_entry ? "read" : "write",
+ cluster_type, file_cluster_offset, offset, bytes,
+ qiov, qiov_offset);
+
+ if (!pool) {
+ return func(&task->task);
+ }
+
+ aio_task_pool_start_task(pool, &task->task);
+
+ return 0;
+}
+
+static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
+ QCow2ClusterType cluster_type,
+ uint64_t file_cluster_offset,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov,
+ size_t qiov_offset)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int offset_in_cluster = offset_into_cluster(s, offset);
+
+ switch (cluster_type) {
+ case QCOW2_CLUSTER_ZERO_PLAIN:
+ case QCOW2_CLUSTER_ZERO_ALLOC:
+ /* Both zero types are handled in qcow2_co_preadv_part */
+ g_assert_not_reached();
+
+ case QCOW2_CLUSTER_UNALLOCATED:
+ assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */
+
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
+ return bdrv_co_preadv_part(bs->backing, offset, bytes,
+ qiov, qiov_offset, 0);
+
+ case QCOW2_CLUSTER_COMPRESSED:
+ return qcow2_co_preadv_compressed(bs, file_cluster_offset,
+ offset, bytes, qiov, qiov_offset);
+
+ case QCOW2_CLUSTER_NORMAL:
+ if ((file_cluster_offset & 511) != 0) {
+ return -EIO;
+ }
+
+ if (bs->encrypted) {
+ return qcow2_co_preadv_encrypted(bs, file_cluster_offset,
+ offset, bytes, qiov, qiov_offset);
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ return bdrv_co_preadv_part(s->data_file,
+ file_cluster_offset + offset_in_cluster,
+ bytes, qiov, qiov_offset, 0);
+
+ default:
+ g_assert_not_reached();
+ }
+
+ g_assert_not_reached();
+}
+
+static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task)
+{
+ Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
+
+ assert(!t->l2meta);
+
+ return qcow2_co_preadv_task(t->bs, t->cluster_type, t->file_cluster_offset,
+ t->offset, t->bytes, t->qiov, t->qiov_offset);
+}
+
static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov,
size_t qiov_offset, int flags)
{
BDRVQcow2State *s = bs->opaque;
- int offset_in_cluster;
- int ret;
+ int ret = 0;
unsigned int cur_bytes; /* number of bytes in current iteration */
uint64_t cluster_offset = 0;
- uint8_t *cluster_data = NULL;
-
- while (bytes != 0) {
+ AioTaskPool *aio = NULL;
+ while (bytes != 0 && aio_task_pool_status(aio) == 0) {
/* prepare next request */
cur_bytes = MIN(bytes, INT_MAX);
if (s->crypto) {
ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
- goto fail;
+ goto out;
}
- offset_in_cluster = offset_into_cluster(s, offset);
-
- switch (ret) {
- case QCOW2_CLUSTER_UNALLOCATED:
-
- if (bs->backing) {
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
- ret = bdrv_co_preadv_part(bs->backing, offset, cur_bytes,
- qiov, qiov_offset, 0);
- if (ret < 0) {
- goto fail;
- }
- } else {
- /* Note: in this case, no need to wait */
- qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
- }
- break;
-
- case QCOW2_CLUSTER_ZERO_PLAIN:
- case QCOW2_CLUSTER_ZERO_ALLOC:
+ if (ret == QCOW2_CLUSTER_ZERO_PLAIN ||
+ ret == QCOW2_CLUSTER_ZERO_ALLOC ||
+ (ret == QCOW2_CLUSTER_UNALLOCATED && !bs->backing))
+ {
qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
- break;
-
- case QCOW2_CLUSTER_COMPRESSED:
- ret = qcow2_co_preadv_compressed(bs, cluster_offset,
- offset, cur_bytes,
- qiov, qiov_offset);
- if (ret < 0) {
- goto fail;
- }
-
- break;
-
- case QCOW2_CLUSTER_NORMAL:
- if ((cluster_offset & 511) != 0) {
- ret = -EIO;
- goto fail;
+ } else {
+ if (!aio && cur_bytes != bytes) {
+ aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
}
-
- if (bs->encrypted) {
- assert(s->crypto);
-
- /*
- * For encrypted images, read everything into a temporary
- * contiguous buffer on which the AES functions can work.
- */
- if (!cluster_data) {
- cluster_data =
- qemu_try_blockalign(s->data_file->bs,
- QCOW_MAX_CRYPT_CLUSTERS
- * s->cluster_size);
- if (cluster_data == NULL) {
- ret = -ENOMEM;
- goto fail;
- }
- }
-
- assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
-
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- ret = bdrv_co_pread(s->data_file,
- cluster_offset + offset_in_cluster,
- cur_bytes, cluster_data, 0);
- if (ret < 0) {
- goto fail;
- }
-
- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
- if (qcow2_co_decrypt(bs, cluster_offset, offset,
- cluster_data, cur_bytes) < 0) {
- ret = -EIO;
- goto fail;
- }
- qemu_iovec_from_buf(qiov, qiov_offset, cluster_data, cur_bytes);
- } else {
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- ret = bdrv_co_preadv_part(s->data_file,
- cluster_offset + offset_in_cluster,
- cur_bytes, qiov, qiov_offset, 0);
- if (ret < 0) {
- goto fail;
- }
+ ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, ret,
+ cluster_offset, offset, cur_bytes,
+ qiov, qiov_offset, NULL);
+ if (ret < 0) {
+ goto out;
}
- break;
-
- default:
- g_assert_not_reached();
- ret = -EIO;
- goto fail;
}
bytes -= cur_bytes;
offset += cur_bytes;
qiov_offset += cur_bytes;
}
- ret = 0;
-fail:
- qemu_vfree(cluster_data);
+out:
+ if (aio) {
+ aio_task_pool_wait_all(aio);
+ if (ret == 0) {
+ ret = aio_task_pool_status(aio);
+ }
+ g_free(aio);
+ }
return ret;
}
return 0;
}
+/*
+ * qcow2_co_pwritev_task
+ * Called with s->lock unlocked
+ * l2meta - if not NULL, qcow2_co_pwritev_task() will consume it. Caller must
+ * not use it somehow after qcow2_co_pwritev_task() call
+ */
+static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs,
+ uint64_t file_cluster_offset,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov,
+ uint64_t qiov_offset,
+ QCowL2Meta *l2meta)
+{
+ int ret;
+ BDRVQcow2State *s = bs->opaque;
+ void *crypt_buf = NULL;
+ int offset_in_cluster = offset_into_cluster(s, offset);
+ QEMUIOVector encrypted_qiov;
+
+ if (bs->encrypted) {
+ assert(s->crypto);
+ assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ crypt_buf = qemu_try_blockalign(bs->file->bs, bytes);
+ if (crypt_buf == NULL) {
+ ret = -ENOMEM;
+ goto out_unlocked;
+ }
+ qemu_iovec_to_buf(qiov, qiov_offset, crypt_buf, bytes);
+
+ if (qcow2_co_encrypt(bs, file_cluster_offset + offset_in_cluster,
+ offset, crypt_buf, bytes) < 0)
+ {
+ ret = -EIO;
+ goto out_unlocked;
+ }
+
+ qemu_iovec_init_buf(&encrypted_qiov, crypt_buf, bytes);
+ qiov = &encrypted_qiov;
+ qiov_offset = 0;
+ }
+
+ /* Try to efficiently initialize the physical space with zeroes */
+ ret = handle_alloc_space(bs, l2meta);
+ if (ret < 0) {
+ goto out_unlocked;
+ }
+
+ /*
+ * If we need to do COW, check if it's possible to merge the
+ * writing of the guest data together with that of the COW regions.
+ * If it's not possible (or not necessary) then write the
+ * guest data now.
+ */
+ if (!merge_cow(offset, bytes, qiov, qiov_offset, l2meta)) {
+ BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
+ trace_qcow2_writev_data(qemu_coroutine_self(),
+ file_cluster_offset + offset_in_cluster);
+ ret = bdrv_co_pwritev_part(s->data_file,
+ file_cluster_offset + offset_in_cluster,
+ bytes, qiov, qiov_offset, 0);
+ if (ret < 0) {
+ goto out_unlocked;
+ }
+ }
+
+ qemu_co_mutex_lock(&s->lock);
+
+ ret = qcow2_handle_l2meta(bs, &l2meta, true);
+ goto out_locked;
+
+out_unlocked:
+ qemu_co_mutex_lock(&s->lock);
+
+out_locked:
+ qcow2_handle_l2meta(bs, &l2meta, false);
+ qemu_co_mutex_unlock(&s->lock);
+
+ qemu_vfree(crypt_buf);
+
+ return ret;
+}
+
+static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task)
+{
+ Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
+
+ assert(!t->cluster_type);
+
+ return qcow2_co_pwritev_task(t->bs, t->file_cluster_offset,
+ t->offset, t->bytes, t->qiov, t->qiov_offset,
+ t->l2meta);
+}
+
static coroutine_fn int qcow2_co_pwritev_part(
BlockDriverState *bs, uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset, int flags)
int ret;
unsigned int cur_bytes; /* number of sectors in current iteration */
uint64_t cluster_offset;
- QEMUIOVector encrypted_qiov;
- uint64_t bytes_done = 0;
- uint8_t *cluster_data = NULL;
QCowL2Meta *l2meta = NULL;
+ AioTaskPool *aio = NULL;
trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
- qemu_co_mutex_lock(&s->lock);
-
- while (bytes != 0) {
+ while (bytes != 0 && aio_task_pool_status(aio) == 0) {
l2meta = NULL;
- offset_in_cluster);
}
+ qemu_co_mutex_lock(&s->lock);
+
ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
&cluster_offset, &l2meta);
if (ret < 0) {
qemu_co_mutex_unlock(&s->lock);
- if (bs->encrypted) {
- assert(s->crypto);
- if (!cluster_data) {
- cluster_data = qemu_try_blockalign(bs->file->bs,
- QCOW_MAX_CRYPT_CLUSTERS
- * s->cluster_size);
- if (cluster_data == NULL) {
- ret = -ENOMEM;
- goto out_unlocked;
- }
- }
-
- assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
- qemu_iovec_to_buf(qiov, qiov_offset + bytes_done,
- cluster_data, cur_bytes);
-
- if (qcow2_co_encrypt(bs, cluster_offset, offset,
- cluster_data, cur_bytes) < 0) {
- ret = -EIO;
- goto out_unlocked;
- }
-
- qemu_iovec_init_buf(&encrypted_qiov, cluster_data, cur_bytes);
+ if (!aio && cur_bytes != bytes) {
+ aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
}
-
- /* Try to efficiently initialize the physical space with zeroes */
- ret = handle_alloc_space(bs, l2meta);
+ ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_task_entry, 0,
+ cluster_offset, offset, cur_bytes,
+ qiov, qiov_offset, l2meta);
+ l2meta = NULL; /* l2meta is consumed by qcow2_co_pwritev_task() */
if (ret < 0) {
- goto out_unlocked;
- }
-
- /* If we need to do COW, check if it's possible to merge the
- * writing of the guest data together with that of the COW regions.
- * If it's not possible (or not necessary) then write the
- * guest data now. */
- if (!merge_cow(offset, cur_bytes,
- bs->encrypted ? &encrypted_qiov : qiov,
- bs->encrypted ? 0 : qiov_offset + bytes_done, l2meta))
- {
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
- trace_qcow2_writev_data(qemu_coroutine_self(),
- cluster_offset + offset_in_cluster);
- ret = bdrv_co_pwritev_part(
- s->data_file, cluster_offset + offset_in_cluster, cur_bytes,
- bs->encrypted ? &encrypted_qiov : qiov,
- bs->encrypted ? 0 : qiov_offset + bytes_done, 0);
- if (ret < 0) {
- goto out_unlocked;
- }
- }
-
- qemu_co_mutex_lock(&s->lock);
-
- ret = qcow2_handle_l2meta(bs, &l2meta, true);
- if (ret) {
- goto out_locked;
+ goto fail_nometa;
}
bytes -= cur_bytes;
offset += cur_bytes;
- bytes_done += cur_bytes;
+ qiov_offset += cur_bytes;
trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
}
ret = 0;
- goto out_locked;
-out_unlocked:
qemu_co_mutex_lock(&s->lock);
out_locked:
qemu_co_mutex_unlock(&s->lock);
- qemu_vfree(cluster_data);
+fail_nometa:
+ if (aio) {
+ aio_task_pool_wait_all(aio);
+ if (ret == 0) {
+ ret = aio_task_pool_status(aio);
+ }
+ g_free(aio);
+ }
+
trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
return ret;
int ret, result = 0;
Error *local_err = NULL;
- qcow2_store_persistent_dirty_bitmaps(bs, &local_err);
+ qcow2_store_persistent_dirty_bitmaps(bs, true, &local_err);
if (local_err != NULL) {
result = -EINVAL;
error_reportf_err(local_err, "Lost persistent bitmaps during "
return 0;
}
+/*
+ * Upgrades an image's version. While newer versions encompass all
+ * features of older versions, some things may have to be presented
+ * differently.
+ */
+static int qcow2_upgrade(BlockDriverState *bs, int target_version,
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+ Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int current_version = s->qcow_version;
+ int ret;
+
+ /* This is qcow2_upgrade(), not qcow2_downgrade() */
+ assert(target_version > current_version);
+
+ /* There are no other versions (yet) that you can upgrade to */
+ assert(target_version == 3);
+
+ status_cb(bs, 0, 1, cb_opaque);
+
+ s->qcow_version = target_version;
+ ret = qcow2_update_header(bs);
+ if (ret < 0) {
+ s->qcow_version = current_version;
+ error_setg_errno(errp, -ret, "Failed to update the image header");
+ return ret;
+ }
+ status_cb(bs, 1, 1, cb_opaque);
+
+ return 0;
+}
+
typedef enum Qcow2AmendOperation {
/* This is the value Qcow2AmendHelperCBInfo::last_operation will be
* statically initialized to so that the helper CB can discern the first
* invocation from an operation change */
QCOW2_NO_OPERATION = 0,
+ QCOW2_UPGRADING,
QCOW2_CHANGING_REFCOUNT_ORDER,
QCOW2_DOWNGRADING,
} Qcow2AmendOperation;
helper_cb_info = (Qcow2AmendHelperCBInfo){
.original_status_cb = status_cb,
.original_cb_opaque = cb_opaque,
- .total_operations = (new_version < old_version)
+ .total_operations = (new_version != old_version)
+ (s->refcount_bits != refcount_bits)
};
/* Upgrade first (some features may require compat=1.1) */
if (new_version > old_version) {
- s->qcow_version = new_version;
- ret = qcow2_update_header(bs);
+ helper_cb_info.current_operation = QCOW2_UPGRADING;
+ ret = qcow2_upgrade(bs, new_version, &qcow2_amend_helper_cb,
+ &helper_cb_info, errp);
if (ret < 0) {
- s->qcow_version = old_version;
- error_setg_errno(errp, -ret, "Failed to update the image header");
return ret;
}
}
.bdrv_detach_aio_context = qcow2_detach_aio_context,
.bdrv_attach_aio_context = qcow2_attach_aio_context,
- .bdrv_reopen_bitmaps_rw = qcow2_reopen_bitmaps_rw,
- .bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap,
- .bdrv_remove_persistent_dirty_bitmap = qcow2_remove_persistent_dirty_bitmap,
+ .bdrv_co_can_store_new_dirty_bitmap = qcow2_co_can_store_new_dirty_bitmap,
+ .bdrv_co_remove_persistent_dirty_bitmap =
+ qcow2_co_remove_persistent_dirty_bitmap,
};
static void bdrv_qcow2_init(void)