#include "block/qdict.h"
#include "sysemu/block-backend.h"
+#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "qcow2.h"
#include "qemu/error-report.h"
#include "qapi/qobject-input-visitor.h"
#include "qapi/qapi-visit-block-core.h"
#include "crypto.h"
+#include "block/aio_task.h"
/*
Differences with QCOW:
uint64_t file_cluster_offset,
uint64_t offset,
uint64_t bytes,
- QEMUIOVector *qiov);
+ QEMUIOVector *qiov,
+ size_t qiov_offset);
static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
{
bool l2_cache_entry_size_set;
int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
- uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8);
+ uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size);
+ /* An L2 table is always one cluster in size so the max cache size
+ * should be a multiple of the cluster size. */
+ uint64_t max_l2_cache = ROUND_UP(max_l2_entries * sizeof(uint64_t),
+ s->cluster_size);
combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
s->snapshots_offset = header.snapshots_offset;
s->nb_snapshots = header.nb_snapshots;
- ret = qcow2_read_snapshots(bs);
+ ret = qcow2_read_snapshots(bs, errp);
if (ret < 0) {
- error_setg_errno(errp, -ret, "Could not read snapshots");
goto fail;
}
static void qcow2_reopen_commit(BDRVReopenState *state)
{
qcow2_update_options_commit(state->bs, state->opaque);
+ if (state->flags & BDRV_O_RDWR) {
+ Error *local_err = NULL;
+
+ if (qcow2_reopen_bitmaps_rw(state->bs, &local_err) < 0) {
+ /*
+ * This is not fatal; the bitmaps are just left read-only, so all
+ * subsequent writes will fail. The user can remove the read-only
+ * bitmaps to unblock writes, or retry the reopen.
+ */
+ error_reportf_err(local_err,
+ "%s: Failed to make dirty bitmaps writable: ",
+ bdrv_get_node_name(state->bs));
+ }
+ }
g_free(state->opaque);
}
unsigned int bytes;
int status = 0;
+ qemu_co_mutex_lock(&s->lock);
+
if (!s->metadata_preallocation_checked) {
ret = qcow2_detect_metadata_preallocation(bs);
s->metadata_preallocation = (ret == 1);
}
bytes = MIN(INT_MAX, count);
- qemu_co_mutex_lock(&s->lock);
ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
return ret;
}
-static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
- uint64_t bytes, QEMUIOVector *qiov,
- int flags)
+static coroutine_fn int
+qcow2_co_preadv_encrypted(BlockDriverState *bs,
+ uint64_t file_cluster_offset,
+ uint64_t offset,
+ uint64_t bytes,
+ QEMUIOVector *qiov,
+ uint64_t qiov_offset)
{
- BDRVQcow2State *s = bs->opaque;
- int offset_in_cluster;
int ret;
- unsigned int cur_bytes; /* number of bytes in current iteration */
- uint64_t cluster_offset = 0;
- uint64_t bytes_done = 0;
- QEMUIOVector hd_qiov;
- uint8_t *cluster_data = NULL;
+ BDRVQcow2State *s = bs->opaque;
+ uint8_t *buf;
- qemu_iovec_init(&hd_qiov, qiov->niov);
+ assert(bs->encrypted && s->crypto);
+ assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
- while (bytes != 0) {
+ /*
+ * For encrypted images, read everything into a temporary
+ * contiguous buffer on which the AES functions can work.
+ * Decrypting into a separate buffer is also better, as it
+ * prevents the guest from learning information about the
+ * encrypted nature of the virtual disk.
+ */
- /* prepare next request */
- cur_bytes = MIN(bytes, INT_MAX);
- if (s->crypto) {
- cur_bytes = MIN(cur_bytes,
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ buf = qemu_try_blockalign(s->data_file->bs, bytes);
+ if (buf == NULL) {
+ return -ENOMEM;
+ }
+
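+ /* Read the encrypted data into the bounce buffer; it is decrypted in place below. */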
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ ret = bdrv_co_pread(s->data_file,
+ file_cluster_offset + offset_into_cluster(s, offset),
+ bytes, buf, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
+ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
+ if (qcow2_co_decrypt(bs,
+ file_cluster_offset + offset_into_cluster(s, offset),
+ offset, buf, bytes) < 0)
+ {
+ ret = -EIO;
+ goto fail;
+ }
+ qemu_iovec_from_buf(qiov, qiov_offset, buf, bytes);
+
+fail:
+ qemu_vfree(buf);
+
+ return ret;
+}
+
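+ /*
+ * A Qcow2AioTask bundles the parameters of one sub-request of a guest
+ * read or write so that it can run either inline in the current
+ * coroutine or in an AioTaskPool worker coroutine.
+ */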
+typedef struct Qcow2AioTask {
+ AioTask task;
+
+ BlockDriverState *bs;
+ QCow2ClusterType cluster_type; /* only for read */
+ uint64_t file_cluster_offset;
+ uint64_t offset;
+ uint64_t bytes;
+ QEMUIOVector *qiov;
+ uint64_t qiov_offset;
+ QCowL2Meta *l2meta; /* only for write */
+} Qcow2AioTask;
+
+static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task);
+static coroutine_fn int qcow2_add_task(BlockDriverState *bs,
+ AioTaskPool *pool,
+ AioTaskFunc func,
+ QCow2ClusterType cluster_type,
+ uint64_t file_cluster_offset,
+ uint64_t offset,
+ uint64_t bytes,
+ QEMUIOVector *qiov,
+ size_t qiov_offset,
+ QCowL2Meta *l2meta)
+{
+ Qcow2AioTask local_task;
+ Qcow2AioTask *task = pool ? g_new(Qcow2AioTask, 1) : &local_task;
+
+ *task = (Qcow2AioTask) {
+ .task.func = func,
+ .bs = bs,
+ .cluster_type = cluster_type,
+ .qiov = qiov,
+ .file_cluster_offset = file_cluster_offset,
+ .offset = offset,
+ .bytes = bytes,
+ .qiov_offset = qiov_offset,
+ .l2meta = l2meta,
+ };
+
+ trace_qcow2_add_task(qemu_coroutine_self(), bs, pool,
+ func == qcow2_co_preadv_task_entry ? "read" : "write",
+ cluster_type, file_cluster_offset, offset, bytes,
+ qiov, qiov_offset);
+
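+ /* Without a pool, execute the task synchronously in this coroutine. */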
+ if (!pool) {
+ return func(&task->task);
+ }
+
+ aio_task_pool_start_task(pool, &task->task);
+
+ return 0;
+}
+
+static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
+ QCow2ClusterType cluster_type,
+ uint64_t file_cluster_offset,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov,
+ size_t qiov_offset)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int offset_in_cluster = offset_into_cluster(s, offset);
+
+ switch (cluster_type) {
+ case QCOW2_CLUSTER_ZERO_PLAIN:
+ case QCOW2_CLUSTER_ZERO_ALLOC:
+ /* Both zero types are handled in qcow2_co_preadv_part */
+ g_assert_not_reached();
+
+ case QCOW2_CLUSTER_UNALLOCATED:
+ assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */
+
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
+ return bdrv_co_preadv_part(bs->backing, offset, bytes,
+ qiov, qiov_offset, 0);
+
+ case QCOW2_CLUSTER_COMPRESSED:
+ return qcow2_co_preadv_compressed(bs, file_cluster_offset,
+ offset, bytes, qiov, qiov_offset);
+
+ case QCOW2_CLUSTER_NORMAL:
+ if ((file_cluster_offset & 511) != 0) {
+ return -EIO;
}
- qemu_co_mutex_lock(&s->lock);
- ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
- qemu_co_mutex_unlock(&s->lock);
- if (ret < 0) {
- goto fail;
+ if (bs->encrypted) {
+ return qcow2_co_preadv_encrypted(bs, file_cluster_offset,
+ offset, bytes, qiov, qiov_offset);
}
- offset_in_cluster = offset_into_cluster(s, offset);
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ return bdrv_co_preadv_part(s->data_file,
+ file_cluster_offset + offset_in_cluster,
+ bytes, qiov, qiov_offset, 0);
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
+ default:
+ g_assert_not_reached();
+ }
- switch (ret) {
- case QCOW2_CLUSTER_UNALLOCATED:
+ g_assert_not_reached();
+}
- if (bs->backing) {
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
- ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
- &hd_qiov, 0);
- if (ret < 0) {
- goto fail;
- }
- } else {
- /* Note: in this case, no need to wait */
- qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
- }
- break;
+static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task)
+{
+ Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
- case QCOW2_CLUSTER_ZERO_PLAIN:
- case QCOW2_CLUSTER_ZERO_ALLOC:
- qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
- break;
+ assert(!t->l2meta);
- case QCOW2_CLUSTER_COMPRESSED:
- ret = qcow2_co_preadv_compressed(bs, cluster_offset,
- offset, cur_bytes,
- &hd_qiov);
- if (ret < 0) {
- goto fail;
- }
+ return qcow2_co_preadv_task(t->bs, t->cluster_type, t->file_cluster_offset,
+ t->offset, t->bytes, t->qiov, t->qiov_offset);
+}
- break;
+static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov,
+ size_t qiov_offset, int flags)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int ret = 0;
+ unsigned int cur_bytes; /* number of bytes in current iteration */
+ uint64_t cluster_offset = 0;
+ AioTaskPool *aio = NULL;
- case QCOW2_CLUSTER_NORMAL:
- if ((cluster_offset & 511) != 0) {
- ret = -EIO;
- goto fail;
- }
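+ /* aio_task_pool_status(NULL) returns 0, so the loop can start before a pool exists. */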
+ while (bytes != 0 && aio_task_pool_status(aio) == 0) {
+ /* prepare next request */
+ cur_bytes = MIN(bytes, INT_MAX);
+ if (s->crypto) {
+ cur_bytes = MIN(cur_bytes,
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ }
- if (bs->encrypted) {
- assert(s->crypto);
-
- /*
- * For encrypted images, read everything into a temporary
- * contiguous buffer on which the AES functions can work.
- */
- if (!cluster_data) {
- cluster_data =
- qemu_try_blockalign(s->data_file->bs,
- QCOW_MAX_CRYPT_CLUSTERS
- * s->cluster_size);
- if (cluster_data == NULL) {
- ret = -ENOMEM;
- goto fail;
- }
- }
+ qemu_co_mutex_lock(&s->lock);
+ ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
+ qemu_co_mutex_unlock(&s->lock);
+ if (ret < 0) {
+ goto out;
+ }
- assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
+ if (ret == QCOW2_CLUSTER_ZERO_PLAIN ||
+ ret == QCOW2_CLUSTER_ZERO_ALLOC ||
+ (ret == QCOW2_CLUSTER_UNALLOCATED && !bs->backing))
+ {
+ qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
+ } else {
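+ /* Create the worker pool lazily: a request that fits in one chunk runs inline. */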
+ if (!aio && cur_bytes != bytes) {
+ aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
}
-
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- ret = bdrv_co_preadv(s->data_file,
- cluster_offset + offset_in_cluster,
- cur_bytes, &hd_qiov, 0);
+ ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, ret,
+ cluster_offset, offset, cur_bytes,
+ qiov, qiov_offset, NULL);
if (ret < 0) {
- goto fail;
- }
- if (bs->encrypted) {
- assert(s->crypto);
- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
- if (qcow2_co_decrypt(bs, cluster_offset, offset,
- cluster_data, cur_bytes) < 0) {
- ret = -EIO;
- goto fail;
- }
- qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
+ goto out;
}
- break;
-
- default:
- g_assert_not_reached();
- ret = -EIO;
- goto fail;
}
bytes -= cur_bytes;
offset += cur_bytes;
- bytes_done += cur_bytes;
+ qiov_offset += cur_bytes;
}
- ret = 0;
-fail:
- qemu_iovec_destroy(&hd_qiov);
- qemu_vfree(cluster_data);
+out:
+ if (aio) {
+ aio_task_pool_wait_all(aio);
+ if (ret == 0) {
+ ret = aio_task_pool_status(aio);
+ }
+ g_free(aio);
+ }
return ret;
}
/* Check if it's possible to merge a write request with the writing of
* the data from the COW regions */
static bool merge_cow(uint64_t offset, unsigned bytes,
- QEMUIOVector *hd_qiov, QCowL2Meta *l2meta)
+ QEMUIOVector *qiov, size_t qiov_offset,
+ QCowL2Meta *l2meta)
{
QCowL2Meta *m;
/* Make sure that adding both COW regions to the QEMUIOVector
* does not exceed IOV_MAX */
- if (hd_qiov->niov > IOV_MAX - 2) {
+ if (qemu_iovec_subvec_niov(qiov, qiov_offset, bytes) > IOV_MAX - 2) {
continue;
}
- m->data_qiov = hd_qiov;
+ m->data_qiov = qiov;
+ m->data_qiov_offset = qiov_offset;
return true;
}
return false;
}
-static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
- uint64_t bytes, QEMUIOVector *qiov,
- int flags)
+/*
+ * qcow2_co_pwritev_task
+ * Called with s->lock unlocked
+ * l2meta - if not NULL, qcow2_co_pwritev_task() will consume it. The caller
+ *          must not use it in any way after the call.
+ */
+static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs,
+ uint64_t file_cluster_offset,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov,
+ uint64_t qiov_offset,
+ QCowL2Meta *l2meta)
+{
+ int ret;
+ BDRVQcow2State *s = bs->opaque;
+ void *crypt_buf = NULL;
+ int offset_in_cluster = offset_into_cluster(s, offset);
+ QEMUIOVector encrypted_qiov;
+
+ if (bs->encrypted) {
+ assert(s->crypto);
+ assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ crypt_buf = qemu_try_blockalign(bs->file->bs, bytes);
+ if (crypt_buf == NULL) {
+ ret = -ENOMEM;
+ goto out_unlocked;
+ }
+ qemu_iovec_to_buf(qiov, qiov_offset, crypt_buf, bytes);
+
+ if (qcow2_co_encrypt(bs, file_cluster_offset + offset_in_cluster,
+ offset, crypt_buf, bytes) < 0)
+ {
+ ret = -EIO;
+ goto out_unlocked;
+ }
+
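+ /* From here on, write the encrypted bounce buffer instead of the guest qiov. */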
+ qemu_iovec_init_buf(&encrypted_qiov, crypt_buf, bytes);
+ qiov = &encrypted_qiov;
+ qiov_offset = 0;
+ }
+
+ /* Try to efficiently initialize the physical space with zeroes */
+ ret = handle_alloc_space(bs, l2meta);
+ if (ret < 0) {
+ goto out_unlocked;
+ }
+
+ /*
+ * If we need to do COW, check if it's possible to merge the
+ * writing of the guest data together with that of the COW regions.
+ * If it's not possible (or not necessary) then write the
+ * guest data now.
+ */
+ if (!merge_cow(offset, bytes, qiov, qiov_offset, l2meta)) {
+ BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
+ trace_qcow2_writev_data(qemu_coroutine_self(),
+ file_cluster_offset + offset_in_cluster);
+ ret = bdrv_co_pwritev_part(s->data_file,
+ file_cluster_offset + offset_in_cluster,
+ bytes, qiov, qiov_offset, 0);
+ if (ret < 0) {
+ goto out_unlocked;
+ }
+ }
+
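+ /* The L2 metadata update below must happen under s->lock. */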
+ qemu_co_mutex_lock(&s->lock);
+
+ ret = qcow2_handle_l2meta(bs, &l2meta, true);
+ goto out_locked;
+
+out_unlocked:
+ qemu_co_mutex_lock(&s->lock);
+
+out_locked:
+ qcow2_handle_l2meta(bs, &l2meta, false);
+ qemu_co_mutex_unlock(&s->lock);
+
+ qemu_vfree(crypt_buf);
+
+ return ret;
+}
+
+static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task)
+{
+ Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
+
+ assert(!t->cluster_type);
+
+ return qcow2_co_pwritev_task(t->bs, t->file_cluster_offset,
+ t->offset, t->bytes, t->qiov, t->qiov_offset,
+ t->l2meta);
+}
+
+static coroutine_fn int qcow2_co_pwritev_part(
+ BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
BDRVQcow2State *s = bs->opaque;
int offset_in_cluster;
int ret;
unsigned int cur_bytes; /* number of bytes in current iteration */
uint64_t cluster_offset;
- QEMUIOVector hd_qiov;
- uint64_t bytes_done = 0;
- uint8_t *cluster_data = NULL;
QCowL2Meta *l2meta = NULL;
+ AioTaskPool *aio = NULL;
trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
- qemu_iovec_init(&hd_qiov, qiov->niov);
-
- qemu_co_mutex_lock(&s->lock);
-
- while (bytes != 0) {
+ while (bytes != 0 && aio_task_pool_status(aio) == 0) {
l2meta = NULL;
- offset_in_cluster);
}
+ qemu_co_mutex_lock(&s->lock);
+
ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
&cluster_offset, &l2meta);
if (ret < 0) {
qemu_co_mutex_unlock(&s->lock);
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
-
- if (bs->encrypted) {
- assert(s->crypto);
- if (!cluster_data) {
- cluster_data = qemu_try_blockalign(bs->file->bs,
- QCOW_MAX_CRYPT_CLUSTERS
- * s->cluster_size);
- if (cluster_data == NULL) {
- ret = -ENOMEM;
- goto out_unlocked;
- }
- }
-
- assert(hd_qiov.size <=
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
- qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
-
- if (qcow2_co_encrypt(bs, cluster_offset, offset,
- cluster_data, cur_bytes) < 0) {
- ret = -EIO;
- goto out_unlocked;
- }
-
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
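+ /* As in the read path, create the worker pool only for multi-chunk requests. */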
+ if (!aio && cur_bytes != bytes) {
+ aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
}
-
- /* Try to efficiently initialize the physical space with zeroes */
- ret = handle_alloc_space(bs, l2meta);
+ ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_task_entry, 0,
+ cluster_offset, offset, cur_bytes,
+ qiov, qiov_offset, l2meta);
+ l2meta = NULL; /* l2meta is consumed by qcow2_co_pwritev_task() */
if (ret < 0) {
- goto out_unlocked;
- }
-
- /* If we need to do COW, check if it's possible to merge the
- * writing of the guest data together with that of the COW regions.
- * If it's not possible (or not necessary) then write the
- * guest data now. */
- if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) {
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
- trace_qcow2_writev_data(qemu_coroutine_self(),
- cluster_offset + offset_in_cluster);
- ret = bdrv_co_pwritev(s->data_file,
- cluster_offset + offset_in_cluster,
- cur_bytes, &hd_qiov, 0);
- if (ret < 0) {
- goto out_unlocked;
- }
- }
-
- qemu_co_mutex_lock(&s->lock);
-
- ret = qcow2_handle_l2meta(bs, &l2meta, true);
- if (ret) {
- goto out_locked;
+ goto fail_nometa;
}
bytes -= cur_bytes;
offset += cur_bytes;
- bytes_done += cur_bytes;
+ qiov_offset += cur_bytes;
trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
}
ret = 0;
- goto out_locked;
-out_unlocked:
qemu_co_mutex_lock(&s->lock);
out_locked:
qemu_co_mutex_unlock(&s->lock);
- qemu_iovec_destroy(&hd_qiov);
- qemu_vfree(cluster_data);
+fail_nometa:
+ if (aio) {
+ aio_task_pool_wait_all(aio);
+ if (ret == 0) {
+ ret = aio_task_pool_status(aio);
+ }
+ g_free(aio);
+ }
+
trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
return ret;
int ret, result = 0;
Error *local_err = NULL;
- qcow2_store_persistent_dirty_bitmaps(bs, &local_err);
+ qcow2_store_persistent_dirty_bitmaps(bs, true, &local_err);
if (local_err != NULL) {
result = -EINVAL;
error_reportf_err(local_err, "Lost persistent bitmaps during "
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
static coroutine_fn int
-qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
- uint64_t bytes, QEMUIOVector *qiov)
+qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset)
{
BDRVQcow2State *s = bs->opaque;
int ret;
/* Zero-pad last write if image size is not cluster aligned */
memset(buf + bytes, 0, s->cluster_size - bytes);
}
- qemu_iovec_to_buf(qiov, 0, buf, bytes);
+ qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes);
out_buf = g_malloc(s->cluster_size);
buf, s->cluster_size);
if (out_len == -ENOMEM) {
/* could not compress: write normal cluster */
- ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
+ ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0);
if (ret < 0) {
goto fail;
}
uint64_t file_cluster_offset,
uint64_t offset,
uint64_t bytes,
- QEMUIOVector *qiov)
+ QEMUIOVector *qiov,
+ size_t qiov_offset)
{
BDRVQcow2State *s = bs->opaque;
int ret = 0, csize, nb_csectors;
goto fail;
}
- qemu_iovec_from_buf(qiov, 0, out_buf + offset_in_cluster, bytes);
+ qemu_iovec_from_buf(qiov, qiov_offset, out_buf + offset_in_cluster, bytes);
fail:
qemu_vfree(out_buf);
return spec_info;
}
+static int qcow2_has_zero_init(BlockDriverState *bs)
+{
+ BDRVQcow2State *s = bs->opaque;
+ bool preallocated;
+
+ if (qemu_in_coroutine()) {
+ qemu_co_mutex_lock(&s->lock);
+ }
+ /*
+ * Check preallocation status: preallocated images have all L2
+ * tables allocated, non-preallocated images have none. It is
+ * therefore enough to check the first one.
+ */
+ preallocated = s->l1_size > 0 && s->l1_table[0] != 0;
+ if (qemu_in_coroutine()) {
+ qemu_co_mutex_unlock(&s->lock);
+ }
+
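+ /* Unallocated clusters in a non-preallocated qcow2 image read back as zeroes. */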
+ if (!preallocated) {
+ return 1;
+ } else if (bs->encrypted) {
+ return 0;
+ } else {
+ return bdrv_has_zero_init(s->data_file->bs);
+ }
+}
+
static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t pos)
{
BDRVQcow2State *s = bs->opaque;
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
- return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos,
- qiov->size, qiov, 0);
+ return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos,
+ qiov->size, qiov, 0, 0);
}
static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
BDRVQcow2State *s = bs->opaque;
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
- return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos,
- qiov->size, qiov, 0);
+ return bs->drv->bdrv_co_preadv_part(bs, qcow2_vm_state_offset(s) + pos,
+ qiov->size, qiov, 0, 0);
}
/*
return 0;
}
+/*
+ * Upgrades an image's version. While newer versions encompass all
+ * features of older versions, some things may have to be presented
+ * differently.
+ */
+static int qcow2_upgrade(BlockDriverState *bs, int target_version,
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+ Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int current_version = s->qcow_version;
+ int ret;
+
+ /* This is qcow2_upgrade(), not qcow2_downgrade() */
+ assert(target_version > current_version);
+
+ /* There are no other versions (yet) that you can upgrade to */
+ assert(target_version == 3);
+
+ status_cb(bs, 0, 1, cb_opaque);
+
+ s->qcow_version = target_version;
+ ret = qcow2_update_header(bs);
+ if (ret < 0) {
+ s->qcow_version = current_version;
+ error_setg_errno(errp, -ret, "Failed to update the image header");
+ return ret;
+ }
+ status_cb(bs, 1, 1, cb_opaque);
+
+ return 0;
+}
+
typedef enum Qcow2AmendOperation {
/* This is the value Qcow2AmendHelperCBInfo::last_operation will be
* statically initialized to so that the helper CB can discern the first
* invocation from an operation change */
QCOW2_NO_OPERATION = 0,
+ QCOW2_UPGRADING,
QCOW2_CHANGING_REFCOUNT_ORDER,
QCOW2_DOWNGRADING,
} Qcow2AmendOperation;
compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL);
if (!compat) {
/* preserve default */
- } else if (!strcmp(compat, "0.10")) {
+ } else if (!strcmp(compat, "0.10") || !strcmp(compat, "v2")) {
new_version = 2;
- } else if (!strcmp(compat, "1.1")) {
+ } else if (!strcmp(compat, "1.1") || !strcmp(compat, "v3")) {
new_version = 3;
} else {
error_setg(errp, "Unknown compatibility level %s", compat);
helper_cb_info = (Qcow2AmendHelperCBInfo){
.original_status_cb = status_cb,
.original_cb_opaque = cb_opaque,
- .total_operations = (new_version < old_version)
+ .total_operations = (new_version != old_version)
+ (s->refcount_bits != refcount_bits)
};
/* Upgrade first (some features may require compat=1.1) */
if (new_version > old_version) {
- s->qcow_version = new_version;
- ret = qcow2_update_header(bs);
+ helper_cb_info.current_operation = QCOW2_UPGRADING;
+ ret = qcow2_upgrade(bs, new_version, &qcow2_amend_helper_cb,
+ &helper_cb_info, errp);
if (ret < 0) {
- s->qcow_version = old_version;
- error_setg_errno(errp, -ret, "Failed to update the image header");
return ret;
}
}
{
.name = BLOCK_OPT_COMPAT_LEVEL,
.type = QEMU_OPT_STRING,
- .help = "Compatibility level (0.10 or 1.1)"
+ .help = "Compatibility level (v2 [0.10] or v3 [1.1])"
},
{
.name = BLOCK_OPT_BACKING_FILE,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_create_opts = qcow2_co_create_opts,
.bdrv_co_create = qcow2_co_create,
- .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .bdrv_has_zero_init = qcow2_has_zero_init,
+ .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1,
.bdrv_co_block_status = qcow2_co_block_status,
- .bdrv_co_preadv = qcow2_co_preadv,
- .bdrv_co_pwritev = qcow2_co_pwritev,
+ .bdrv_co_preadv_part = qcow2_co_preadv_part,
+ .bdrv_co_pwritev_part = qcow2_co_pwritev_part,
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
.bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
.bdrv_co_copy_range_from = qcow2_co_copy_range_from,
.bdrv_co_copy_range_to = qcow2_co_copy_range_to,
.bdrv_co_truncate = qcow2_co_truncate,
- .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
+ .bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part,
.bdrv_make_empty = qcow2_make_empty,
.bdrv_snapshot_create = qcow2_snapshot_create,
.bdrv_detach_aio_context = qcow2_detach_aio_context,
.bdrv_attach_aio_context = qcow2_attach_aio_context,
- .bdrv_reopen_bitmaps_rw = qcow2_reopen_bitmaps_rw,
- .bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap,
- .bdrv_remove_persistent_dirty_bitmap = qcow2_remove_persistent_dirty_bitmap,
+ .bdrv_co_can_store_new_dirty_bitmap = qcow2_co_can_store_new_dirty_bitmap,
+ .bdrv_co_remove_persistent_dirty_bitmap =
+ qcow2_co_remove_persistent_dirty_bitmap,
};
static void bdrv_qcow2_init(void)