X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/0459650d94d18218808fcabc8c3227d2ee99af39..c8e829b7bf6e1c84af8b4b13ee7fce2959c63e0e:/block/qed.c diff --git a/block/qed.c b/block/qed.c index 8802ad3845..80f18d82e2 100644 --- a/block/qed.c +++ b/block/qed.c @@ -18,21 +18,8 @@ #include "qapi/qmp/qerror.h" #include "migration/migration.h" -static void qed_aio_cancel(BlockDriverAIOCB *blockacb) -{ - QEDAIOCB *acb = (QEDAIOCB *)blockacb; - bool finished = false; - - /* Wait for the request to finish */ - acb->finished = &finished; - while (!finished) { - qemu_aio_wait(); - } -} - static const AIOCBInfo qed_aiocb_info = { .aiocb_size = sizeof(QEDAIOCB), - .cancel = qed_aio_cancel, }; static int bdrv_qed_probe(const uint8_t *buf, int buf_size, @@ -143,7 +130,7 @@ static void qed_write_header_read_cb(void *opaque, int ret) * This function only updates known header fields in-place and does not affect * extra data after the QED header. */ -static void qed_write_header(BDRVQEDState *s, BlockDriverCompletionFunc cb, +static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb, void *opaque) { /* We must write full sectors for O_DIRECT but cannot necessarily generate @@ -373,6 +360,27 @@ static void bdrv_qed_rebind(BlockDriverState *bs) s->bs = bs; } +static void bdrv_qed_detach_aio_context(BlockDriverState *bs) +{ + BDRVQEDState *s = bs->opaque; + + qed_cancel_need_check_timer(s); + timer_free(s->need_check_timer); +} + +static void bdrv_qed_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + BDRVQEDState *s = bs->opaque; + + s->need_check_timer = aio_timer_new(new_context, + QEMU_CLOCK_VIRTUAL, SCALE_NS, + qed_need_check_timer_cb, s); + if (s->header.features & QED_F_NEED_CHECK) { + qed_start_need_check_timer(s); + } +} + static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -400,7 +408,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, snprintf(buf, sizeof(buf), "%" PRIx64, s->header.features & ~QED_FEATURE_MASK); error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, - bs->device_name, "QED", buf); + bdrv_get_device_name(bs), "QED", buf); return -ENOTSUP; } if (!qed_is_cluster_size_valid(s->header.cluster_size)) { @@ -496,8 +504,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, } } - s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, - qed_need_check_timer_cb, s); + bdrv_qed_attach_aio_context(bs, bdrv_get_aio_context(bs)); out: if (ret) { @@ -507,13 +514,11 @@ out: return ret; } -static int bdrv_qed_refresh_limits(BlockDriverState *bs) +static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp) { BDRVQEDState *s = bs->opaque; bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS; - - return 0; } /* We have nothing to do for QED reopen, stubs just return @@ -528,8 +533,7 @@ static void bdrv_qed_close(BlockDriverState *bs) { BDRVQEDState *s = bs->opaque; - qed_cancel_need_check_timer(s); - timer_free(s->need_check_timer); + bdrv_qed_detach_aio_context(bs); /* Ensure writes reach stable storage */ bdrv_flush(bs->file); @@ -547,7 +551,7 @@ static void bdrv_qed_close(BlockDriverState *bs) static int qed_create(const char *filename, uint32_t cluster_size, uint64_t image_size, uint32_t table_size, const char *backing_file, const char *backing_fmt, - Error **errp) + QemuOpts *opts, Error **errp) { QEDHeader header = { .magic = QED_MAGIC, @@ -566,7 +570,7 @@ static int qed_create(const char *filename, uint32_t cluster_size, int ret = 0; BlockDriverState *bs; - ret = bdrv_create_file(filename, NULL, &local_err); + ret = bdrv_create_file(filename, opts, &local_err); if (ret < 0) { error_propagate(errp, local_err); return ret; @@ -621,53 +625,54 @@ out: return ret; } -static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options, - Error **errp) +static int bdrv_qed_create(const char *filename, QemuOpts *opts, Error **errp) { uint64_t image_size = 0; uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE; uint32_t table_size = QED_DEFAULT_TABLE_SIZE; - const char *backing_file = NULL; - const char *backing_fmt = NULL; - - while (options && options->name) { - if (!strcmp(options->name, BLOCK_OPT_SIZE)) { - image_size = options->value.n; - } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { - backing_file = options->value.s; - } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) { - backing_fmt = options->value.s; - } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) { - if (options->value.n) { - cluster_size = options->value.n; - } - } else if (!strcmp(options->name, BLOCK_OPT_TABLE_SIZE)) { - if (options->value.n) { - table_size = options->value.n; - } - } - options++; - } + char *backing_file = NULL; + char *backing_fmt = NULL; + int ret; + + image_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), + BDRV_SECTOR_SIZE); + backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); + backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT); + cluster_size = qemu_opt_get_size_del(opts, + BLOCK_OPT_CLUSTER_SIZE, + QED_DEFAULT_CLUSTER_SIZE); + table_size = qemu_opt_get_size_del(opts, BLOCK_OPT_TABLE_SIZE, + QED_DEFAULT_TABLE_SIZE); if (!qed_is_cluster_size_valid(cluster_size)) { - fprintf(stderr, "QED cluster size must be within range [%u, %u] and power of 2\n", - QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE); - return -EINVAL; + error_setg(errp, "QED cluster size must be within range [%u, %u] " + "and power of 2", + QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE); + ret = -EINVAL; + goto finish; } if (!qed_is_table_size_valid(table_size)) { - fprintf(stderr, "QED table size must be within range [%u, %u] and power of 2\n", - QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE); - return -EINVAL; + error_setg(errp, "QED table size must be within range [%u, %u] " + "and power of 2", + QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE); + ret = -EINVAL; + goto finish; } if (!qed_is_image_size_valid(image_size, cluster_size, table_size)) { - fprintf(stderr, "QED image size must be a non-zero multiple of " - "cluster size and less than %" PRIu64 " bytes\n", - qed_max_image_size(cluster_size, table_size)); - return -EINVAL; + error_setg(errp, "QED image size must be a non-zero multiple of " + "cluster size and less than %" PRIu64 " bytes", + qed_max_image_size(cluster_size, table_size)); + ret = -EINVAL; + goto finish; } - return qed_create(filename, cluster_size, image_size, table_size, - backing_file, backing_fmt, errp); + ret = qed_create(filename, cluster_size, image_size, table_size, + backing_file, backing_fmt, opts, errp); + +finish: + g_free(backing_file); + g_free(backing_fmt); + return ret; } typedef struct { @@ -741,18 +746,20 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb) /** * Read from the backing file or zero-fill if no backing file * - * @s: QED state - * @pos: Byte position in device - * @qiov: Destination I/O vector - * @cb: Completion function - * @opaque: User data for completion function + * @s: QED state + * @pos: Byte position in device + * @qiov: Destination I/O vector + * @backing_qiov: Possibly shortened copy of qiov, to be allocated here + * @cb: Completion function + * @opaque: User data for completion function * * This function reads qiov->size bytes starting at pos from the backing file. * If there is no backing file then zeroes are read. */ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, QEMUIOVector *qiov, - BlockDriverCompletionFunc *cb, void *opaque) + QEMUIOVector **backing_qiov, + BlockCompletionFunc *cb, void *opaque) { uint64_t backing_length = 0; size_t size; @@ -784,15 +791,21 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, /* If the read straddles the end of the backing file, shorten it */ size = MIN((uint64_t)backing_length - pos, qiov->size); + assert(*backing_qiov == NULL); + *backing_qiov = g_new(QEMUIOVector, 1); + qemu_iovec_init(*backing_qiov, qiov->niov); + qemu_iovec_concat(*backing_qiov, qiov, 0, size); + BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO); bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE, - qiov, size / BDRV_SECTOR_SIZE, cb, opaque); + *backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque); } typedef struct { GenericCB gencb; BDRVQEDState *s; QEMUIOVector qiov; + QEMUIOVector *backing_qiov; struct iovec iov; uint64_t offset; } CopyFromBackingFileCB; @@ -809,6 +822,12 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret) CopyFromBackingFileCB *copy_cb = opaque; BDRVQEDState *s = copy_cb->s; + if (copy_cb->backing_qiov) { + qemu_iovec_destroy(copy_cb->backing_qiov); + g_free(copy_cb->backing_qiov); + copy_cb->backing_qiov = NULL; + } + if (ret) { qed_copy_from_backing_file_cb(copy_cb, ret); return; @@ -832,7 +851,7 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret) */ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, uint64_t len, uint64_t offset, - BlockDriverCompletionFunc *cb, + BlockCompletionFunc *cb, void *opaque) { CopyFromBackingFileCB *copy_cb; @@ -846,11 +865,12 @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque); copy_cb->s = s; copy_cb->offset = offset; + copy_cb->backing_qiov = NULL; copy_cb->iov.iov_base = qemu_blockalign(s->bs, len); copy_cb->iov.iov_len = len; qemu_iovec_init_external(©_cb->qiov, ©_cb->iov, 1); - qed_read_backing_file(s, pos, ©_cb->qiov, + qed_read_backing_file(s, pos, ©_cb->qiov, ©_cb->backing_qiov, qed_copy_from_backing_file_write, copy_cb); } @@ -882,21 +902,15 @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index, static void qed_aio_complete_bh(void *opaque) { QEDAIOCB *acb = opaque; - BlockDriverCompletionFunc *cb = acb->common.cb; + BlockCompletionFunc *cb = acb->common.cb; void *user_opaque = acb->common.opaque; int ret = acb->bh_ret; - bool *finished = acb->finished; qemu_bh_delete(acb->bh); - qemu_aio_release(acb); + qemu_aio_unref(acb); /* Invoke callback */ cb(user_opaque, ret); - - /* Signal cancel completion */ - if (finished) { - *finished = true; - } } static void qed_aio_complete(QEDAIOCB *acb, int ret) @@ -917,7 +931,8 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret) /* Arrange for a bh to invoke the completion function */ acb->bh_ret = ret; - acb->bh = qemu_bh_new(qed_aio_complete_bh, acb); + acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs), + qed_aio_complete_bh, acb); qemu_bh_schedule(acb->bh); /* Start next allocating write request waiting behind this one. Note that @@ -1049,7 +1064,7 @@ static void qed_aio_write_main(void *opaque, int ret) BDRVQEDState *s = acb_to_s(acb); uint64_t offset = acb->cur_cluster + qed_offset_into_cluster(s, acb->cur_pos); - BlockDriverCompletionFunc *next_fn; + BlockCompletionFunc *next_fn; trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size); @@ -1149,7 +1164,7 @@ static void qed_aio_write_zero_cluster(void *opaque, int ret) static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) { BDRVQEDState *s = acb_to_s(acb); - BlockDriverCompletionFunc *cb; + BlockCompletionFunc *cb; /* Cancel timer when the first allocating request comes in */ if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) { @@ -1206,7 +1221,11 @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) struct iovec *iov = acb->qiov->iov; if (!iov->iov_base) { - iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len); + iov->iov_base = qemu_try_blockalign(acb->common.bs, iov->iov_len); + if (iov->iov_base == NULL) { + qed_aio_complete(acb, -ENOMEM); + return; + } memset(iov->iov_base, 0, iov->iov_len); } } @@ -1292,7 +1311,7 @@ static void qed_aio_read_data(void *opaque, int ret, return; } else if (ret != QED_CLUSTER_FOUND) { qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov, - qed_aio_next_io, acb); + &acb->backing_qiov, qed_aio_next_io, acb); return; } @@ -1318,6 +1337,12 @@ static void qed_aio_next_io(void *opaque, int ret) trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size); + if (acb->backing_qiov) { + qemu_iovec_destroy(acb->backing_qiov); + g_free(acb->backing_qiov); + acb->backing_qiov = NULL; + } + /* Handle I/O error */ if (ret) { qed_aio_complete(acb, ret); @@ -1340,11 +1365,11 @@ static void qed_aio_next_io(void *opaque, int ret) io_fn, acb); } -static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs, - int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, - void *opaque, int flags) +static BlockAIOCB *qed_aio_setup(BlockDriverState *bs, + int64_t sector_num, + QEMUIOVector *qiov, int nb_sectors, + BlockCompletionFunc *cb, + void *opaque, int flags) { QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque); @@ -1352,11 +1377,11 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs, opaque, flags); acb->flags = flags; - acb->finished = NULL; acb->qiov = qiov; acb->qiov_offset = 0; acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE; acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE; + acb->backing_qiov = NULL; acb->request.l2_table = NULL; qemu_iovec_init(&acb->cur_qiov, qiov->niov); @@ -1365,20 +1390,20 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs, return &acb->common; } -static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs, - int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, - void *opaque) +static BlockAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs, + int64_t sector_num, + QEMUIOVector *qiov, int nb_sectors, + BlockCompletionFunc *cb, + void *opaque) { return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); } -static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs, - int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, - void *opaque) +static BlockAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs, + int64_t sector_num, + QEMUIOVector *qiov, int nb_sectors, + BlockCompletionFunc *cb, + void *opaque) { return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, QED_AIOCB_WRITE); @@ -1406,7 +1431,7 @@ static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs, int nb_sectors, BdrvRequestFlags flags) { - BlockDriverAIOCB *blockacb; + BlockAIOCB *blockacb; BDRVQEDState *s = bs->opaque; QEDWriteZeroesCB cb = { .done = false }; QEMUIOVector qiov; @@ -1558,13 +1583,31 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs, return ret; } -static void bdrv_qed_invalidate_cache(BlockDriverState *bs) +static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp) { BDRVQEDState *s = bs->opaque; + Error *local_err = NULL; + int ret; bdrv_qed_close(bs); + + bdrv_invalidate_cache(bs->file, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + memset(s, 0, sizeof(BDRVQEDState)); - bdrv_qed_open(bs, NULL, bs->open_flags, NULL); + ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err); + if (local_err) { + error_setg(errp, "Could not reopen qed layer: %s", + error_get_pretty(local_err)); + error_free(local_err); + return; + } else if (ret < 0) { + error_setg_errno(errp, -ret, "Could not reopen qed layer"); + return; + } } static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result, @@ -1575,36 +1618,45 @@ static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result, return qed_check(s, result, !!fix); } -static QEMUOptionParameter qed_create_options[] = { - { - .name = BLOCK_OPT_SIZE, - .type = OPT_SIZE, - .help = "Virtual disk size (in bytes)" - }, { - .name = BLOCK_OPT_BACKING_FILE, - .type = OPT_STRING, - .help = "File name of a base image" - }, { - .name = BLOCK_OPT_BACKING_FMT, - .type = OPT_STRING, - .help = "Image format of the base image" - }, { - .name = BLOCK_OPT_CLUSTER_SIZE, - .type = OPT_SIZE, - .help = "Cluster size (in bytes)", - .value = { .n = QED_DEFAULT_CLUSTER_SIZE }, - }, { - .name = BLOCK_OPT_TABLE_SIZE, - .type = OPT_SIZE, - .help = "L1/L2 table size (in clusters)" - }, - { /* end of list */ } +static QemuOptsList qed_create_opts = { + .name = "qed-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(qed_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { + .name = BLOCK_OPT_BACKING_FILE, + .type = QEMU_OPT_STRING, + .help = "File name of a base image" + }, + { + .name = BLOCK_OPT_BACKING_FMT, + .type = QEMU_OPT_STRING, + .help = "Image format of the base image" + }, + { + .name = BLOCK_OPT_CLUSTER_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Cluster size (in bytes)", + .def_value_str = stringify(QED_DEFAULT_CLUSTER_SIZE) + }, + { + .name = BLOCK_OPT_TABLE_SIZE, + .type = QEMU_OPT_SIZE, + .help = "L1/L2 table size (in clusters)" + }, + { /* end of list */ } + } }; static BlockDriver bdrv_qed = { .format_name = "qed", .instance_size = sizeof(BDRVQEDState), - .create_options = qed_create_options, + .create_opts = &qed_create_opts, + .supports_backing = true, .bdrv_probe = bdrv_qed_probe, .bdrv_rebind = bdrv_qed_rebind, @@ -1624,6 +1676,8 @@ static BlockDriver bdrv_qed = { .bdrv_change_backing_file = bdrv_qed_change_backing_file, .bdrv_invalidate_cache = bdrv_qed_invalidate_cache, .bdrv_check = bdrv_qed_check, + .bdrv_detach_aio_context = bdrv_qed_detach_aio_context, + .bdrv_attach_aio_context = bdrv_qed_attach_aio_context, }; static void bdrv_qed_init(void)