X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/a7f53e26a6f5bd64cda617cbcca118601d9a01d9..0ee4de5840ccc1072459ec68062bfb63c888a94d:/block.c diff --git a/block.c b/block.c index 773e87ef08..cbe4a32a5a 100644 --- a/block.c +++ b/block.c @@ -97,6 +97,10 @@ static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = static QLIST_HEAD(, BlockDriver) bdrv_drivers = QLIST_HEAD_INITIALIZER(bdrv_drivers); +static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors); +static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors); /* If non-zero, use only whitelisted block drivers */ static int use_bdrv_whitelist; @@ -229,7 +233,7 @@ size_t bdrv_opt_mem_align(BlockDriverState *bs) } /* check if the path starts with ":" */ -static int path_has_protocol(const char *path) +int path_has_protocol(const char *path) { const char *p; @@ -303,15 +307,32 @@ void path_combine(char *dest, int dest_size, } } -void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz) +void bdrv_get_full_backing_filename_from_filename(const char *backed, + const char *backing, + char *dest, size_t sz, + Error **errp) { - if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) { - pstrcpy(dest, sz, bs->backing_file); + if (backing[0] == '\0' || path_has_protocol(backing) || + path_is_absolute(backing)) + { + pstrcpy(dest, sz, backing); + } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { + error_setg(errp, "Cannot use relative backing file names for '%s'", + backed); } else { - path_combine(dest, sz, bs->filename, bs->backing_file); + path_combine(dest, sz, backed, backing); } } +void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz, + Error **errp) +{ + char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename; + + bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file, + dest, sz, errp); +} + void bdrv_register(BlockDriver *bdrv) { /* Block drivers without coroutine functions need emulation */ @@ -519,6 +540,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) return; } bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length; + bs->bl.max_transfer_length = bs->file->bl.max_transfer_length; bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment; } else { bs->bl.opt_mem_alignment = 512; @@ -533,6 +555,9 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.opt_transfer_length = MAX(bs->bl.opt_transfer_length, bs->backing_hd->bl.opt_transfer_length); + bs->bl.max_transfer_length = + MIN_NON_ZERO(bs->bl.max_transfer_length, + bs->backing_hd->bl.max_transfer_length); bs->bl.opt_mem_alignment = MAX(bs->bl.opt_mem_alignment, bs->backing_hd->bl.opt_mem_alignment); @@ -625,7 +650,7 @@ BlockDriver *bdrv_find_protocol(const char *filename, } if (!path_has_protocol(filename) || !allow_protocol_prefix) { - return bdrv_find_format("file"); + return &bdrv_file; } p = strchr(filename, ':'); @@ -644,22 +669,49 @@ BlockDriver *bdrv_find_protocol(const char *filename, return NULL; } +/* + * Guess image format by probing its contents. + * This is not a good idea when your image is raw (CVE-2008-2004), but + * we do it anyway for backward compatibility. + * + * @buf contains the image's first @buf_size bytes. + * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, + * but can be smaller if the image file is smaller) + * @filename is its filename. + * + * For all block drivers, call the bdrv_probe() method to get its + * probing score. + * Return the first block driver with the highest probing score. + */ +BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, + const char *filename) +{ + int score_max = 0, score; + BlockDriver *drv = NULL, *d; + + QLIST_FOREACH(d, &bdrv_drivers, list) { + if (d->bdrv_probe) { + score = d->bdrv_probe(buf, buf_size, filename); + if (score > score_max) { + score_max = score; + drv = d; + } + } + } + + return drv; +} + static int find_image_format(BlockDriverState *bs, const char *filename, BlockDriver **pdrv, Error **errp) { - int score, score_max; - BlockDriver *drv1, *drv; - uint8_t buf[2048]; + BlockDriver *drv; + uint8_t buf[BLOCK_PROBE_BUF_SIZE]; int ret = 0; /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { - drv = bdrv_find_format("raw"); - if (!drv) { - error_setg(errp, "Could not find raw image format"); - ret = -ENOENT; - } - *pdrv = drv; + *pdrv = &bdrv_raw; return ret; } @@ -671,17 +723,7 @@ static int find_image_format(BlockDriverState *bs, const char *filename, return ret; } - score_max = 0; - drv = NULL; - QLIST_FOREACH(drv1, &bdrv_drivers, list) { - if (drv1->bdrv_probe) { - score = drv1->bdrv_probe(buf, ret, filename); - if (score > score_max) { - score_max = score; - drv = drv1; - } - } - } + drv = bdrv_probe_all(buf, ret, filename); if (!drv) { error_setg(errp, "Could not determine image format: No compatible " "driver found"); @@ -1158,7 +1200,7 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) bdrv_op_block_all(bs->backing_hd, bs->backing_blocker); /* Otherwise we won't be able to commit due to check in bdrv_commit */ - bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, + bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, bs->backing_blocker); out: bdrv_refresh_limits(bs, NULL); @@ -1176,7 +1218,6 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) { char *backing_filename = g_malloc0(PATH_MAX); int ret = 0; - BlockDriver *back_drv = NULL; BlockDriverState *backing_hd; Error *local_err = NULL; @@ -1197,7 +1238,14 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) QDECREF(options); goto free_exit; } else { - bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX); + bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX, + &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + QDECREF(options); + goto free_exit; + } } if (!bs->drv || !bs->drv->supports_backing) { @@ -1209,14 +1257,14 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) backing_hd = bdrv_new(); - if (bs->backing_format[0] != '\0') { - back_drv = bdrv_find_format(bs->backing_format); + if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) { + qdict_put(options, "driver", qstring_from_str(bs->backing_format)); } assert(bs->backing_hd == NULL); ret = bdrv_open(&backing_hd, *backing_filename ? backing_filename : NULL, NULL, options, - bdrv_backing_flags(bs->open_flags), back_drv, &local_err); + bdrv_backing_flags(bs->open_flags), NULL, &local_err); if (ret < 0) { bdrv_unref(backing_hd); backing_hd = NULL; @@ -1290,7 +1338,6 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ char *tmp_filename = g_malloc0(PATH_MAX + 1); int64_t total_size; - BlockDriver *bdrv_qcow2; QemuOpts *opts = NULL; QDict *snapshot_options; BlockDriverState *bs_snapshot; @@ -1315,11 +1362,10 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) goto out; } - bdrv_qcow2 = bdrv_find_format("qcow2"); - opts = qemu_opts_create(bdrv_qcow2->create_opts, NULL, 0, + opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, &error_abort); qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size); - ret = bdrv_create(bdrv_qcow2, tmp_filename, opts, &local_err); + ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err); qemu_opts_del(opts); if (ret < 0) { error_setg_errno(errp, -ret, "Could not create temporary overlay " @@ -1339,7 +1385,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) bs_snapshot = bdrv_new(); ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options, - flags, bdrv_qcow2, &local_err); + flags, &bdrv_qcow2, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto out; @@ -1463,6 +1509,7 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, } /* Image format probing */ + bs->probed = !drv; if (!drv && file) { ret = find_image_format(file, filename, &drv, &local_err); if (ret < 0) { @@ -1900,6 +1947,34 @@ static bool bdrv_requests_pending(BlockDriverState *bs) return false; } +static bool bdrv_drain_one(BlockDriverState *bs) +{ + bool bs_busy; + + bdrv_flush_io_queue(bs); + bdrv_start_throttled_reqs(bs); + bs_busy = bdrv_requests_pending(bs); + bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy); + return bs_busy; +} + +/* + * Wait for pending requests to complete on a single BlockDriverState subtree + * + * See the warning in bdrv_drain_all(). This function can only be called if + * you are sure nothing can generate I/O because you have op blockers + * installed. + * + * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState + * AioContext. + */ +void bdrv_drain(BlockDriverState *bs) +{ + while (bdrv_drain_one(bs)) { + /* Keep iterating */ + } +} + /* * Wait for pending requests to complete across all BlockDriverStates * @@ -1923,16 +1998,10 @@ void bdrv_drain_all(void) QTAILQ_FOREACH(bs, &bdrv_states, device_list) { AioContext *aio_context = bdrv_get_aio_context(bs); - bool bs_busy; aio_context_acquire(aio_context); - bdrv_flush_io_queue(bs); - bdrv_start_throttled_reqs(bs); - bs_busy = bdrv_requests_pending(bs); - bs_busy |= aio_poll(aio_context, bs_busy); + busy |= bdrv_drain_one(bs); aio_context_release(aio_context); - - busy |= bs_busy; } } } @@ -2147,8 +2216,8 @@ int bdrv_commit(BlockDriverState *bs) return -ENOTSUP; } - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) || - bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) { + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || + bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { return -EBUSY; } @@ -2764,8 +2833,8 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) if (nb_sectors <= 0) { return 0; } - if (nb_sectors > INT_MAX) { - nb_sectors = INT_MAX; + if (nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) { + nb_sectors = INT_MAX / BDRV_SECTOR_SIZE; } ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n); if (ret < 0) { @@ -2993,18 +3062,16 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num), align >> BDRV_SECTOR_BITS); - if (max_nb_sectors > 0) { + if (nb_sectors < max_nb_sectors) { + ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); + } else if (max_nb_sectors > 0) { QEMUIOVector local_qiov; - size_t local_sectors; - - max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS); - local_sectors = MIN(max_nb_sectors, nb_sectors); qemu_iovec_init(&local_qiov, qiov->niov); qemu_iovec_concat(&local_qiov, qiov, 0, - local_sectors * BDRV_SECTOR_SIZE); + max_nb_sectors * BDRV_SECTOR_SIZE); - ret = drv->bdrv_co_readv(bs, sector_num, local_sectors, + ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors, &local_qiov); qemu_iovec_destroy(&local_qiov); @@ -3177,6 +3244,9 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, if (ret == -ENOTSUP) { /* Fall back to bounce buffer if write zeroes is unsupported */ + int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length, + MAX_WRITE_ZEROES_DEFAULT); + num = MIN(num, max_xfer_len); iov.iov_len = num * BDRV_SECTOR_SIZE; if (iov.iov_base == NULL) { iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE); @@ -3193,7 +3263,7 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, /* Keep bounce buffer around if it is big enough for all * all future requests. */ - if (num < max_write_zeroes) { + if (num < max_xfer_len) { qemu_vfree(iov.iov_base); iov.iov_base = NULL; } @@ -3540,10 +3610,10 @@ static void send_qmp_error_event(BlockDriverState *bs, BlockErrorAction action, bool is_read, int error) { - BlockErrorAction ac; + IoOperationType optype; - ac = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; - qapi_event_send_block_io_error(bdrv_get_device_name(bs), ac, action, + optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; + qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action, bdrv_iostatus_is_enabled(bs), error == ENOSPC, strerror(error), &error_abort); @@ -3775,6 +3845,14 @@ bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) return top != NULL; } +BlockDriverState *bdrv_next_node(BlockDriverState *bs) +{ + if (!bs) { + return QTAILQ_FIRST(&graph_bdrv_states); + } + return QTAILQ_NEXT(bs, node_list); +} + BlockDriverState *bdrv_next(BlockDriverState *bs) { if (!bs) { @@ -3783,6 +3861,11 @@ BlockDriverState *bdrv_next(BlockDriverState *bs) return QTAILQ_NEXT(bs, device_list); } +const char *bdrv_get_node_name(const BlockDriverState *bs) +{ + return bs->node_name; +} + /* TODO check what callers really want: bs->node_name or blk_name() */ const char *bdrv_get_device_name(const BlockDriverState *bs) { @@ -3877,9 +3960,9 @@ typedef struct BdrvCoGetBlockStatusData { } BdrvCoGetBlockStatusData; /* - * Returns true iff the specified sector is present in the disk image. Drivers - * not implementing the functionality are assumed to not support backing files, - * hence all their sectors are reported as allocated. + * Returns the allocation status of the specified sectors. + * Drivers not implementing the functionality are assumed to not support + * backing files, hence all their sectors are reported as allocated. * * If 'sector_num' is beyond the end of the disk image the return value is 0 * and 'pnum' is set to 0. @@ -3954,13 +4037,24 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, if (bs->file && (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && (ret & BDRV_BLOCK_OFFSET_VALID)) { + int file_pnum; + ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS, - *pnum, pnum); + *pnum, &file_pnum); if (ret2 >= 0) { /* Ignore errors. This is just providing extra information, it * is useful but not necessary. */ - ret |= (ret2 & BDRV_BLOCK_ZERO); + if (!file_pnum) { + /* !file_pnum indicates an offset at or beyond the EOF; it is + * perfectly valid for the format block driver to point to such + * offsets, so catch it and mark everything as zero */ + ret |= BDRV_BLOCK_ZERO; + } else { + /* Limit request to the range reported by the protocol driver */ + *pnum = file_pnum; + ret |= (ret2 & BDRV_BLOCK_ZERO); + } } } @@ -4431,6 +4525,11 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, merge = 0; } + if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors + + reqs[i].nb_sectors > bs->bl.max_transfer_length) { + merge = 0; + } + if (merge) { size_t size; QEMUIOVector *qiov = g_malloc0(sizeof(*qiov)); @@ -5200,6 +5299,11 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size) return qemu_memalign(bdrv_opt_mem_align(bs), size); } +void *qemu_blockalign0(BlockDriverState *bs, size_t size) +{ + return memset(qemu_blockalign(bs, size), 0, size); +} + void *qemu_try_blockalign(BlockDriverState *bs, size_t size) { size_t align = bdrv_opt_mem_align(bs); @@ -5213,6 +5317,17 @@ void *qemu_try_blockalign(BlockDriverState *bs, size_t size) return qemu_try_memalign(align, size); } +void *qemu_try_blockalign0(BlockDriverState *bs, size_t size) +{ + void *mem = qemu_try_blockalign(bs, size); + + if (mem) { + memset(mem, 0, size); + } + + return mem; +} + /* * Check if all memory in this vector is sector aligned. */ @@ -5303,8 +5418,20 @@ void bdrv_dirty_iter_init(BlockDriverState *bs, hbitmap_iter_init(hbi, bitmap->bitmap, 0); } -void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, - int nr_sectors) +void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors) +{ + hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); +} + +void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors) +{ + hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); +} + +static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors) { BdrvDirtyBitmap *bitmap; QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { @@ -5312,7 +5439,8 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, } } -void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors) +static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors) { BdrvDirtyBitmap *bitmap; QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { @@ -5483,6 +5611,18 @@ void bdrv_img_create(const char *filename, const char *fmt, return; } + if (!drv->create_opts) { + error_setg(errp, "Format driver '%s' does not support image creation", + drv->format_name); + return; + } + + if (!proto_drv->create_opts) { + error_setg(errp, "Protocol driver '%s' does not support image creation", + proto_drv->format_name); + return; + } + create_opts = qemu_opts_append(create_opts, drv->create_opts); create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); @@ -5539,22 +5679,27 @@ void bdrv_img_create(const char *filename, const char *fmt, if (size == -1) { if (backing_file) { BlockDriverState *bs; + char *full_backing = g_new0(char, PATH_MAX); int64_t size; int back_flags; + bdrv_get_full_backing_filename_from_filename(filename, backing_file, + full_backing, PATH_MAX, + &local_err); + if (local_err) { + g_free(full_backing); + goto out; + } + /* backing files always opened read-only */ back_flags = flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); bs = NULL; - ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags, + ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags, backing_drv, &local_err); + g_free(full_backing); if (ret < 0) { - error_setg_errno(errp, -ret, "Could not open '%s': %s", - backing_file, - error_get_pretty(local_err)); - error_free(local_err); - local_err = NULL; goto out; } size = bdrv_getlength(bs); @@ -5575,8 +5720,8 @@ void bdrv_img_create(const char *filename, const char *fmt, } if (!quiet) { - printf("Formatting '%s', fmt=%s ", filename, fmt); - qemu_opts_print(opts); + printf("Formatting '%s', fmt=%s", filename, fmt); + qemu_opts_print(opts, " "); puts(""); } @@ -5723,12 +5868,13 @@ void bdrv_add_before_write_notifier(BlockDriverState *bs, notifier_with_return_list_add(&bs->before_write_notifiers, notifier); } -int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts) +int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb) { if (!bs->drv->bdrv_amend_options) { return -ENOTSUP; } - return bs->drv->bdrv_amend_options(bs, opts); + return bs->drv->bdrv_amend_options(bs, opts, status_cb); } /* This function will be called by the bdrv_recurse_is_first_non_filter method @@ -5791,13 +5937,19 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate) BlockDriverState *check_to_replace_node(const char *node_name, Error **errp) { BlockDriverState *to_replace_bs = bdrv_find_node(node_name); + AioContext *aio_context; + if (!to_replace_bs) { error_setg(errp, "Node name '%s' not found", node_name); return NULL; } + aio_context = bdrv_get_aio_context(to_replace_bs); + aio_context_acquire(aio_context); + if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { - return NULL; + to_replace_bs = NULL; + goto out; } /* We don't want arbitrary node of the BDS chain to be replaced only the top @@ -5807,9 +5959,12 @@ BlockDriverState *check_to_replace_node(const char *node_name, Error **errp) */ if (!bdrv_is_first_non_filter(to_replace_bs)) { error_setg(errp, "Only top most non filter can be replaced"); - return NULL; + to_replace_bs = NULL; + goto out; } +out: + aio_context_release(aio_context); return to_replace_bs; }