#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+#define COROUTINE_POOL_RESERVATION 64 /* number of coroutines to reserve */
+
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
co = qemu_coroutine_create(bdrv_create_co_entry);
qemu_coroutine_enter(co, &cco);
while (cco.ret == NOT_DONE) {
- qemu_aio_wait();
+ aio_poll(qemu_get_aio_context(), true);
}
}
return ret;
}
-int bdrv_refresh_limits(BlockDriverState *bs)
+void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
BlockDriver *drv = bs->drv;
+ Error *local_err = NULL;
memset(&bs->bl, 0, sizeof(bs->bl));
if (!drv) {
- return 0;
+ return;
}
/* Take some limits from the children as a default */
if (bs->file) {
- bdrv_refresh_limits(bs->file);
+ bdrv_refresh_limits(bs->file, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
} else {
}
if (bs->backing_hd) {
- bdrv_refresh_limits(bs->backing_hd);
+ bdrv_refresh_limits(bs->backing_hd, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
bs->bl.opt_transfer_length =
MAX(bs->bl.opt_transfer_length,
bs->backing_hd->bl.opt_transfer_length);
/* Then let the driver override it */
if (drv->bdrv_refresh_limits) {
- return drv->bdrv_refresh_limits(bs);
+ drv->bdrv_refresh_limits(bs, errp);
}
-
- return 0;
}
/*
/**
* Set the current 'total_sectors' value
+ * Return 0 on success, -errno on error.
*/
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
goto free_and_fail;
}
- bdrv_refresh_limits(bs);
+ bdrv_refresh_limits(bs, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto free_and_fail;
+ }
+
assert(bdrv_opt_mem_align(bs) != 0);
assert((bs->request_alignment != 0) || bs->sg);
return 0;
bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
bs->backing_blocker);
out:
- bdrv_refresh_limits(bs);
+ bdrv_refresh_limits(bs, NULL);
}
/*
return ret;
}
-void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
+int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
{
/* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
char *tmp_filename = g_malloc0(PATH_MAX + 1);
/* Get the required size from the image */
total_size = bdrv_getlength(bs);
if (total_size < 0) {
+ ret = total_size;
error_setg_errno(errp, -total_size, "Could not get image size");
goto out;
}
- total_size &= BDRV_SECTOR_MASK;
/* Create the temporary image */
ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
out:
g_free(tmp_filename);
+ return ret;
}
/*
/* For snapshot=on, create a temporary qcow2 overlay. bs points to the
* temporary snapshot afterwards. */
if (snapshot_flags) {
- bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
+ ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
if (local_err) {
- error_propagate(errp, local_err);
goto close_and_fail;
}
}
BDRV_O_CACHE_WB);
reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
- bdrv_refresh_limits(reopen_state->bs);
+ bdrv_refresh_limits(reopen_state->bs, NULL);
}
/*
bool bs_busy;
aio_context_acquire(aio_context);
+ bdrv_flush_io_queue(bs);
bdrv_start_throttled_reqs(bs);
bs_busy = bdrv_requests_pending(bs);
bs_busy |= aio_poll(aio_context, bs_busy);
}
bs->dev = dev;
bdrv_iostatus_reset(bs);
+
+ /* We're expecting I/O from the device so bump up coroutine pool size */
+ qemu_coroutine_adjust_pool_size(COROUTINE_POOL_RESERVATION);
return 0;
}
bs->dev_ops = NULL;
bs->dev_opaque = NULL;
bs->guest_block_size = 512;
+ qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION);
}
/* TODO change to return DeviceState * when all users are qdevified */
}
total_sectors = length >> BDRV_SECTOR_BITS;
- buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
+
+ /* qemu_try_blockalign() for bs will choose an alignment that works for
+ * bs->backing_hd as well, so no need to compare the alignment manually. */
+ buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
+ if (buf == NULL) {
+ ret = -ENOMEM;
+ goto ro_cleanup;
+ }
for (sector = 0; sector < total_sectors; sector += n) {
ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
ret = 0;
ro_cleanup:
- g_free(buf);
+ qemu_vfree(buf);
if (ro) {
/* ignoring error return here */
*
* Returns NULL if bs is not found in active's image chain,
* or if active == bs.
+ *
+ * Returns the bottommost base image if bs == NULL.
*/
BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
BlockDriverState *bs)
{
- BlockDriverState *overlay = NULL;
- BlockDriverState *intermediate;
-
- assert(active != NULL);
- assert(bs != NULL);
-
- /* if bs is the same as active, then by definition it has no overlay
- */
- if (active == bs) {
- return NULL;
+ while (active && bs != active->backing_hd) {
+ active = active->backing_hd;
}
- intermediate = active;
- while (intermediate->backing_hd) {
- if (intermediate->backing_hd == bs) {
- overlay = intermediate;
- break;
- }
- intermediate = intermediate->backing_hd;
- }
+ return active;
+}
- return overlay;
+/* Given a BDS, searches for the base layer. */
+BlockDriverState *bdrv_find_base(BlockDriverState *bs)
+{
+ return bdrv_find_overlay(bs, NULL);
}
typedef struct BlkIntermediateStates {
*
* base <- active
*
+ * If backing_file_str is non-NULL, it will be used when modifying top's
+ * overlay image metadata.
+ *
* Error conditions:
* if active == top, that is considered an error
*
*/
int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
- BlockDriverState *base)
+ BlockDriverState *base, const char *backing_file_str)
{
BlockDriverState *intermediate;
BlockDriverState *base_bs = NULL;
}
/* success - we can delete the intermediate states, and link top->base */
- ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
+ backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
+ ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
base_bs->drv ? base_bs->drv->format_name : "");
if (ret) {
goto exit;
*/
int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
{
- int64_t target_size;
- int64_t ret, nb_sectors, sector_num = 0;
+ int64_t target_sectors, ret, nb_sectors, sector_num = 0;
int n;
- target_size = bdrv_getlength(bs);
- if (target_size < 0) {
- return target_size;
+ target_sectors = bdrv_nb_sectors(bs);
+ if (target_sectors < 0) {
+ return target_sectors;
}
- target_size /= BDRV_SECTOR_SIZE;
for (;;) {
- nb_sectors = target_size - sector_num;
+ nb_sectors = target_sectors - sector_num;
if (nb_sectors <= 0) {
return 0;
}
cluster_sector_num, cluster_nb_sectors);
iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
- iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
+ iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
+ if (bounce_buffer == NULL) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert(!qiov || bytes == qiov->size);
/* Handle Copy on Read and associated serialisation */
if (flags & BDRV_REQ_COPY_ON_READ) {
ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
} else {
/* Read zeros after EOF of growable BDSes */
- int64_t len, total_sectors, max_nb_sectors;
+ int64_t total_sectors, max_nb_sectors;
- len = bdrv_getlength(bs);
- if (len < 0) {
- ret = len;
+ total_sectors = bdrv_nb_sectors(bs);
+ if (total_sectors < 0) {
+ ret = total_sectors;
goto out;
}
- total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE);
max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
align >> BDRV_SECTOR_BITS);
if (max_nb_sectors > 0) {
- ret = drv->bdrv_co_readv(bs, sector_num,
- MIN(nb_sectors, max_nb_sectors), qiov);
+ QEMUIOVector local_qiov;
+ size_t local_sectors;
+
+ max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS);
+ local_sectors = MIN(max_nb_sectors, nb_sectors);
+
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_iovec_concat(&local_qiov, qiov, 0,
+ local_sectors * BDRV_SECTOR_SIZE);
+
+ ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
+ &local_qiov);
+
+ qemu_iovec_destroy(&local_qiov);
} else {
ret = 0;
}
/* Fall back to bounce buffer if write zeroes is unsupported */
iov.iov_len = num * BDRV_SECTOR_SIZE;
if (iov.iov_base == NULL) {
- iov.iov_base = qemu_blockalign(bs, num * BDRV_SECTOR_SIZE);
+ iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
+ if (iov.iov_base == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
}
qemu_iovec_init_external(&qiov, &iov, 1);
nb_sectors -= num;
}
+fail:
qemu_vfree(iov.iov_base);
return ret;
}
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert(!qiov || bytes == qiov->size);
waited = wait_serialising_requests(req);
assert(!waited || !req->serialising);
}
/**
- * Length of a file in bytes. Return < 0 if error or unknown.
+ * Return number of sectors on success, -errno on error.
*/
-int64_t bdrv_getlength(BlockDriverState *bs)
+int64_t bdrv_nb_sectors(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
+
if (!drv)
return -ENOMEDIUM;
return ret;
}
}
- return bs->total_sectors * BDRV_SECTOR_SIZE;
+ return bs->total_sectors;
+}
+
+/**
+ * Return length in bytes on success, -errno on error.
+ * The length is always a multiple of BDRV_SECTOR_SIZE.
+ */
+int64_t bdrv_getlength(BlockDriverState *bs)
+{
+ int64_t ret = bdrv_nb_sectors(bs);
+
+ return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
}
/* return 0 as number of sectors if no device present or error */
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
{
- int64_t length;
- length = bdrv_getlength(bs);
- if (length < 0)
- length = 0;
- else
- length = length >> BDRV_SECTOR_BITS;
- *nb_sectors_ptr = length;
+ int64_t nb_sectors = bdrv_nb_sectors(bs);
+
+ *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
}
void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
return NULL;
}
+/* If 'base' is in the same chain as 'top', return true. Otherwise,
+ * return false. If either argument is NULL, return false. */
+bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
+{
+ while (top && top != base) {
+ top = top->backing_hd;
+ }
+
+ return top != NULL;
+}
+
BlockDriverState *bdrv_next(BlockDriverState *bs)
{
if (!bs) {
int64_t sector_num,
int nb_sectors, int *pnum)
{
- int64_t length;
+ int64_t total_sectors;
int64_t n;
int64_t ret, ret2;
- length = bdrv_getlength(bs);
- if (length < 0) {
- return length;
+ total_sectors = bdrv_nb_sectors(bs);
+ if (total_sectors < 0) {
+ return total_sectors;
}
- if (sector_num >= (length >> BDRV_SECTOR_BITS)) {
+ if (sector_num >= total_sectors) {
*pnum = 0;
return 0;
}
- n = bs->total_sectors - sector_num;
+ n = total_sectors - sector_num;
if (n < nb_sectors) {
nb_sectors = n;
}
ret |= BDRV_BLOCK_ZERO;
} else if (bs->backing_hd) {
BlockDriverState *bs2 = bs->backing_hd;
- int64_t length2 = bdrv_getlength(bs2);
- if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) {
+ int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
+ if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
ret |= BDRV_BLOCK_ZERO;
}
}
if (ret < 0) {
return ret;
}
- return (ret & BDRV_BLOCK_ALLOCATED);
+ return !!(ret & BDRV_BLOCK_ALLOCATED);
}
/*
return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
}
-BlockDriverState *bdrv_find_base(BlockDriverState *bs)
-{
- BlockDriverState *curr_bs = NULL;
-
- if (!bs) {
- return NULL;
- }
-
- curr_bs = bs;
-
- while (curr_bs->backing_hd) {
- curr_bs = curr_bs->backing_hd;
- }
- return curr_bs;
-}
-
/**************************************************************/
/* async I/Os */
{
BlockDriverAIOCBSync *acb = opaque;
- if (!acb->is_write)
+ if (!acb->is_write && acb->ret >= 0) {
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
+ }
qemu_vfree(acb->bounce);
acb->common.cb(acb->common.opaque, acb->ret);
qemu_bh_delete(acb->bh);
acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
acb->is_write = is_write;
acb->qiov = qiov;
- acb->bounce = qemu_blockalign(bs, qiov->size);
+ acb->bounce = qemu_try_blockalign(bs, qiov->size);
acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
- if (is_write) {
+ if (acb->bounce == NULL) {
+ acb->ret = -ENOMEM;
+ } else if (is_write) {
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
} else {
return qemu_memalign(bdrv_opt_mem_align(bs), size);
}
+void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
+{
+ size_t align = bdrv_opt_mem_align(bs);
+
+ /* Ensure that NULL is never returned on success */
+ assert(align > 0);
+ if (size == 0) {
+ size = align;
+ }
+
+ return qemu_try_memalign(align, size);
+}
+
/*
* Check if all memory in this vector is sector aligned.
*/
granularity >>= BDRV_SECTOR_BITS;
assert(granularity);
- bitmap_size = bdrv_getlength(bs);
+ bitmap_size = bdrv_nb_sectors(bs);
if (bitmap_size < 0) {
error_setg_errno(errp, -bitmap_size, "could not get length of device");
errno = -bitmap_size;
return NULL;
}
- bitmap_size >>= BDRV_SECTOR_BITS;
bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
* deleted. */
void bdrv_unref(BlockDriverState *bs)
{
+ if (!bs) {
+ return;
+ }
assert(bs->refcnt > 0);
if (--bs->refcnt == 0) {
bdrv_delete(bs);
if (size == -1) {
if (backing_file) {
BlockDriverState *bs;
- uint64_t size;
+ int64_t size;
int back_flags;
/* backing files always opened read-only */
local_err = NULL;
goto out;
}
- bdrv_get_geometry(bs, &size);
- size *= 512;
+ size = bdrv_getlength(bs);
+ if (size < 0) {
+ error_setg_errno(errp, -size, "Could not get size of '%s'",
+ backing_file);
+ bdrv_unref(bs);
+ goto out;
+ }
qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size);
return false;
}
+
+BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
+{
+ BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
+ if (!to_replace_bs) {
+ error_setg(errp, "Node name '%s' not found", node_name);
+ return NULL;
+ }
+
+ if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
+ return NULL;
+ }
+
+ /* We don't want arbitrary node of the BDS chain to be replaced only the top
+ * most non filter in order to prevent data corruption.
+ * Another benefit is that this tests exclude backing files which are
+ * blocked by the backing blockers.
+ */
+ if (!bdrv_is_first_non_filter(to_replace_bs)) {
+ error_setg(errp, "Only top most non filter can be replaced");
+ return NULL;
+ }
+
+ return to_replace_bs;
+}
+
+void bdrv_io_plug(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+ if (drv && drv->bdrv_io_plug) {
+ drv->bdrv_io_plug(bs);
+ } else if (bs->file) {
+ bdrv_io_plug(bs->file);
+ }
+}
+
+void bdrv_io_unplug(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+ if (drv && drv->bdrv_io_unplug) {
+ drv->bdrv_io_unplug(bs);
+ } else if (bs->file) {
+ bdrv_io_unplug(bs->file);
+ }
+}
+
+void bdrv_flush_io_queue(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+ if (drv && drv->bdrv_flush_io_queue) {
+ drv->bdrv_flush_io_queue(bs);
+ } else if (bs->file) {
+ bdrv_flush_io_queue(bs->file);
+ }
+}