block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
block-obj-y += accounting.o
+block-obj-y += write-threshold.o
common-obj-y += stream.o
common-obj-y += commit.o
#include "block/qapi.h"
#include "block/block_int.h"
+#include "block/write-threshold.h"
#include "qmp-commands.h"
#include "qapi-visit.h"
#include "qapi/qmp-output-visitor.h"
info->iops_size = cfg.op_size;
}
+ info->write_threshold = bdrv_write_threshold_get(bs);
+
return info;
}
--- /dev/null
+/*
+ * QEMU System Emulator block write threshold notification
+ *
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "block/block_int.h"
+#include "block/coroutine.h"
+#include "block/write-threshold.h"
+#include "qemu/notify.h"
+#include "qapi-event.h"
+#include "qmp-commands.h"
+
+
+/*
+ * Return the write threshold currently stored on @bs.
+ * The stored value is handed back unmodified; zero is the value that
+ * bdrv_write_threshold_is_set() treats as "no threshold".
+ */
+uint64_t bdrv_write_threshold_get(const BlockDriverState *bs)
+{
+ return bs->write_threshold_offset;
+}
+
+/* Tell whether @bs has a write threshold, i.e. a non-zero stored offset. */
+bool bdrv_write_threshold_is_set(const BlockDriverState *bs)
+{
+    return bdrv_write_threshold_get(bs) != 0;
+}
+
+/*
+ * Drop the write threshold of @bs: unhook the threshold notifier and
+ * reset the stored offset to zero.  Does nothing if no threshold is set,
+ * so the notifier is never removed unless it was registered.
+ */
+static void write_threshold_disable(BlockDriverState *bs)
+{
+    if (!bdrv_write_threshold_is_set(bs)) {
+        return;
+    }
+
+    notifier_with_return_remove(&bs->write_threshold_notifier);
+    bs->write_threshold_offset = 0;
+}
+
+/*
+ * Compute by how many bytes the end of @req lies past the write
+ * threshold of @bs.  Returns zero when no threshold is set or when the
+ * request does not reach past it.
+ */
+uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs,
+                                       const BdrvTrackedRequest *req)
+{
+    if (!bdrv_write_threshold_is_set(bs)) {
+        return 0;
+    }
+
+    /* the request already starts past the threshold */
+    if (req->offset > bs->write_threshold_offset) {
+        return (req->offset - bs->write_threshold_offset) + req->bytes;
+    }
+
+    /* the request starts at or below the threshold but ends past it */
+    if ((req->offset + req->bytes) > bs->write_threshold_offset) {
+        return (req->offset + req->bytes) - bs->write_threshold_offset;
+    }
+
+    return 0;
+}
+
+/*
+ * Notifier callback hooked into the before-write notifier list.
+ * @opaque is the tracked request about to be written.  If the request
+ * exceeds the threshold, emit BLOCK_WRITE_THRESHOLD once and disable the
+ * threshold.  Always returns 0.
+ */
+static int coroutine_fn before_write_notify(NotifierWithReturn *notifier,
+                                            void *opaque)
+{
+    BdrvTrackedRequest *req = opaque;
+    BlockDriverState *bs = req->bs;
+    uint64_t exceeded = bdrv_write_threshold_exceeded(bs, req);
+
+    if (exceeded == 0) {
+        return 0; /* nothing to report; let other notifiers run */
+    }
+
+    qapi_event_send_block_write_threshold(bs->node_name,
+                                          exceeded,
+                                          bs->write_threshold_offset,
+                                          &error_abort);
+
+    /* autodisable to avoid flooding the monitor */
+    write_threshold_disable(bs);
+
+    return 0; /* should always let other notifiers run */
+}
+
+/*
+ * Point the threshold notifier of @bs at before_write_notify() and add
+ * it to the before-write notifier list of @bs.
+ */
+static void write_threshold_register_notifier(BlockDriverState *bs)
+{
+    NotifierWithReturn *threshold_notifier = &bs->write_threshold_notifier;
+
+    threshold_notifier->notify = before_write_notify;
+    notifier_with_return_list_add(&bs->before_write_notifiers,
+                                  threshold_notifier);
+}
+
+/*
+ * Store @threshold_bytes as the write threshold of @bs.
+ *
+ * The parameter is unsigned to match both the stored field and the
+ * public setter, so thresholds above INT64_MAX are not funnelled
+ * through a signed conversion on the way in.
+ */
+static void write_threshold_update(BlockDriverState *bs,
+                                   uint64_t threshold_bytes)
+{
+    bs->write_threshold_offset = threshold_bytes;
+}
+
+/*
+ * Configure the write threshold of @bs.
+ *
+ * A zero @threshold_bytes disables the threshold (a no-op when none is
+ * set).  A non-zero value stores the new threshold and registers the
+ * notifier only if it is not registered yet.
+ */
+void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes)
+{
+    if (threshold_bytes == 0) {
+        /* silently ignored when no threshold is currently set */
+        write_threshold_disable(bs);
+        return;
+    }
+
+    if (!bdrv_write_threshold_is_set(bs)) {
+        /* avoid multiple registration */
+        write_threshold_register_notifier(bs);
+    }
+    write_threshold_update(bs, threshold_bytes);
+}
+
+/*
+ * QMP handler for block-set-write-threshold.
+ *
+ * Looks up the node called @node_name and applies @threshold_bytes to it
+ * while holding the node's AioContext.  Sets DeviceNotFound in @errp if
+ * no such node exists.
+ */
+void qmp_block_set_write_threshold(const char *node_name,
+                                   uint64_t threshold_bytes,
+                                   Error **errp)
+{
+    AioContext *ctx;
+    BlockDriverState *bs = bdrv_find_node(node_name);
+
+    if (bs == NULL) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, node_name);
+        return;
+    }
+
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
+    bdrv_write_threshold_set(bs, threshold_bytes);
+    aio_context_release(ctx);
+}
/* The error object in use for blocking operations on backing_hd */
Error *backing_blocker;
+
+ /* threshold limit for writes, in bytes. "High water mark". */
+ uint64_t write_threshold_offset;
+ NotifierWithReturn write_threshold_notifier;
};
--- /dev/null
+/*
+ * QEMU System Emulator block write threshold notification
+ *
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+#ifndef BLOCK_WRITE_THRESHOLD_H
+#define BLOCK_WRITE_THRESHOLD_H
+
+#include <stdint.h>
+
+#include "qemu/typedefs.h"
+#include "qemu-common.h"
+
+/*
+ * bdrv_write_threshold_set:
+ *
+ * Set the write threshold for block devices, in bytes.
+ * Notify when a write exceeds the threshold, meaning the device
+ * is becoming full, so it can be transparently resized.
+ * To be used with thin-provisioned block devices.
+ *
+ * Use threshold_bytes == 0 to disable.
+ */
+void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes);
+
+/*
+ * bdrv_write_threshold_get
+ *
+ * Get the configured write threshold, in bytes.
+ * Zero means no threshold configured.
+ */
+uint64_t bdrv_write_threshold_get(const BlockDriverState *bs);
+
+/*
+ * bdrv_write_threshold_is_set
+ *
+ * Tell if a write threshold is set for a given BDS.
+ */
+bool bdrv_write_threshold_is_set(const BlockDriverState *bs);
+
+/*
+ * bdrv_write_threshold_exceeded
+ *
+ * Return the extent of a write request that exceeded the threshold,
+ * or zero if the request is below the threshold.
+ * Return zero also if the threshold was not set.
+ *
+ * NOTE: here we assume the following holds for each request this code
+ * deals with:
+ *
+ * assert((req->offset + req->bytes) <= UINT64_MAX)
+ *
+ * Please note there is *not* an actual C assert().
+ */
+uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs,
+ const BdrvTrackedRequest *req);
+
+#endif
#
# @cache: the cache mode used for the block device (since: 2.3)
#
+# @write_threshold: configured write threshold for the device.
+# 0 if disabled. (Since 2.3)
+#
# Since: 0.14.0
#
##
'*bps_max': 'int', '*bps_rd_max': 'int',
'*bps_wr_max': 'int', '*iops_max': 'int',
'*iops_rd_max': 'int', '*iops_wr_max': 'int',
- '*iops_size': 'int', 'cache': 'BlockdevCacheInfo' } }
+ '*iops_size': 'int', 'cache': 'BlockdevCacheInfo',
+ 'write_threshold': 'int' } }
##
# @BlockDeviceIoStatus:
##
{ 'enum': 'PreallocMode',
'data': [ 'off', 'metadata', 'falloc', 'full' ] }
+
+##
+# @BLOCK_WRITE_THRESHOLD
+#
+# Emitted when writes on a block device reach or exceed the
+# configured write threshold. For thin-provisioned devices, this
+# means the device should be extended to avoid pausing for
+# disk exhaustion.
+# The event is one shot. Once triggered, it needs to be
+# re-registered with another block-set-write-threshold command.
+#
+# @node-name: graph node name on which the threshold was exceeded.
+#
+# @amount-exceeded: amount of data which exceeded the threshold, in bytes.
+#
+# @write-threshold: last configured threshold, in bytes.
+#
+# Since: 2.3
+##
+{ 'event': 'BLOCK_WRITE_THRESHOLD',
+ 'data': { 'node-name': 'str',
+ 'amount-exceeded': 'uint64',
+ 'write-threshold': 'uint64' } }
+
+##
+# @block-set-write-threshold
+#
+# Change the write threshold for a block drive. An event will be delivered
+# if a write to this block drive crosses the configured threshold.
+# This is useful to transparently resize thin-provisioned drives without
+# the guest OS noticing.
+#
+# @node-name: graph node name on which the threshold must be set.
+#
+# @write-threshold: configured threshold for the block device, bytes.
+# Use 0 to disable the threshold.
+#
+# Returns: Nothing on success
+# If @node-name is not found on the block device graph,
+# DeviceNotFound
+#
+# Since: 2.3
+##
+{ 'command': 'block-set-write-threshold',
+ 'data': { 'node-name': 'str', 'write-threshold': 'uint64' } }
- "iops_size": I/O size when limiting by iops (json-int)
- "detect_zeroes": detect and optimize zero writing (json-string)
- Possible values: "off", "on", "unmap"
+ - "write_threshold": write offset threshold in bytes, an event will be
+ emitted if crossed. Zero if disabled (json-int)
- "image": the detail of the image, it is a json-object containing
the following:
- "filename": image file name (json-string)
"iops_wr_max": 0,
"iops_size": 0,
"detect_zeroes": "on",
+ "write_threshold": 0,
"image":{
"filename":"disks/test.qcow2",
"format":"qcow2",
"iops_rd_max": 0,
"iops_wr_max": 0,
"iops_size": 0,
+ "write_threshold": 0,
"image":{
"filename":"disks/test.qcow2",
"format":"qcow2",
{ "type": "abs", "data" : { "axis": "Y", "value" : 400 } } ] } }
<- { "return": {} }
+EQMP
+
+ {
+ .name = "block-set-write-threshold",
+ .args_type = "node-name:s,write-threshold:l",
+ .mhandler.cmd_new = qmp_marshal_input_block_set_write_threshold,
+ },
+
+SQMP
+block-set-write-threshold
+-------------------------
+
+Change the write threshold for a block drive. The threshold is an offset,
+thus must be non-negative. Default is no write threshold.
+Setting the threshold to zero disables it.
+
+Arguments:
+
+- "node-name": the node name in the block driver state graph (json-string)
+- "write-threshold": the write threshold in bytes (json-int)
+
+Example:
+
+-> { "execute": "block-set-write-threshold",
+ "arguments": { "node-name": "mydev",
+ "write-threshold": 17179869184 } }
+<- { "return": {} }
+
EQMP
gcov-files-check-qom-interface-y = qom/object.c
check-unit-y += tests/test-qemu-opts$(EXESUF)
gcov-files-test-qemu-opts-y = qom/test-qemu-opts.c
+check-unit-y += tests/test-write-threshold$(EXESUF)
+gcov-files-test-write-threshold-y = block/write-threshold.c
check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y)
tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o libqemuutil.a libqemustub.a
+tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(block-obj-y) libqemuutil.a libqemustub.a
ifeq ($(CONFIG_POSIX),y)
LIBS += -lutil
"drv": "qcow2",
"iops": 0,
"bps_wr": 0,
+ "write_threshold": 0,
"encrypted": false,
"bps": 0,
"bps_rd": 0,
"drv": "qcow2",
"iops": 0,
"bps_wr": 0,
+ "write_threshold": 0,
"encrypted": false,
"bps": 0,
"bps_rd": 0,
"drv": "qcow2",
"iops": 0,
"bps_wr": 0,
+ "write_threshold": 0,
"encrypted": false,
"bps": 0,
"bps_rd": 0,
"drv": "qcow2",
"iops": 0,
"bps_wr": 0,
+ "write_threshold": 0,
"encrypted": false,
"bps": 0,
"bps_rd": 0,
"drv": "qcow2",
"iops": 0,
"bps_wr": 0,
+ "write_threshold": 0,
"encrypted": false,
"bps": 0,
"bps_rd": 0,
--- /dev/null
+/*
+ * Test block device write threshold
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include <glib.h>
+#include <stdint.h>
+#include "block/block_int.h"
+#include "block/write-threshold.h"
+
+
+/* A zero-initialized BDS must report no threshold and a zero value. */
+static void test_threshold_not_set_on_init(void)
+{
+    uint64_t res;
+    BlockDriverState bs;
+    memset(&bs, 0, sizeof(bs));
+
+    g_assert(!bdrv_write_threshold_is_set(&bs));
+
+    res = bdrv_write_threshold_get(&bs);
+    /* res is uint64_t: use the unsigned comparison macro */
+    g_assert_cmpuint(res, ==, 0);
+}
+
+/* A non-zero threshold must be reported as set and read back unchanged. */
+static void test_threshold_set_get(void)
+{
+    uint64_t threshold = 4 * 1024 * 1024;
+    uint64_t res;
+    BlockDriverState bs;
+    memset(&bs, 0, sizeof(bs));
+
+    bdrv_write_threshold_set(&bs, threshold);
+
+    g_assert(bdrv_write_threshold_is_set(&bs));
+
+    res = bdrv_write_threshold_get(&bs);
+    /* both operands are uint64_t: use the unsigned comparison macro */
+    g_assert_cmpuint(res, ==, threshold);
+}
+
+/* Setting the threshold twice must leave the second value in place. */
+static void test_threshold_multi_set_get(void)
+{
+    uint64_t threshold1 = 4 * 1024 * 1024;
+    uint64_t threshold2 = 15 * 1024 * 1024;
+    uint64_t res;
+    BlockDriverState bs;
+    memset(&bs, 0, sizeof(bs));
+
+    bdrv_write_threshold_set(&bs, threshold1);
+    bdrv_write_threshold_set(&bs, threshold2);
+    res = bdrv_write_threshold_get(&bs);
+    /* both operands are uint64_t: use the unsigned comparison macro */
+    g_assert_cmpuint(res, ==, threshold2);
+}
+
+/* A request that ends below the threshold must report nothing exceeded. */
+static void test_threshold_not_trigger(void)
+{
+    BlockDriverState bs;
+    BdrvTrackedRequest req;
+    uint64_t exceeded;
+
+    memset(&bs, 0, sizeof(bs));
+    memset(&req, 0, sizeof(req));
+
+    /* request covers [1 KiB, 2 KiB), far below the 4 MiB threshold */
+    req.offset = 1024;
+    req.bytes = 1024;
+    bdrv_write_threshold_set(&bs, 4 * 1024 * 1024);
+
+    exceeded = bdrv_write_threshold_exceeded(&bs, &req);
+    g_assert_cmpuint(exceeded, ==, 0);
+}
+
+
+/*
+ * A request straddling the threshold must report exactly the part that
+ * lies past it.
+ */
+static void test_threshold_trigger(void)
+{
+    uint64_t amount = 0;
+    uint64_t threshold = 4 * 1024 * 1024;
+    BlockDriverState bs;
+    BdrvTrackedRequest req;
+
+    memset(&bs, 0, sizeof(bs));
+    memset(&req, 0, sizeof(req));
+    req.offset = (4 * 1024 * 1024) - 1024;
+    req.bytes = 2 * 1024;
+
+    bdrv_write_threshold_set(&bs, threshold);
+    amount = bdrv_write_threshold_exceeded(&bs, &req);
+    /*
+     * The request starts 1 KiB below the threshold and is 2 KiB long,
+     * so it ends exactly 1 KiB past the threshold.
+     */
+    g_assert_cmpuint(amount, ==, 1024);
+}
+
+/* One test case: its GTest path and the function implementing it. */
+typedef struct TestStruct {
+    const char *name;
+    void (*func)(void);
+} TestStruct;
+
+
+/* Register every write-threshold test case with GTest and run them. */
+int main(int argc, char **argv)
+{
+    static const TestStruct tests[] = {
+        { "/write-threshold/not-set-on-init",
+          test_threshold_not_set_on_init },
+        { "/write-threshold/set-get",
+          test_threshold_set_get },
+        { "/write-threshold/multi-set-get",
+          test_threshold_multi_set_get },
+        { "/write-threshold/not-trigger",
+          test_threshold_not_trigger },
+        { "/write-threshold/trigger",
+          test_threshold_trigger },
+    };
+    size_t idx;
+
+    g_test_init(&argc, &argv, NULL);
+    for (idx = 0; idx < G_N_ELEMENTS(tests); idx++) {
+        g_test_add_func(tests[idx].name, tests[idx].func);
+    }
+    return g_test_run();
+}