#include "qemu/osdep.h"
#include "qapi/error.h"
+#include "qemu/queue.h"
#include "trace.h"
#include "nbd-internal.h"
+#include "qemu/units.h"
#define NBD_META_ID_BASE_ALLOCATION 0
#define NBD_META_ID_DIRTY_BITMAP 1
-/* NBD_MAX_BITMAP_EXTENTS: 1 mb of extents data. An empirical
+/*
+ * NBD_MAX_BLOCK_STATUS_EXTENTS: 1 MiB of extents data. An empirical
* constant. If an increase is needed, note that the NBD protocol
* recommends no larger than 32 mb, so that the client won't consider
- * the reply as a denial of service attack. */
-#define NBD_MAX_BITMAP_EXTENTS (0x100000 / 8)
+ * the reply as a denial of service attack.
+ */
+#define NBD_MAX_BLOCK_STATUS_EXTENTS (1 * MiB / 8)
static int system_errno_to_nbd_errno(int err)
{
return NBD_ENOSPC;
case EOVERFLOW:
return NBD_EOVERFLOW;
+ case ENOTSUP:
+#if ENOTSUP != EOPNOTSUPP
+ case EOPNOTSUPP:
+#endif
+ return NBD_ENOTSUP;
case ESHUTDOWN:
return NBD_ESHUTDOWN;
case EINVAL:
nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type,
Error **errp, const char *fmt, va_list va)
{
- char *msg;
+ g_autofree char *msg = NULL;
int ret;
size_t len;
trace_nbd_negotiate_send_rep_err(msg);
ret = nbd_negotiate_send_rep_len(client, type, len, errp);
if (ret < 0) {
- goto out;
+ return ret;
}
if (nbd_write(client->ioc, msg, len, errp) < 0) {
error_prepend(errp, "write failed (error message): ");
- ret = -EIO;
- } else {
- ret = 0;
+ return -EIO;
}
-out:
- g_free(msg);
- return ret;
+ return 0;
}
/* Send an error reply.
/* nbd_opt_read_name
*
* Read a string with the format:
- * uint32_t len (<= NBD_MAX_NAME_SIZE)
+ * uint32_t len (<= NBD_MAX_STRING_SIZE)
* len bytes string (not 0-terminated)
*
- * @name should be enough to store NBD_MAX_NAME_SIZE+1.
+ * On success, @name will be allocated.
* If @length is non-null, it will be set to the actual string length.
*
* Return -errno on I/O error, 0 if option was completely handled by
* sending a reply about inconsistent lengths, or 1 on success.
*/
-static int nbd_opt_read_name(NBDClient *client, char *name, uint32_t *length,
+static int nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length,
Error **errp)
{
int ret;
uint32_t len;
+ g_autofree char *local_name = NULL;
+ *name = NULL;
ret = nbd_opt_read(client, &len, sizeof(len), errp);
if (ret <= 0) {
return ret;
}
len = cpu_to_be32(len);
- if (len > NBD_MAX_NAME_SIZE) {
+ if (len > NBD_MAX_STRING_SIZE) {
return nbd_opt_invalid(client, errp,
"Invalid name length: %" PRIu32, len);
}
- ret = nbd_opt_read(client, name, len, errp);
+ local_name = g_malloc(len + 1);
+ ret = nbd_opt_read(client, local_name, len, errp);
if (ret <= 0) {
return ret;
}
- name[len] = '\0';
+ local_name[len] = '\0';
if (length) {
*length = len;
}
+ *name = g_steal_pointer(&local_name);
return 1;
}
trace_nbd_negotiate_send_rep_list(name, desc);
name_len = strlen(name);
desc_len = strlen(desc);
+ assert(name_len <= NBD_MAX_STRING_SIZE && desc_len <= NBD_MAX_STRING_SIZE);
len = name_len + desc_len + sizeof(len);
ret = nbd_negotiate_send_rep_len(client, NBD_REP_SERVER, len, errp);
if (ret < 0) {
/* Send a reply to NBD_OPT_EXPORT_NAME.
* Return -errno on error, 0 on success. */
-static int nbd_negotiate_handle_export_name(NBDClient *client,
- uint16_t myflags, bool no_zeroes,
+static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
Error **errp)
{
- char name[NBD_MAX_NAME_SIZE + 1];
+ g_autofree char *name = NULL;
char buf[NBD_REPLY_EXPORT_NAME_SIZE] = "";
size_t len;
int ret;
+ uint16_t myflags;
/* Client sends:
[20 .. xx] export name (length bytes)
[10 .. 133] reserved (0) [unless no_zeroes]
*/
trace_nbd_negotiate_handle_export_name();
- if (client->optlen >= sizeof(name)) {
+ if (client->optlen > NBD_MAX_STRING_SIZE) {
error_setg(errp, "Bad length received");
return -EINVAL;
}
+ name = g_malloc(client->optlen + 1);
if (nbd_read(client->ioc, name, client->optlen, "export name", errp) < 0) {
return -EIO;
}
return -EINVAL;
}
- trace_nbd_negotiate_new_style_size_flags(client->exp->size,
- client->exp->nbdflags | myflags);
+ myflags = client->exp->nbdflags;
+ if (client->structured_reply) {
+ myflags |= NBD_FLAG_SEND_DF;
+ }
+ trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags);
stq_be_p(buf, client->exp->size);
- stw_be_p(buf + 8, client->exp->nbdflags | myflags);
+ stw_be_p(buf + 8, myflags);
len = no_zeroes ? 10 : sizeof(buf);
ret = nbd_write(client->ioc, buf, len, errp);
if (ret < 0) {
/* Handle NBD_OPT_INFO and NBD_OPT_GO.
* Return -errno on error, 0 if ready for next option, and 1 to move
* into transmission phase. */
-static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags,
- Error **errp)
+static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
{
int rc;
- char name[NBD_MAX_NAME_SIZE + 1];
+ g_autofree char *name = NULL;
NBDExport *exp;
uint16_t requests;
uint16_t request;
uint32_t sizes[3];
char buf[sizeof(uint64_t) + sizeof(uint16_t)];
uint32_t check_align = 0;
+ uint16_t myflags;
/* Client sends:
4 bytes: L, name length (can be 0)
2 bytes: N, number of requests (can be 0)
N * 2 bytes: N requests
*/
- rc = nbd_opt_read_name(client, name, &namelen, errp);
+ rc = nbd_opt_read_name(client, &name, &namelen, errp);
if (rc <= 0) {
return rc;
}
if (exp->description) {
size_t len = strlen(exp->description);
+ assert(len <= NBD_MAX_STRING_SIZE);
rc = nbd_negotiate_send_info(client, NBD_INFO_DESCRIPTION,
len, exp->description, errp);
if (rc < 0) {
}
/* Send NBD_INFO_EXPORT always */
- trace_nbd_negotiate_new_style_size_flags(exp->size,
- exp->nbdflags | myflags);
+ myflags = exp->nbdflags;
+ if (client->structured_reply) {
+ myflags |= NBD_FLAG_SEND_DF;
+ }
+ trace_nbd_negotiate_new_style_size_flags(exp->size, myflags);
stq_be_p(buf, exp->size);
- stw_be_p(buf + 8, exp->nbdflags | myflags);
+ stw_be_p(buf + 8, myflags);
rc = nbd_negotiate_send_info(client, NBD_INFO_EXPORT,
sizeof(buf), buf, errp);
if (rc < 0) {
{.iov_base = (void *)context, .iov_len = strlen(context)}
};
+ assert(iov[1].iov_len <= NBD_MAX_STRING_SIZE);
if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
context_id = 0;
}
* Parse namespace name and call corresponding function to parse body of the
* query.
*
- * The only supported namespace now is 'base'.
+ * The only supported namespaces are 'base' and 'qemu'.
*
* The function aims not wasting time and memory to read long unknown namespace
* names.
}
len = cpu_to_be32(len);
+ if (len > NBD_MAX_STRING_SIZE) {
+ trace_nbd_negotiate_meta_query_skip("length too long");
+ return nbd_opt_skip(client, len, errp);
+ }
if (len < ns_len) {
trace_nbd_negotiate_meta_query_skip("length too short");
return nbd_opt_skip(client, len, errp);
NBDExportMetaContexts *meta, Error **errp)
{
int ret;
- char export_name[NBD_MAX_NAME_SIZE + 1];
+ g_autofree char *export_name = NULL;
NBDExportMetaContexts local_meta;
uint32_t nb_queries;
int i;
memset(meta, 0, sizeof(*meta));
- ret = nbd_opt_read_name(client, export_name, NULL, errp);
+ ret = nbd_opt_read_name(client, &export_name, NULL, errp);
if (ret <= 0) {
return ret;
}
* 1 if client sent NBD_OPT_ABORT, i.e. on valid disconnect,
* errp is not set
*/
-static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
- Error **errp)
+static int nbd_negotiate_options(NBDClient *client, Error **errp)
{
uint32_t flags;
bool fixedNewstyle = false;
return 1;
case NBD_OPT_EXPORT_NAME:
- return nbd_negotiate_handle_export_name(client,
- myflags, no_zeroes,
+ return nbd_negotiate_handle_export_name(client, no_zeroes,
errp);
case NBD_OPT_INFO:
case NBD_OPT_GO:
- ret = nbd_negotiate_handle_info(client, myflags, errp);
+ ret = nbd_negotiate_handle_info(client, errp);
if (ret == 1) {
assert(option == NBD_OPT_GO);
return 0;
} else {
ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
client->structured_reply = true;
- myflags |= NBD_FLAG_SEND_DF;
}
break;
*/
switch (option) {
case NBD_OPT_EXPORT_NAME:
- return nbd_negotiate_handle_export_name(client,
- myflags, no_zeroes,
+ return nbd_negotiate_handle_export_name(client, no_zeroes,
errp);
default:
{
char buf[NBD_OLDSTYLE_NEGOTIATE_SIZE] = "";
int ret;
- const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
- NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
- NBD_FLAG_SEND_WRITE_ZEROES | NBD_FLAG_SEND_CACHE);
/* Old style negotiation header, no room for options
[ 0 .. 7] passwd ("NBDMAGIC")
error_prepend(errp, "write failed: ");
return -EINVAL;
}
- ret = nbd_negotiate_options(client, myflags, errp);
+ ret = nbd_negotiate_options(client, errp);
if (ret != 0) {
if (ret < 0) {
error_prepend(errp, "option negotiation failed: ");
return ret;
}
+ /* Attach the channel to the same AioContext as the export */
+ if (client->exp && client->exp->ctx) {
+ qio_channel_attach_aio_context(client->ioc, client->exp->ctx);
+ }
+
assert(!client->optlen);
trace_nbd_negotiate_success();
static void nbd_eject_notifier(Notifier *n, void *data)
{
NBDExport *exp = container_of(n, NBDExport, eject_notifier);
+ AioContext *aio_context;
+
+ aio_context = exp->ctx;
+ aio_context_acquire(aio_context);
nbd_export_close(exp);
+ aio_context_release(aio_context);
}
NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset,
uint64_t size, const char *name, const char *desc,
- const char *bitmap, uint16_t nbdflags,
+ const char *bitmap, bool readonly, bool shared,
void (*close)(NBDExport *), bool writethrough,
BlockBackend *on_eject_blk, Error **errp)
{
* NBD exports are used for non-shared storage migration. Make sure
* that BDRV_O_INACTIVE is cleared and the image is ready for write
* access since the export could be available before migration handover.
+ * ctx was acquired in the caller.
*/
- assert(name);
+ assert(name && strlen(name) <= NBD_MAX_STRING_SIZE);
ctx = bdrv_get_aio_context(bs);
- aio_context_acquire(ctx);
bdrv_invalidate_cache(bs, NULL);
- aio_context_release(ctx);
/* Don't allow resize while the NBD server is running, otherwise we don't
* care what happens with the node. */
perm = BLK_PERM_CONSISTENT_READ;
- if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) {
+ if (!readonly) {
perm |= BLK_PERM_WRITE;
}
- blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
- BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
+ blk = blk_new(ctx, perm,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+ BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
ret = blk_insert_bs(blk, bs, errp);
if (ret < 0) {
goto fail;
}
blk_set_enable_write_cache(blk, !writethrough);
+ blk_set_allow_aio_context_change(blk, true);
exp->refcount = 1;
QTAILQ_INIT(&exp->clients);
assert(dev_offset <= INT64_MAX);
exp->dev_offset = dev_offset;
exp->name = g_strdup(name);
+ assert(!desc || strlen(desc) <= NBD_MAX_STRING_SIZE);
exp->description = g_strdup(desc);
- exp->nbdflags = nbdflags;
+ exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH |
+ NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE);
+ if (readonly) {
+ exp->nbdflags |= NBD_FLAG_READ_ONLY;
+ if (shared) {
+ exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN;
+ }
+ } else {
+ exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES |
+ NBD_FLAG_SEND_FAST_ZERO);
+ }
assert(size <= INT64_MAX - dev_offset);
exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE);
goto fail;
}
- if ((nbdflags & NBD_FLAG_READ_ONLY) && bdrv_is_writable(bs) &&
+ if (readonly && bdrv_is_writable(bs) &&
bdrv_dirty_bitmap_enabled(bm)) {
error_setg(errp,
"Enabled bitmap '%s' incompatible with readonly export",
bdrv_dirty_bitmap_set_busy(bm, true);
exp->export_bitmap = bm;
+ assert(strlen(bitmap) <= BDRV_BITMAP_MAX_NAME_SIZE);
exp->export_bitmap_context = g_strdup_printf("qemu:dirty-bitmap:%s",
bitmap);
+ assert(strlen(exp->export_bitmap_context) < NBD_MAX_STRING_SIZE);
}
exp->close = close;
- exp->ctx = blk_get_aio_context(blk);
+ exp->ctx = ctx;
blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
if (on_eject_blk) {
return NULL;
}
+AioContext *
+nbd_export_aio_context(NBDExport *exp)
+{
+ return exp->ctx;
+}
+
void nbd_export_close(NBDExport *exp)
{
NBDClient *client, *next;
void nbd_export_close_all(void)
{
NBDExport *exp, *next;
+ AioContext *aio_context;
QTAILQ_FOREACH_SAFE(exp, &exports, next, next) {
+ aio_context = exp->ctx;
+ aio_context_acquire(aio_context);
nbd_export_close(exp);
+ aio_context_release(aio_context);
}
}
Error **errp)
{
int ret;
- unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BITMAP_EXTENTS;
+ unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
NBDExtent *extents = g_new(NBDExtent, nb_extents);
uint64_t final_length = length;
bdrv_dirty_bitmap_lock(bitmap);
it = bdrv_dirty_iter_new(bitmap);
- dirty = bdrv_get_dirty_locked(NULL, bitmap, offset);
+ dirty = bdrv_dirty_bitmap_get_locked(bitmap, offset);
assert(begin < overall_end && nb_extents);
while (begin < overall_end && i < nb_extents) {
uint32_t context_id, Error **errp)
{
int ret;
- unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BITMAP_EXTENTS;
+ unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
NBDExtent *extents = g_new(NBDExtent, nb_extents);
uint64_t final_length = length;
return -EINVAL;
}
- req->data = blk_try_blockalign(client->exp->blk, request->len);
- if (req->data == NULL) {
- error_setg(errp, "No memory");
- return -ENOMEM;
+ if (request->type != NBD_CMD_CACHE) {
+ req->data = blk_try_blockalign(client->exp->blk, request->len);
+ if (req->data == NULL) {
+ error_setg(errp, "No memory");
+ return -ENOMEM;
+ }
}
}
+
if (request->type == NBD_CMD_WRITE) {
if (nbd_read(client->ioc, req->data, request->len, "CMD_WRITE data",
errp) < 0)
if (request->type == NBD_CMD_READ && client->structured_reply) {
valid_flags |= NBD_CMD_FLAG_DF;
} else if (request->type == NBD_CMD_WRITE_ZEROES) {
- valid_flags |= NBD_CMD_FLAG_NO_HOLE;
+ valid_flags |= NBD_CMD_FLAG_NO_HOLE | NBD_CMD_FLAG_FAST_ZERO;
} else if (request->type == NBD_CMD_BLOCK_STATUS) {
valid_flags |= NBD_CMD_FLAG_REQ_ONE;
}
int ret;
NBDExport *exp = client->exp;
- assert(request->type == NBD_CMD_READ || request->type == NBD_CMD_CACHE);
+ assert(request->type == NBD_CMD_READ);
/* XXX: NBD Protocol only documents use of FUA with WRITE */
if (request->flags & NBD_CMD_FLAG_FUA) {
}
if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) &&
- request->len && request->type != NBD_CMD_CACHE)
+ request->len)
{
return nbd_co_send_sparse_read(client, request->handle, request->from,
data, request->len, errp);
ret = blk_pread(exp->blk, request->from + exp->dev_offset, data,
request->len);
- if (ret < 0 || request->type == NBD_CMD_CACHE) {
+ if (ret < 0) {
return nbd_send_generic_reply(client, request->handle, ret,
"reading from file failed", errp);
}
}
}
+/*
+ * nbd_do_cmd_cache
+ *
+ * Handle NBD_CMD_CACHE request.
+ * Return -errno if sending fails. Other errors are reported directly to the
+ * client as an error reply.
+ */
+static coroutine_fn int nbd_do_cmd_cache(NBDClient *client, NBDRequest *request,
+ Error **errp)
+{
+ int ret;
+ NBDExport *exp = client->exp;
+
+ assert(request->type == NBD_CMD_CACHE);
+
+ ret = blk_co_preadv(exp->blk, request->from + exp->dev_offset, request->len,
+ NULL, BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
+
+ return nbd_send_generic_reply(client, request->handle, ret,
+ "caching data failed", errp);
+}
+
/* Handle NBD request.
* Return -errno if sending fails. Other errors are reported directly to the
* client as an error reply. */
char *msg;
switch (request->type) {
- case NBD_CMD_READ:
case NBD_CMD_CACHE:
+ return nbd_do_cmd_cache(client, request, errp);
+
+ case NBD_CMD_READ:
return nbd_do_cmd_read(client, request, data, errp);
case NBD_CMD_WRITE:
if (!(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
flags |= BDRV_REQ_MAY_UNMAP;
}
+ if (request->flags & NBD_CMD_FLAG_FAST_ZERO) {
+ flags |= BDRV_REQ_NO_FALLBACK;
+ }
ret = blk_pwrite_zeroes(exp->blk, request->from + exp->dev_offset,
request->len, flags);
return nbd_send_generic_reply(client, request->handle, ret,