* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "nbd.h"
-#include "block.h"
+#include "block/nbd.h"
+#include "block/block.h"
-#include "qemu-coroutine.h"
+#include "block/coroutine.h"
#include <errno.h>
#include <string.h>
#include <linux/fs.h>
#endif
-#include "qemu_socket.h"
-#include "qemu-queue.h"
+#include "qemu/sockets.h"
+#include "qemu/queue.h"
+#include "qemu/main-loop.h"
//#define DEBUG_NBD
__FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
} while(0)
-/* This is all part of the "official" NBD API */
+/* This is all part of the "official" NBD API.
+ *
+ * The most up-to-date documentation is available at:
+ * https://github.com/yoe/nbd/blob/master/doc/proto.txt
+ */
#define NBD_REQUEST_SIZE (4 + 4 + 8 + 8 + 4)
#define NBD_REPLY_SIZE (4 + 4 + 8)
#define NBD_REPLY_MAGIC 0x67446698
#define NBD_OPTS_MAGIC 0x49484156454F5054LL
#define NBD_CLIENT_MAGIC 0x0000420281861253LL
+#define NBD_REP_MAGIC 0x3e889045565a9LL
#define NBD_SET_SOCK _IO(0xab, 0)
#define NBD_SET_BLKSIZE _IO(0xab, 1)
#define NBD_SET_TIMEOUT _IO(0xab, 9)
#define NBD_SET_FLAGS _IO(0xab, 10)
-#define NBD_OPT_EXPORT_NAME (1 << 0)
+#define NBD_OPT_EXPORT_NAME (1)
+#define NBD_OPT_ABORT (2)
+#define NBD_OPT_LIST (3)
/* Definitions for opaque data types */
off_t size;
uint32_t nbdflags;
QTAILQ_HEAD(, NBDClient) clients;
- QSIMPLEQ_HEAD(, NBDRequest) requests;
QTAILQ_ENTRY(NBDExport) next;
};
return ret;
}
-static void combine_addr(char *buf, size_t len, const char* address,
- uint16_t port)
-{
- /* If the address-part contains a colon, it's an IPv6 IP so needs [] */
- if (strstr(address, ":")) {
- snprintf(buf, len, "[%s]:%u", address, port);
- } else {
- snprintf(buf, len, "%s:%u", address, port);
- }
-}
-
-int tcp_socket_outgoing(const char *address, uint16_t port)
-{
- char address_and_port[128];
- combine_addr(address_and_port, 128, address, port);
- return tcp_socket_outgoing_spec(address_and_port);
-}
-
-int tcp_socket_outgoing_spec(const char *address_and_port)
-{
- return inet_connect(address_and_port, true, NULL, NULL);
-}
-
-int tcp_socket_incoming(const char *address, uint16_t port)
-{
- char address_and_port[128];
- combine_addr(address_and_port, 128, address, port);
- return tcp_socket_incoming_spec(address_and_port);
-}
-
-int tcp_socket_incoming_spec(const char *address_and_port)
-{
- char *ostr = NULL;
- int olen = 0;
- return inet_listen(address_and_port, ostr, olen, SOCK_STREAM, 0, NULL);
-}
-
-int unix_socket_incoming(const char *path)
-{
- char *ostr = NULL;
- int olen = 0;
-
- return unix_listen(path, ostr, olen);
-}
-
-int unix_socket_outgoing(const char *path)
-{
- return unix_connect(path);
-}
-
/* Basic flow for negotiation
Server Client
*/
-static int nbd_receive_options(NBDClient *client)
+/*
+ * Send an option reply that carries no payload to the client on csock.
+ *
+ * Four big-endian fields are written in order with write_sync():
+ * NBD_REP_MAGIC, the option being answered (opt), the reply type (type)
+ * and a data length of zero.
+ *
+ * Returns 0 on success, or -EINVAL as soon as one of the writes is short.
+ */
+static int nbd_send_rep(int csock, uint32_t type, uint32_t opt)
{
- int csock = client->sock;
- char name[256];
- uint32_t tmp, length;
uint64_t magic;
- int rc;
+ uint32_t len;
- /* Client sends:
- [ 0 .. 3] reserved (0)
- [ 4 .. 11] NBD_OPTS_MAGIC
- [12 .. 15] NBD_OPT_EXPORT_NAME
- [16 .. 19] length
- [20 .. xx] export name (length bytes)
- */
-
- rc = -EINVAL;
- if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
- LOG("read failed");
- goto fail;
+ magic = cpu_to_be64(NBD_REP_MAGIC);
+ if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
+ LOG("write failed (rep magic)");
+ return -EINVAL;
}
- TRACE("Checking reserved");
- if (tmp != 0) {
- LOG("Bad reserved received");
- goto fail;
+ /* The parameters are converted in place; they are not used again in host order. */
+ opt = cpu_to_be32(opt);
+ if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) {
+ LOG("write failed (rep opt)");
+ return -EINVAL;
}
-
- if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
- LOG("read failed");
- goto fail;
+ type = cpu_to_be32(type);
+ if (write_sync(csock, &type, sizeof(type)) != sizeof(type)) {
+ LOG("write failed (rep type)");
+ return -EINVAL;
}
- TRACE("Checking reserved");
- if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) {
- LOG("Bad magic received");
- goto fail;
+ /* This reply never carries data, so the length field is always zero. */
+ len = cpu_to_be32(0);
+ if (write_sync(csock, &len, sizeof(len)) != sizeof(len)) {
+ LOG("write failed (rep data length)");
+ return -EINVAL;
}
+ return 0;
+}
- if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
- LOG("read failed");
- goto fail;
+/*
+ * Send one NBD_REP_SERVER reply describing the export exp, as part of the
+ * answer to an NBD_OPT_LIST request.
+ *
+ * The reply header (NBD_REP_MAGIC, NBD_OPT_LIST, NBD_REP_SERVER, payload
+ * length) is followed by the payload: the 32-bit length of the export name
+ * and then the name bytes themselves, without a terminating NUL.  All
+ * integers are written big-endian with write_sync().
+ *
+ * Returns 0 on success, or -EINVAL as soon as one of the writes is short.
+ */
+static int nbd_send_rep_list(int csock, NBDExport *exp)
+{
+ uint64_t magic, name_len;
+ uint32_t opt, type, len;
+
+ name_len = strlen(exp->name);
+ magic = cpu_to_be64(NBD_REP_MAGIC);
+ if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
+ LOG("write failed (magic)");
+ return -EINVAL;
+ }
+ opt = cpu_to_be32(NBD_OPT_LIST);
+ if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) {
+ LOG("write failed (opt)");
+ return -EINVAL;
}
- TRACE("Checking option");
- if (tmp != be32_to_cpu(NBD_OPT_EXPORT_NAME)) {
- LOG("Bad option received");
- goto fail;
+ type = cpu_to_be32(NBD_REP_SERVER);
+ if (write_sync(csock, &type, sizeof(type)) != sizeof(type)) {
+ LOG("write failed (reply type)");
+ return -EINVAL;
+ }
+ /* Payload length: the 32-bit name-length field plus the name bytes. */
+ len = cpu_to_be32(name_len + sizeof(len));
+ if (write_sync(csock, &len, sizeof(len)) != sizeof(len)) {
+ LOG("write failed (length)");
+ return -EINVAL;
+ }
+ /* First payload field: the length of the name that follows. */
+ len = cpu_to_be32(name_len);
+ if (write_sync(csock, &len, sizeof(len)) != sizeof(len)) {
+ LOG("write failed (length)");
+ return -EINVAL;
+ }
+ if (write_sync(csock, exp->name, name_len) != name_len) {
+ LOG("write failed (buffer)");
+ return -EINVAL;
}
+ return 0;
+}
- if (read_sync(csock, &length, sizeof(length)) != sizeof(length)) {
- LOG("read failed");
- goto fail;
+/*
+ * Answer an NBD_OPT_LIST request whose header announced `length` bytes of
+ * option data.
+ *
+ * NBD_OPT_LIST takes no data.  If the client sent some anyway, the data is
+ * read and discarded before NBD_REP_ERR_INVALID is sent: leaving it on the
+ * socket would make the caller parse it as the header of the next option.
+ * Otherwise one NBD_REP_SERVER reply is sent per entry of the exports list,
+ * followed by a final NBD_REP_ACK.
+ *
+ * Returns 0 when the reply was sent in full (including the
+ * NBD_REP_ERR_INVALID case), or -EINVAL if a read or write failed.
+ */
+static int nbd_handle_list(NBDClient *client, uint32_t length)
+{
+    int csock = client->sock;
+    NBDExport *exp;
+
+    if (length) {
+        char discard[512];
+
+        /* Drain the unexpected payload so the option stream stays in sync. */
+        while (length > 0) {
+            size_t chunk = sizeof(discard);
+
+            if (length < chunk) {
+                chunk = length;
+            }
+            if (read_sync(csock, discard, chunk) != chunk) {
+                LOG("read failed");
+                return -EINVAL;
+            }
+            length -= chunk;
+        }
+        return nbd_send_rep(csock, NBD_REP_ERR_INVALID, NBD_OPT_LIST);
+    }
+
+    /* For each export, send an NBD_REP_SERVER reply. */
+    QTAILQ_FOREACH(exp, &exports, next) {
+        if (nbd_send_rep_list(csock, exp)) {
+            return -EINVAL;
+        }
+    }
+    /* Finish with an NBD_REP_ACK. */
+    return nbd_send_rep(csock, NBD_REP_ACK, NBD_OPT_LIST);
+}
+
+static int nbd_handle_export_name(NBDClient *client, uint32_t length)
+{
+ int rc = -EINVAL, csock = client->sock;
+ char name[256];
+
+ /* Client sends:
+ [20 .. xx] export name (length bytes)
+ */
TRACE("Checking length");
- length = be32_to_cpu(length);
if (length > 255) {
LOG("Bad length received");
goto fail;
QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
nbd_export_get(client->exp);
-
- TRACE("Option negotiation succeeded.");
rc = 0;
fail:
return rc;
}
+/*
+ * Run the option phase of the newstyle negotiation on client->sock.
+ *
+ * The client flags are sent exactly once, ahead of the first option, so
+ * they are read and validated before entering the option loop.  Reading
+ * them on every iteration would consume the first four bytes of the next
+ * option's magic after an NBD_OPT_LIST had been answered.
+ *
+ * Each option is then read and dispatched:
+ *   - NBD_OPT_LIST is answered by nbd_handle_list(), then the loop goes on;
+ *   - NBD_OPT_EXPORT_NAME ends the phase with the result of
+ *     nbd_handle_export_name();
+ *   - NBD_OPT_ABORT ends the phase with -EINVAL;
+ *   - anything else is answered with NBD_REP_ERR_UNSUP and ends the phase
+ *     with -EINVAL.
+ *
+ * Returns the value of nbd_handle_export_name() when an export name was
+ * received, -EINVAL on a short read, bad flags, bad magic, abort or
+ * unsupported option, and 1 when answering NBD_OPT_LIST failed.  Callers
+ * must therefore treat any non-zero value as a failure.
+ */
+static int nbd_receive_options(NBDClient *client)
+{
+    int csock = client->sock;
+    uint32_t flags;
+
+    /* Client sends, once:
+        [ 0 ..   3]   client flags
+     */
+    if (read_sync(csock, &flags, sizeof(flags)) != sizeof(flags)) {
+        LOG("read failed");
+        return -EINVAL;
+    }
+    TRACE("Checking client flags");
+    flags = be32_to_cpu(flags);
+    if (flags != 0 && flags != NBD_FLAG_C_FIXED_NEWSTYLE) {
+        LOG("Bad client flags received");
+        return -EINVAL;
+    }
+
+    while (1) {
+        uint32_t tmp, option, length;
+        uint64_t magic;
+
+        /* Client then sends, for every option:
+            [ 0 ..   7]   NBD_OPTS_MAGIC
+            [ 8 ..  11]   NBD option
+            [12 ..  15]   length
+            ...           Rest of request
+         */
+
+        if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
+            LOG("read failed");
+            return -EINVAL;
+        }
+        TRACE("Checking opts magic");
+        if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) {
+            LOG("Bad magic received");
+            return -EINVAL;
+        }
+
+        if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
+            LOG("read failed");
+            return -EINVAL;
+        }
+        option = be32_to_cpu(tmp);
+
+        if (read_sync(csock, &length, sizeof(length)) != sizeof(length)) {
+            LOG("read failed");
+            return -EINVAL;
+        }
+        length = be32_to_cpu(length);
+
+        TRACE("Checking option");
+        switch (option) {
+        case NBD_OPT_LIST:
+            if (nbd_handle_list(client, length) < 0) {
+                /* Kept as a positive value; see the return contract above. */
+                return 1;
+            }
+            break;
+
+        case NBD_OPT_ABORT:
+            return -EINVAL;
+
+        case NBD_OPT_EXPORT_NAME:
+            return nbd_handle_export_name(client, length);
+
+        default:
+            LOG("Unsupported option 0x%x", option);
+            nbd_send_rep(csock, NBD_REP_ERR_UNSUP, option);
+            return -EINVAL;
+        }
+    }
+}
+
static int nbd_send_negotiate(NBDClient *client)
{
int csock = client->sock;
[ 8 .. 15] magic (NBD_CLIENT_MAGIC)
[16 .. 23] size
[24 .. 25] server flags (0)
- [24 .. 27] export flags
+ [26 .. 27] export flags
[28 .. 151] reserved (0)
Negotiation header with options, part 1:
[28 .. 151] reserved (0)
*/
- socket_set_block(csock);
+ qemu_set_block(csock);
rc = -EINVAL;
TRACE("Beginning negotiation.");
+ memset(buf, 0, sizeof(buf));
memcpy(buf, "NBDMAGIC", 8);
if (client->exp) {
assert ((client->exp->nbdflags & ~65535) == 0);
cpu_to_be16w((uint16_t*)(buf + 26), client->exp->nbdflags | myflags);
} else {
cpu_to_be64w((uint64_t*)(buf + 8), NBD_OPTS_MAGIC);
+ cpu_to_be16w((uint16_t *)(buf + 16), NBD_FLAG_FIXED_NEWSTYLE);
}
- memset(buf + 28, 0, 124);
if (client->exp) {
if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
goto fail;
}
rc = nbd_receive_options(client);
- if (rc < 0) {
+ if (rc != 0) {
LOG("option negotiation failed");
goto fail;
}
TRACE("Negotiation succeeded.");
rc = 0;
fail:
- socket_set_nonblock(csock);
+ qemu_set_nonblock(csock);
return rc;
}
TRACE("Receiving negotiation.");
- socket_set_block(csock);
rc = -EINVAL;
if (read_sync(csock, buf, 8) != 8) {
rc = 0;
fail:
- socket_set_nonblock(csock);
return rc;
}
return -serrno;
}
- if (flags & NBD_FLAG_READ_ONLY) {
- int read_only = 1;
- TRACE("Setting readonly attribute");
+ if (ioctl(fd, NBD_SET_FLAGS, flags) < 0) {
+ if (errno == ENOTTY) {
+ int read_only = (flags & NBD_FLAG_READ_ONLY) != 0;
+ TRACE("Setting readonly attribute");
- if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
+ if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
+ int serrno = errno;
+ LOG("Failed setting read-only attribute");
+ return -serrno;
+ }
+ } else {
int serrno = errno;
- LOG("Failed setting read-only attribute");
+ LOG("Failed setting flags");
return -serrno;
}
}
- if (ioctl(fd, NBD_SET_FLAGS, flags) < 0
- && errno != ENOTTY) {
- int serrno = errno;
- LOG("Failed setting flags");
- return -serrno;
- }
-
TRACE("Negotiation ended");
return 0;
static NBDRequest *nbd_request_get(NBDClient *client)
{
NBDRequest *req;
- NBDExport *exp = client->exp;
assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
client->nb_requests++;
- if (QSIMPLEQ_EMPTY(&exp->requests)) {
- req = g_malloc0(sizeof(NBDRequest));
- req->data = qemu_blockalign(exp->bs, NBD_BUFFER_SIZE);
- } else {
- req = QSIMPLEQ_FIRST(&exp->requests);
- QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
- }
+ req = g_slice_new0(NBDRequest);
nbd_client_get(client);
req->client = client;
return req;
static void nbd_request_put(NBDRequest *req)
{
NBDClient *client = req->client;
- QSIMPLEQ_INSERT_HEAD(&client->exp->requests, req, entry);
+
+ if (req->data) {
+ qemu_vfree(req->data);
+ }
+ g_slice_free(NBDRequest, req);
+
if (client->nb_requests-- == MAX_NBD_REQUESTS) {
qemu_notify_event();
}
void (*close)(NBDExport *))
{
NBDExport *exp = g_malloc0(sizeof(NBDExport));
- QSIMPLEQ_INIT(&exp->requests);
exp->refcount = 1;
QTAILQ_INIT(&exp->clients);
exp->bs = bs;
exp->nbdflags = nbdflags;
exp->size = size == -1 ? bdrv_getlength(bs) : size;
exp->close = close;
+ bdrv_ref(bs);
return exp;
}
}
nbd_export_set_name(exp, NULL);
nbd_export_put(exp);
+ if (exp->bs) {
+ bdrv_unref(exp->bs);
+ exp->bs = NULL;
+ }
}
void nbd_export_get(NBDExport *exp)
exp->close(exp);
}
- while (!QSIMPLEQ_EMPTY(&exp->requests)) {
- NBDRequest *first = QSIMPLEQ_FIRST(&exp->requests);
- QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
- qemu_vfree(first->data);
- g_free(first);
- }
-
g_free(exp);
}
}
{
NBDClient *client = req->client;
int csock = client->sock;
+ uint32_t command;
ssize_t rc;
client->recv_coroutine = qemu_coroutine_self();
goto out;
}
- if (request->len > NBD_BUFFER_SIZE) {
+ if (request->len > NBD_MAX_BUFFER_SIZE) {
LOG("len (%u) is larger than max len (%u)",
- request->len, NBD_BUFFER_SIZE);
+ request->len, NBD_MAX_BUFFER_SIZE);
rc = -EINVAL;
goto out;
}
TRACE("Decoding type");
- if ((request->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) {
+ command = request->type & NBD_CMD_MASK_COMMAND;
+ if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) {
+ req->data = qemu_blockalign(client->exp->bs, request->len);
+ }
+ if (command == NBD_CMD_WRITE) {
TRACE("Reading %u byte(s)", request->len);
if (qemu_co_recv(csock, req->data, request->len) != request->len) {
struct nbd_request request;
struct nbd_reply reply;
ssize_t ret;
+ uint32_t command;
TRACE("Reading request.");
if (client->closing) {
reply.error = -ret;
goto error_reply;
}
-
- if ((request.from + request.len) > exp->size) {
+ command = request.type & NBD_CMD_MASK_COMMAND;
+ if (command != NBD_CMD_DISC && (request.from + request.len) > exp->size) {
LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64
", Offset: %" PRIu64 "\n",
request.from, request.len,
goto invalid_request;
}
- switch (request.type & NBD_CMD_MASK_COMMAND) {
+ switch (command) {
case NBD_CMD_READ:
TRACE("Request type is READ");
client->refcount = 1;
client->exp = exp;
client->sock = csock;
- if (nbd_send_negotiate(client) < 0) {
+ if (nbd_send_negotiate(client)) {
g_free(client);
return NULL;
}