*/
#include "qemu-common.h"
+#include "qemu/uri.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "block/block_int.h"
#define SD_PROTO_VER 0x01
#define SD_DEFAULT_ADDR "localhost"
-#define SD_DEFAULT_PORT "7000"
+#define SD_DEFAULT_PORT 7000
#define SD_OP_CREATE_AND_WRITE_OBJ 0x01
#define SD_OP_READ_OBJ 0x02
#define SD_RES_WAIT_FOR_FORMAT 0x16 /* Waiting for a format operation */
#define SD_RES_WAIT_FOR_JOIN 0x17 /* Waiting for other nodes joining */
#define SD_RES_JOIN_FAILED 0x18 /* Target node had failed to join sheepdog */
+#define SD_RES_HALT 0x19 /* Sheepdog is stopped serving IO request */
/*
* Object ID rules
uint32_t id;
uint32_t data_length;
uint64_t vdi_size;
- uint32_t base_vdi_id;
+ uint32_t vdi_id;
uint32_t copies;
uint32_t snapid;
uint32_t pad[3];
bool is_snapshot;
uint32_t cache_flags;
- char *addr;
- char *port;
+ char *host_spec;
+ bool is_unix;
int fd;
CoMutex lock;
{SD_RES_WAIT_FOR_FORMAT, "Sheepdog is waiting for a format operation"},
{SD_RES_WAIT_FOR_JOIN, "Sheepdog is waiting for other nodes joining"},
{SD_RES_JOIN_FAILED, "Target node had failed to join sheepdog"},
+ {SD_RES_HALT, "Sheepdog is stopped serving IO request"},
};
for (i = 0; i < ARRAY_SIZE(errors); ++i) {
};
static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t sector_num, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
+ int64_t sector_num, int nb_sectors)
{
SheepdogAIOCB *acb;
- acb = qemu_aio_get(&sd_aiocb_info, bs, cb, opaque);
+ acb = qemu_aio_get(&sd_aiocb_info, bs, NULL, NULL);
acb->qiov = qiov;
return acb;
}
-static int connect_to_sdog(const char *addr, const char *port)
/*
 * Open a connection to the sheepdog server described by s->host_spec.
 *
 * When s->is_unix is set, host_spec is handed to unix_connect(); otherwise
 * it is handed to inet_connect() and socket_set_nodelay() is applied to the
 * new socket.  A socket_set_nodelay() failure is only reported; the
 * connection is still used.
 *
 * If the connect helper set an error, it is reported and freed and the
 * helper's return value is passed back unchanged.  Otherwise the socket is
 * put into non-blocking mode and returned.  Callers in this file treat any
 * negative return value as failure.
 */
+static int connect_to_sdog(BDRVSheepdogState *s)
{
- char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];
- int fd, ret;
- struct addrinfo hints, *res, *res0;
-
- if (!addr) {
- addr = SD_DEFAULT_ADDR;
- port = SD_DEFAULT_PORT;
- }
-
- memset(&hints, 0, sizeof(hints));
- hints.ai_socktype = SOCK_STREAM;
-
- ret = getaddrinfo(addr, port, &hints, &res0);
- if (ret) {
- error_report("unable to get address info %s, %s",
- addr, strerror(errno));
- return -errno;
- }
-
- for (res = res0; res; res = res->ai_next) {
- ret = getnameinfo(res->ai_addr, res->ai_addrlen, hbuf, sizeof(hbuf),
- sbuf, sizeof(sbuf), NI_NUMERICHOST | NI_NUMERICSERV);
- if (ret) {
- continue;
- }
+ int fd;
+ Error *err = NULL;
- fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
- if (fd < 0) {
- continue;
- }
+ if (s->is_unix) {
+ fd = unix_connect(s->host_spec, &err);
+ } else {
+ fd = inet_connect(s->host_spec, &err);
- reconnect:
- ret = connect(fd, res->ai_addr, res->ai_addrlen);
- if (ret < 0) {
- if (errno == EINTR) {
- goto reconnect;
+ if (err == NULL) {
+ int ret = socket_set_nodelay(fd);
+ if (ret < 0) {
+ error_report("%s", strerror(errno)); /* reported only; fd stays open */
}
- close(fd);
- break;
}
+ }
- dprintf("connected to %s:%s\n", addr, port);
- goto success;
+ if (err != NULL) {
+ qerror_report_err(err);
+ error_free(err);
+ } else {
+ qemu_set_nonblock(fd); /* only reached when the connect helper set no error */
}
- fd = -errno;
- error_report("failed connect to %s:%s", addr, port);
-success:
- freeaddrinfo(res0);
+
return fd;
}
qemu_coroutine_enter(co, NULL);
}
+static int have_co_req(void *opaque)
+{
+ /* Passed as the fourth argument of qemu_aio_set_fd_handler() while a
+ * request is being sent or its response awaited on the socket.  The
+ * handler is installed only when there is a pending request, so it
+ * always returns 1; opaque is not examined. */
+ return 1;
+}
+
typedef struct SheepdogReqCo {
int sockfd;
SheepdogReq *hdr;
unsigned int *rlen = srco->rlen;
co = qemu_coroutine_self();
- qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, NULL, co);
+ qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, have_co_req, co);
- socket_set_block(sockfd);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
goto out;
}
- qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, NULL, co);
+ qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, have_co_req, co);
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
if (ret < sizeof(*hdr)) {
}
ret = 0;
out:
+ /* there is at most one request for this sockfd, so it is safe to
+ * set each handler to NULL. */
qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL, NULL);
- socket_set_nonblock(sockfd);
srco->ret = ret;
srco->finished = true;
!QLIST_EMPTY(&s->pending_aio_head);
}
-static int set_nodelay(int fd)
-{
- int ret, opt;
-
- opt = 1;
- ret = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&opt, sizeof(opt));
- return ret;
-}
-
/*
* Return a socket descriptor to read/write objects.
*
*/
static int get_sheep_fd(BDRVSheepdogState *s)
{
- int ret, fd;
+ int fd;
- fd = connect_to_sdog(s->addr, s->port);
+ fd = connect_to_sdog(s);
if (fd < 0) {
- error_report("%s", strerror(errno));
return fd; /* connect_to_sdog() has already reported the failure */
}
- socket_set_nonblock(fd);
/*
 * connect_to_sdog() has already put the socket into non-blocking mode, so
 * all that is left is to install co_read_response and aio_flush_request on
 * it.  s is passed as the last argument (presumably the handlers' opaque
 * pointer -- confirm against qemu_aio_set_fd_handler()).
 */
+ qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s);
+ return fd;
+}
- ret = set_nodelay(fd);
- if (ret) {
- error_report("%s", strerror(errno));
- closesocket(fd);
- return -errno;
+/*
+ * Parse a sheepdog URI into connection and VDI parameters.
+ *
+ * Accepted forms:
+ *   sheepdog[+tcp]://[host:port]/vdiname[#snapid-or-tag]
+ *   sheepdog+unix:///vdiname?socket=path[#snapid-or-tag]
+ *
+ * @s:        receives is_unix and a newly allocated host_spec (on success)
+ * @filename: URI text to parse
+ * @vdi:      output buffer of SD_MAX_VDI_LEN bytes for the VDI name
+ * @snapid:   output; the fragment converted with strtoul(), or
+ *            CURRENT_VDI_ID when the URI has no fragment
+ * @tag:      output buffer of SD_MAX_VDI_TAG_LEN bytes; receives the
+ *            fragment text when it converts to 0 (i.e. is not a snapshot
+ *            number)
+ *
+ * Returns 0 on success and -EINVAL when the URI cannot be parsed, has no
+ * or an unknown scheme, has no VDI name, or carries query parameters that
+ * do not fit the selected transport.
+ */
+static int sd_parse_uri(BDRVSheepdogState *s, const char *filename,
+                        char *vdi, uint32_t *snapid, char *tag)
+{
+    URI *uri;
+    QueryParams *qp = NULL;
+    int ret = 0;
+
+    uri = uri_parse(filename);
+    if (!uri) {
+        return -EINVAL;
     }
-    qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s);
-    return fd;
+    /* transport */
+    if (uri->scheme == NULL) {
+        /*
+         * Callers route any filename containing "://" here, including
+         * ones such as "/a://b" that parse without a scheme; strcmp()
+         * on a NULL scheme would crash.
+         */
+        ret = -EINVAL;
+        goto out;
+    }
+    if (!strcmp(uri->scheme, "sheepdog")) {
+        s->is_unix = false;
+    } else if (!strcmp(uri->scheme, "sheepdog+tcp")) {
+        s->is_unix = false;
+    } else if (!strcmp(uri->scheme, "sheepdog+unix")) {
+        s->is_unix = true;
+    } else {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /*
+     * The VDI name is the path without its leading '/'.  An empty path
+     * is rejected as well so that "path + 1" never points past the
+     * terminator.
+     */
+    if (uri->path == NULL || uri->path[0] == '\0' ||
+        !strcmp(uri->path, "/")) {
+        ret = -EINVAL;
+        goto out;
+    }
+    pstrcpy(vdi, SD_MAX_VDI_LEN, uri->path + 1);
+
+    /* unix transport needs exactly one parameter, tcp takes none */
+    qp = query_params_parse(uri->query);
+    if (qp->n > 1 || (s->is_unix && !qp->n) || (!s->is_unix && qp->n)) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if (s->is_unix) {
+        /* sheepdog+unix:///vdiname?socket=path */
+        if (uri->server || uri->port || strcmp(qp->p[0].name, "socket") ||
+            qp->p[0].value == NULL) {
+            /*
+             * The value check covers a bare "?socket" (assumed to yield a
+             * NULL value -- confirm against query_params_parse()); without
+             * it host_spec would end up NULL.
+             */
+            ret = -EINVAL;
+            goto out;
+        }
+        s->host_spec = g_strdup(qp->p[0].value);
+    } else {
+        /* sheepdog[+tcp]://[host:port]/vdiname */
+        s->host_spec = g_strdup_printf("%s:%d", uri->server ?: SD_DEFAULT_ADDR,
+                                       uri->port ?: SD_DEFAULT_PORT);
+    }
+
+    /* snapshot tag */
+    if (uri->fragment) {
+        *snapid = strtoul(uri->fragment, NULL, 10);
+        if (*snapid == 0) {
+            /* not a snapshot number: keep the fragment as a tag name */
+            pstrcpy(tag, SD_MAX_VDI_TAG_LEN, uri->fragment);
+        }
+    } else {
+        *snapid = CURRENT_VDI_ID; /* search current vdi */
+    }
+
+out:
+    if (qp) {
+        query_params_free(qp);
+    }
+    uri_free(uri);
+    return ret;
 }
/*
- * Parse a filename
+ * Parse a filename (old syntax)
*
* filename must be one of the following formats:
* 1. [vdiname]
static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
char *vdi, uint32_t *snapid, char *tag)
{
- char *p, *q;
- int nr_sep;
+ char *p, *q, *uri;
+ const char *host_spec, *vdi_spec;
+ int nr_sep, ret;
+ strstart(filename, "sheepdog:", (const char **)&filename);
p = q = g_strdup(filename);
/* count the number of separators */
}
p = q;
- /* use the first two tokens as hostname and port number. */
+ /* use the first two tokens as host_spec. */
if (nr_sep >= 2) {
- s->addr = p;
+ host_spec = p;
p = strchr(p, ':');
- *p++ = '\0';
-
- s->port = p;
+ p++;
p = strchr(p, ':');
*p++ = '\0';
} else {
- s->addr = NULL;
- s->port = 0;
+ host_spec = "";
}
- pstrcpy(vdi, SD_MAX_VDI_LEN, p);
+ vdi_spec = p;
- p = strchr(vdi, ':');
+ p = strchr(vdi_spec, ':');
if (p) {
- *p++ = '\0';
- *snapid = strtoul(p, NULL, 10);
- if (*snapid == 0) {
- pstrcpy(tag, SD_MAX_VDI_TAG_LEN, p);
- }
- } else {
- *snapid = CURRENT_VDI_ID; /* search current vdi */
+ *p++ = '#';
}
- if (s->addr == NULL) {
- g_free(q);
- }
+ uri = g_strdup_printf("sheepdog://%s/%s", host_spec, vdi_spec);
- return 0;
+ ret = sd_parse_uri(s, uri, vdi, snapid, tag);
+
+ g_free(q);
+ g_free(uri);
+
+ return ret;
}
static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid,
unsigned int wlen, rlen = 0;
char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
- fd = connect_to_sdog(s->addr, s->port);
+ fd = connect_to_sdog(s);
if (fd < 0) {
return fd;
}
create, cache_flags);
}
-static int sd_open(BlockDriverState *bs, const char *filename, int flags)
+static int sd_open(BlockDriverState *bs, const char *filename,
+ QDict *options, int flags)
{
int ret, fd;
uint32_t vid = 0;
uint32_t snapid;
char *buf = NULL;
- strstart(filename, "sheepdog:", (const char **)&filename);
-
QLIST_INIT(&s->inflight_aio_head);
QLIST_INIT(&s->pending_aio_head);
s->fd = -1;
memset(vdi, 0, sizeof(vdi));
memset(tag, 0, sizeof(tag));
- if (parse_vdiname(s, filename, vdi, &snapid, tag) < 0) {
- ret = -EINVAL;
+
+ if (strstr(filename, "://")) {
+ ret = sd_parse_uri(s, filename, vdi, &snapid, tag);
+ } else {
+ ret = parse_vdiname(s, filename, vdi, &snapid, tag);
+ }
+ if (ret < 0) {
goto out;
}
s->fd = get_sheep_fd(s);
s->is_snapshot = true;
}
- fd = connect_to_sdog(s->addr, s->port);
+ fd = connect_to_sdog(s);
if (fd < 0) {
- error_report("failed to connect");
ret = fd;
goto out;
}
return ret;
}
-static int do_sd_create(char *filename, int64_t vdi_size,
- uint32_t base_vid, uint32_t *vdi_id, int snapshot,
- const char *addr, const char *port)
+static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size,
+ uint32_t base_vid, uint32_t *vdi_id, int snapshot)
{
SheepdogVdiReq hdr;
SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
unsigned int wlen, rlen = 0;
char buf[SD_MAX_VDI_LEN];
- fd = connect_to_sdog(addr, port);
+ fd = connect_to_sdog(s);
if (fd < 0) {
return fd;
}
memset(&hdr, 0, sizeof(hdr));
hdr.opcode = SD_OP_NEW_VDI;
- hdr.base_vdi_id = base_vid;
+ hdr.vdi_id = base_vid;
wlen = SD_MAX_VDI_LEN;
void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
int ret;
- ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR);
+ ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR);
if (ret < 0) {
goto out;
}
char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
uint32_t snapid;
bool prealloc = false;
- const char *vdiname;
s = g_malloc0(sizeof(BDRVSheepdogState));
- strstart(filename, "sheepdog:", &vdiname);
-
memset(vdi, 0, sizeof(vdi));
memset(tag, 0, sizeof(tag));
- if (parse_vdiname(s, vdiname, vdi, &snapid, tag) < 0) {
- error_report("invalid filename");
- ret = -EINVAL;
+ if (strstr(filename, "://")) {
+ ret = sd_parse_uri(s, filename, vdi, &snapid, tag);
+ } else {
+ ret = parse_vdiname(s, filename, vdi, &snapid, tag);
+ }
+ if (ret < 0) {
goto out;
}
goto out;
}
- ret = bdrv_file_open(&bs, backing_file, 0);
+ ret = bdrv_file_open(&bs, backing_file, NULL, 0);
if (ret < 0) {
goto out;
}
bdrv_delete(bs);
}
- ret = do_sd_create(vdi, vdi_size, base_vid, &vid, 0, s->addr, s->port);
+ ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0);
if (!prealloc || ret) {
goto out;
}
dprintf("%s\n", s->name);
- fd = connect_to_sdog(s->addr, s->port);
+ fd = connect_to_sdog(s);
if (fd < 0) {
return;
}
memset(&hdr, 0, sizeof(hdr));
hdr.opcode = SD_OP_RELEASE_VDI;
+ hdr.vdi_id = s->inode.vdi_id;
wlen = strlen(s->name) + 1;
hdr.data_length = wlen;
hdr.flags = SD_FLAG_CMD_WRITE;
qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL);
closesocket(s->fd);
- g_free(s->addr);
+ g_free(s->host_spec);
}
static int64_t sd_getlength(BlockDriverState *bs)
return -EINVAL;
}
- fd = connect_to_sdog(s->addr, s->port);
+ fd = connect_to_sdog(s);
if (fd < 0) {
return fd;
}
buf = g_malloc(SD_INODE_SIZE);
- ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1,
- s->addr, s->port);
+ ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1);
if (ret) {
goto out;
}
dprintf("%" PRIx32 " is created.\n", vid);
- fd = connect_to_sdog(s->addr, s->port);
+ fd = connect_to_sdog(s);
if (fd < 0) {
- error_report("failed to connect");
ret = fd;
goto out;
}
bs->total_sectors = sector_num + nb_sectors;
}
- acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, NULL, NULL);
+ acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
acb->aio_done_func = sd_write_done;
acb->aiocb_type = AIOCB_WRITE_UDATA;
SheepdogAIOCB *acb;
int ret;
- acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, NULL, NULL);
+ acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
acb->aiocb_type = AIOCB_READ_UDATA;
acb->aio_done_func = sd_finish_aiocb;
return 0;
}
- acb = sd_aio_setup(bs, NULL, 0, 0, NULL, NULL);
+ acb = sd_aio_setup(bs, NULL, 0, 0);
acb->aiocb_type = AIOCB_FLUSH_CACHE;
acb->aio_done_func = sd_finish_aiocb;
datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
/* refresh inode. */
- fd = connect_to_sdog(s->addr, s->port);
+ fd = connect_to_sdog(s);
if (fd < 0) {
ret = fd;
goto cleanup;
goto cleanup;
}
- ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, 1,
- s->addr, s->port);
+ ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid,
+ 1);
if (ret < 0) {
error_report("failed to create inode for snapshot. %s",
strerror(errno));
goto out;
}
- fd = connect_to_sdog(s->addr, s->port);
+ fd = connect_to_sdog(s);
if (fd < 0) {
- error_report("failed to connect");
ret = fd;
goto out;
}
vdi_inuse = g_malloc(max);
- fd = connect_to_sdog(s->addr, s->port);
+ fd = connect_to_sdog(s);
if (fd < 0) {
ret = fd;
goto out;
hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT);
start_nr = hval & (SD_NR_VDIS - 1);
- fd = connect_to_sdog(s->addr, s->port);
+ fd = connect_to_sdog(s);
if (fd < 0) {
- error_report("failed to connect");
ret = fd;
goto out;
}
uint32_t vdi_index;
uint64_t offset;
- fd = connect_to_sdog(s->addr, s->port);
+ fd = connect_to_sdog(s);
if (fd < 0) {
return fd;
}
return ret;
}
-static int sd_save_vmstate(BlockDriverState *bs, const uint8_t *data,
- int64_t pos, int size)
/*
 * Save VM state supplied as an I/O vector.
 *
 * The vector is copied into one contiguous buffer obtained from
 * qemu_blockalign() because do_load_save_vmstate() takes a single uint8_t
 * pointer and a length.  The buffer is released before returning.
 *
 * Returns whatever do_load_save_vmstate() returns.
 */
+static int sd_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t pos)
{
BDRVSheepdogState *s = bs->opaque;
+ void *buf;
+ int ret;
- return do_load_save_vmstate(s, (uint8_t *)data, pos, size, 0);
+ buf = qemu_blockalign(bs, qiov->size);
+ qemu_iovec_to_buf(qiov, 0, buf, qiov->size); /* flatten qiov into buf */
+ ret = do_load_save_vmstate(s, (uint8_t *) buf, pos, qiov->size, 0);
+ qemu_vfree(buf);
+
+ return ret;
}
static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
{ NULL }
};
-BlockDriver bdrv_sheepdog = {
+static BlockDriver bdrv_sheepdog = {
.format_name = "sheepdog",
.protocol_name = "sheepdog",
.instance_size = sizeof(BDRVSheepdogState),
.create_options = sd_create_options,
};
+static BlockDriver bdrv_sheepdog_tcp = { /*
+ * Driver entry registered under the "sheepdog+tcp" protocol name.
+ * sd_parse_uri() maps this scheme to is_unix = false, exactly as it does
+ * for plain "sheepdog".  The callback set is the same as in
+ * bdrv_sheepdog_unix; only protocol_name differs.
+ */
+ .format_name = "sheepdog",
+ .protocol_name = "sheepdog+tcp",
+ .instance_size = sizeof(BDRVSheepdogState),
+ .bdrv_file_open = sd_open,
+ .bdrv_close = sd_close,
+ .bdrv_create = sd_create,
+ .bdrv_getlength = sd_getlength,
+ .bdrv_truncate = sd_truncate,
+
+ .bdrv_co_readv = sd_co_readv,
+ .bdrv_co_writev = sd_co_writev,
+ .bdrv_co_flush_to_disk = sd_co_flush_to_disk,
+
+ .bdrv_snapshot_create = sd_snapshot_create,
+ .bdrv_snapshot_goto = sd_snapshot_goto,
+ .bdrv_snapshot_delete = sd_snapshot_delete,
+ .bdrv_snapshot_list = sd_snapshot_list,
+
+ .bdrv_save_vmstate = sd_save_vmstate,
+ .bdrv_load_vmstate = sd_load_vmstate,
+
+ .create_options = sd_create_options,
+};
+
+static BlockDriver bdrv_sheepdog_unix = { /*
+ * Driver entry registered under the "sheepdog+unix" protocol name.
+ * sd_parse_uri() maps this scheme to is_unix = true and takes the socket
+ * path from the "socket" query parameter.  The callback set is the same
+ * as in bdrv_sheepdog_tcp; only protocol_name differs.
+ */
+ .format_name = "sheepdog",
+ .protocol_name = "sheepdog+unix",
+ .instance_size = sizeof(BDRVSheepdogState),
+ .bdrv_file_open = sd_open,
+ .bdrv_close = sd_close,
+ .bdrv_create = sd_create,
+ .bdrv_getlength = sd_getlength,
+ .bdrv_truncate = sd_truncate,
+
+ .bdrv_co_readv = sd_co_readv,
+ .bdrv_co_writev = sd_co_writev,
+ .bdrv_co_flush_to_disk = sd_co_flush_to_disk,
+
+ .bdrv_snapshot_create = sd_snapshot_create,
+ .bdrv_snapshot_goto = sd_snapshot_goto,
+ .bdrv_snapshot_delete = sd_snapshot_delete,
+ .bdrv_snapshot_list = sd_snapshot_list,
+
+ .bdrv_save_vmstate = sd_save_vmstate,
+ .bdrv_load_vmstate = sd_load_vmstate,
+
+ .create_options = sd_create_options,
+};
+
/*
 * Register the three sheepdog driver entries with bdrv_register():
 * bdrv_sheepdog, bdrv_sheepdog_tcp ("sheepdog+tcp") and bdrv_sheepdog_unix
 * ("sheepdog+unix").  Passed to block_init() right below.
 */
static void bdrv_sheepdog_init(void)
{
bdrv_register(&bdrv_sheepdog);
+ bdrv_register(&bdrv_sheepdog_tcp);
+ bdrv_register(&bdrv_sheepdog_unix);
}
block_init(bdrv_sheepdog_init);