X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/8007429a99d6ea8480ba0a7a5fb5ae92473f798c..b6af097528caba5b23b79db3f1f1fd08fa4fa11e:/block/sheepdog.c diff --git a/block/sheepdog.c b/block/sheepdog.c index 2d78ef91f7..9585beb73e 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -37,6 +37,7 @@ #define SD_OP_READ_VDIS 0x15 #define SD_OP_FLUSH_VDI 0x16 #define SD_OP_DEL_VDI 0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 #define SD_FLAG_CMD_WRITE 0x01 #define SD_FLAG_CMD_COW 0x02 @@ -91,6 +92,7 @@ #define SD_NR_VDIS (1U << 24) #define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22) #define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS) +#define SD_DEFAULT_BLOCK_SIZE_SHIFT 22 /* * For erasure coding, we use at most SD_EC_MAX_STRIP for data strips and * (SD_EC_MAX_STRIP - 1) for parity strips @@ -167,7 +169,8 @@ typedef struct SheepdogVdiReq { uint32_t base_vdi_id; uint8_t copies; uint8_t copy_policy; - uint8_t reserved[2]; + uint8_t store_policy; + uint8_t block_size_shift; uint32_t snapid; uint32_t type; uint32_t pad[2]; @@ -186,6 +189,21 @@ typedef struct SheepdogVdiRsp { uint32_t pad[5]; } SheepdogVdiRsp; +typedef struct SheepdogClusterRsp { + uint8_t proto_ver; + uint8_t opcode; + uint16_t flags; + uint32_t epoch; + uint32_t id; + uint32_t data_length; + uint32_t result; + uint8_t nr_copies; + uint8_t copy_policy; + uint8_t block_size_shift; + uint8_t __pad1; + uint32_t __pad2[6]; +} SheepdogClusterRsp; + typedef struct SheepdogInode { char name[SD_MAX_VDI_LEN]; char tag[SD_MAX_VDI_TAG_LEN]; @@ -300,8 +318,12 @@ enum AIOCBState { AIOCB_DISCARD_OBJ, }; +#define AIOCBOverwrapping(x, y) \ + (!(x->max_affect_data_idx < y->min_affect_data_idx \ + || y->max_affect_data_idx < x->min_affect_data_idx)) + struct SheepdogAIOCB { - BlockDriverAIOCB common; + BlockAIOCB common; QEMUIOVector *qiov; @@ -316,6 +338,11 @@ struct SheepdogAIOCB { bool cancelable; int nr_pending; + + uint32_t min_affect_data_idx; + uint32_t max_affect_data_idx; + + QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings; }; typedef struct BDRVSheepdogState { @@ -344,8 +371,10 @@ typedef struct BDRVSheepdogState { /* Every aio request must be linked to either of these queues. */ QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head; - QLIST_HEAD(pending_aio_head, AIOReq) pending_aio_head; QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head; + + CoQueue overwrapping_queue; + QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head; } BDRVSheepdogState; static const char * sd_strerror(int err) @@ -473,20 +502,14 @@ static bool sd_acb_cancelable(const SheepdogAIOCB *acb) return true; } -static void sd_aio_cancel(BlockDriverAIOCB *blockacb) +static void sd_aio_cancel(BlockAIOCB *blockacb) { SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb; BDRVSheepdogState *s = acb->common.bs->opaque; AIOReq *aioreq, *next; if (sd_acb_cancelable(acb)) { - /* Remove outstanding requests from pending and failed queues. */ - QLIST_FOREACH_SAFE(aioreq, &s->pending_aio_head, aio_siblings, - next) { - if (aioreq->aiocb == acb) { - free_aio_req(s, aioreq); - } - } + /* Remove outstanding requests from failed queue. */ QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings, next) { if (aioreq->aiocb == acb) { @@ -511,6 +534,10 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, int64_t sector_num, int nb_sectors) { SheepdogAIOCB *acb; + uint32_t object_size; + BDRVSheepdogState *s = bs->opaque; + + object_size = (UINT32_C(1) << s->inode.block_size_shift); acb = qemu_aio_get(&sd_aiocb_info, bs, NULL, NULL); @@ -524,9 +551,15 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, acb->coroutine = qemu_coroutine_self(); acb->ret = 0; acb->nr_pending = 0; + + acb->min_affect_data_idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size; + acb->max_affect_data_idx = (acb->sector_num * BDRV_SECTOR_SIZE + + acb->nb_sectors * BDRV_SECTOR_SIZE) / object_size; + return acb; } +/* Return -EIO in case of error, file descriptor on success */ static int connect_to_sdog(BDRVSheepdogState *s, Error **errp) { int fd; @@ -546,11 +579,14 @@ static int connect_to_sdog(BDRVSheepdogState *s, Error **errp) if (fd >= 0) { qemu_set_nonblock(fd); + } else { + fd = -EIO; } return fd; } +/* Return 0 on success and -errno in case of error */ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data, unsigned int *wlen) { @@ -559,11 +595,13 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data, ret = qemu_co_send(sockfd, hdr, sizeof(*hdr)); if (ret != sizeof(*hdr)) { error_report("failed to send a req, %s", strerror(errno)); + ret = -socket_error(); return ret; } ret = qemu_co_send(sockfd, data, *wlen); if (ret != *wlen) { + ret = -socket_error(); error_report("failed to send a req, %s", strerror(errno)); } @@ -638,6 +676,11 @@ out: srco->finished = true; } +/* + * Send the request to the sheep in a synchronous manner. + * + * Return 0 on success, -errno in case of error. + */ static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr, void *data, unsigned int *wlen, unsigned int *rlen) { @@ -674,38 +717,6 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag); static int get_sheep_fd(BDRVSheepdogState *s, Error **errp); static void co_write_request(void *opaque); -static AIOReq *find_pending_req(BDRVSheepdogState *s, uint64_t oid) -{ - AIOReq *aio_req; - - QLIST_FOREACH(aio_req, &s->pending_aio_head, aio_siblings) { - if (aio_req->oid == oid) { - return aio_req; - } - } - - return NULL; -} - -/* - * This function searchs pending requests to the object `oid', and - * sends them. - */ -static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid) -{ - AIOReq *aio_req; - SheepdogAIOCB *acb; - - while ((aio_req = find_pending_req(s, oid)) != NULL) { - acb = aio_req->aiocb; - /* move aio_req from pending list to inflight one */ - QLIST_REMOVE(aio_req, aio_siblings); - QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, - acb->aiocb_type); - } -} - static coroutine_fn void reconnect_to_sdog(void *opaque) { BDRVSheepdogState *s = opaque; @@ -726,8 +737,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque) s->fd = get_sheep_fd(s, &local_err); if (s->fd < 0) { DPRINTF("Wait for connection to be established\n"); - error_report("%s", error_get_pretty(local_err)); - error_free(local_err); + error_report_err(local_err); co_aio_sleep_ns(bdrv_get_aio_context(s->bs), QEMU_CLOCK_REALTIME, 1000000000ULL); } @@ -812,12 +822,6 @@ static void coroutine_fn aio_read_response(void *opaque) s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx); s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx); } - /* - * Some requests may be blocked because simultaneous - * create requests are not allowed, so we search the - * pending requests here. - */ - send_pending_req(s, aio_req->oid); } break; case AIOCB_READ_UDATA: @@ -1283,8 +1287,7 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag) fd = connect_to_sdog(s, &local_err); if (fd < 0) { - error_report("%s", error_get_pretty(local_err));; - error_free(local_err); + error_report_err(local_err); return -EIO; } @@ -1292,8 +1295,7 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag) ret = find_vdi_name(s, s->name, snapid, tag, &vid, false, &local_err); if (ret) { - error_report("%s", error_get_pretty(local_err));; - error_free(local_err); + error_report_err(local_err); goto out; } @@ -1315,30 +1317,6 @@ out: return ret; } -/* Return true if the specified request is linked to the pending list. */ -static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req) -{ - AIOReq *areq; - QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) { - if (areq != aio_req && areq->oid == aio_req->oid) { - /* - * Sheepdog cannot handle simultaneous create requests to the same - * object, so we cannot send the request until the previous request - * finishes. - */ - DPRINTF("simultaneous create to %" PRIx64 "\n", aio_req->oid); - aio_req->flags = 0; - aio_req->base_oid = 0; - aio_req->create = false; - QLIST_REMOVE(aio_req, aio_siblings); - QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings); - return true; - } - } - - return false; -} - static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) { SheepdogAIOCB *acb = aio_req->aiocb; @@ -1353,10 +1331,6 @@ static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) goto out; } - if (check_simultaneous_create(s, aio_req)) { - return; - } - if (s->inode.data_vdi_id[idx]) { aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx); aio_req->flags |= SD_FLAG_CMD_COW; @@ -1432,8 +1406,8 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, filename = qemu_opt_get(opts, "filename"); QLIST_INIT(&s->inflight_aio_head); - QLIST_INIT(&s->pending_aio_head); QLIST_INIT(&s->failed_aio_head); + QLIST_INIT(&s->inflight_aiocb_head); s->fd = -1; memset(vdi, 0, sizeof(vdi)); @@ -1498,6 +1472,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE; pstrcpy(s->name, sizeof(s->name), vdi); qemu_co_mutex_init(&s->lock); + qemu_co_queue_init(&s->overwrapping_queue); qemu_opts_del(opts); g_free(buf); return 0; @@ -1544,6 +1519,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, hdr.vdi_size = s->inode.vdi_size; hdr.copy_policy = s->inode.copy_policy; hdr.copies = s->inode.nr_copies; + hdr.block_size_shift = s->inode.block_size_shift; ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen); @@ -1569,9 +1545,12 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, static int sd_prealloc(const char *filename, Error **errp) { BlockDriverState *bs = NULL; + BDRVSheepdogState *base = NULL; + unsigned long buf_size; uint32_t idx, max_idx; + uint32_t object_size; int64_t vdi_size; - void *buf = g_malloc0(SD_DATA_OBJ_SIZE); + void *buf = NULL; int ret; ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, @@ -1585,18 +1564,24 @@ static int sd_prealloc(const char *filename, Error **errp) ret = vdi_size; goto out; } - max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE); + + base = bs->opaque; + object_size = (UINT32_C(1) << base->inode.block_size_shift); + buf_size = MIN(object_size, SD_DATA_OBJ_SIZE); + buf = g_malloc0(buf_size); + + max_idx = DIV_ROUND_UP(vdi_size, buf_size); for (idx = 0; idx < max_idx; idx++) { /* * The created image can be a cloned image, so we need to read * a data from the source image. */ - ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); + ret = bdrv_pread(bs, idx * buf_size, buf, buf_size); if (ret < 0) { goto out; } - ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); + ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size); if (ret < 0) { goto out; } @@ -1669,6 +1654,27 @@ static int parse_redundancy(BDRVSheepdogState *s, const char *opt) return 0; } +static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt) +{ + struct SheepdogInode *inode = &s->inode; + uint64_t object_size; + int obj_order; + + object_size = qemu_opt_get_size_del(opt, BLOCK_OPT_OBJECT_SIZE, 0); + if (object_size) { + if ((object_size - 1) & object_size) { /* not a power of 2? */ + return -EINVAL; + } + obj_order = ctz32(object_size); + if (obj_order < 20 || obj_order > 31) { + return -EINVAL; + } + inode->block_size_shift = (uint8_t)obj_order; + } + + return 0; +} + static int sd_create(const char *filename, QemuOpts *opts, Error **errp) { @@ -1679,6 +1685,7 @@ static int sd_create(const char *filename, QemuOpts *opts, BDRVSheepdogState *s; char tag[SD_MAX_VDI_TAG_LEN]; uint32_t snapid; + uint64_t max_vdi_size; bool prealloc = false; s = g_new0(BDRVSheepdogState, 1); @@ -1717,10 +1724,11 @@ static int sd_create(const char *filename, QemuOpts *opts, goto out; } } - - if (s->inode.vdi_size > SD_MAX_VDI_SIZE) { - error_setg(errp, "too big image size"); - ret = -EINVAL; + ret = parse_block_size_shift(s, opts); + if (ret < 0) { + error_setg(errp, "Invalid object_size." + " obect_size needs to be power of 2" + " and be limited from 2^20 to 2^31"); goto out; } @@ -1730,7 +1738,7 @@ static int sd_create(const char *filename, QemuOpts *opts, BlockDriver *drv; /* Currently, only Sheepdog backing image is supported. */ - drv = bdrv_find_protocol(backing_file, true); + drv = bdrv_find_protocol(backing_file, true, NULL); if (!drv || strcmp(drv->protocol_name, "sheepdog") != 0) { error_setg(errp, "backing_file must be a sheepdog image"); ret = -EINVAL; @@ -1757,6 +1765,51 @@ static int sd_create(const char *filename, QemuOpts *opts, } s->aio_context = qemu_get_aio_context(); + + /* if block_size_shift is not specified, get cluster default value */ + if (s->inode.block_size_shift == 0) { + SheepdogVdiReq hdr; + SheepdogClusterRsp *rsp = (SheepdogClusterRsp *)&hdr; + Error *local_err = NULL; + int fd; + unsigned int wlen = 0, rlen = 0; + + fd = connect_to_sdog(s, &local_err); + if (fd < 0) { + error_report("%s", error_get_pretty(local_err)); + error_free(local_err); + ret = -EIO; + goto out; + } + + memset(&hdr, 0, sizeof(hdr)); + hdr.opcode = SD_OP_GET_CLUSTER_DEFAULT; + hdr.proto_ver = SD_PROTO_VER; + + ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, + NULL, &wlen, &rlen); + closesocket(fd); + if (ret) { + error_setg_errno(errp, -ret, "failed to get cluster default"); + goto out; + } + if (rsp->result == SD_RES_SUCCESS) { + s->inode.block_size_shift = rsp->block_size_shift; + } else { + s->inode.block_size_shift = SD_DEFAULT_BLOCK_SIZE_SHIFT; + } + } + + max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS; + + if (s->inode.vdi_size > max_vdi_size) { + error_setg(errp, "An image is too large." + " The maximum image size is %"PRIu64 "GB", + max_vdi_size / 1024 / 1024 / 1024); + ret = -EINVAL; + goto out; + } + ret = do_sd_create(s, &vid, 0, errp); if (ret) { goto out; @@ -1785,8 +1838,7 @@ static void sd_close(BlockDriverState *bs) fd = connect_to_sdog(s, &local_err); if (fd < 0) { - error_report("%s", error_get_pretty(local_err));; - error_free(local_err); + error_report_err(local_err); return; } @@ -1827,19 +1879,20 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) BDRVSheepdogState *s = bs->opaque; int ret, fd; unsigned int datalen; + uint64_t max_vdi_size; + max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS; if (offset < s->inode.vdi_size) { error_report("shrinking is not supported"); return -EINVAL; - } else if (offset > SD_MAX_VDI_SIZE) { + } else if (offset > max_vdi_size) { error_report("too big image size"); return -EINVAL; } fd = connect_to_sdog(s, &local_err); if (fd < 0) { - error_report("%s", error_get_pretty(local_err));; - error_free(local_err); + error_report_err(local_err); return fd; } @@ -1912,8 +1965,7 @@ static bool sd_delete(BDRVSheepdogState *s) fd = connect_to_sdog(s, &local_err); if (fd < 0) { - error_report("%s", error_get_pretty(local_err));; - error_free(local_err); + error_report_err(local_err); return false; } @@ -1960,8 +2012,7 @@ static int sd_create_branch(BDRVSheepdogState *s) deleted = sd_delete(s); ret = do_sd_create(s, &vid, !deleted, &local_err); if (ret) { - error_report("%s", error_get_pretty(local_err));; - error_free(local_err); + error_report_err(local_err); goto out; } @@ -1969,8 +2020,7 @@ static int sd_create_branch(BDRVSheepdogState *s) fd = connect_to_sdog(s, &local_err); if (fd < 0) { - error_report("%s", error_get_pretty(local_err));; - error_free(local_err); + error_report_err(local_err); ret = fd; goto out; } @@ -2013,9 +2063,10 @@ static int coroutine_fn sd_co_rw_vector(void *p) SheepdogAIOCB *acb = p; int ret = 0; unsigned long len, done = 0, total = acb->nb_sectors * BDRV_SECTOR_SIZE; - unsigned long idx = acb->sector_num * BDRV_SECTOR_SIZE / SD_DATA_OBJ_SIZE; + unsigned long idx; + uint32_t object_size; uint64_t oid; - uint64_t offset = (acb->sector_num * BDRV_SECTOR_SIZE) % SD_DATA_OBJ_SIZE; + uint64_t offset; BDRVSheepdogState *s = acb->common.bs->opaque; SheepdogInode *inode = &s->inode; AIOReq *aio_req; @@ -2032,6 +2083,10 @@ static int coroutine_fn sd_co_rw_vector(void *p) } } + object_size = (UINT32_C(1) << inode->block_size_shift); + idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size; + offset = (acb->sector_num * BDRV_SECTOR_SIZE) % object_size; + /* * Make sure we don't free the aiocb before we are done with all requests. * This additional reference is dropped at the end of this function. @@ -2045,7 +2100,7 @@ static int coroutine_fn sd_co_rw_vector(void *p) oid = vid_to_data_oid(inode->data_vdi_id[idx], idx); - len = MIN(total - done, SD_DATA_OBJ_SIZE - offset); + len = MIN(total - done, object_size - offset); switch (acb->aiocb_type) { case AIOCB_READ_UDATA: @@ -2069,7 +2124,7 @@ static int coroutine_fn sd_co_rw_vector(void *p) * We discard the object only when the whole object is * 1) allocated 2) trimmed. Otherwise, simply skip it. */ - if (len != SD_DATA_OBJ_SIZE || inode->data_vdi_id[idx] == 0) { + if (len != object_size || inode->data_vdi_id[idx] == 0) { goto done; } break; @@ -2089,12 +2144,6 @@ static int coroutine_fn sd_co_rw_vector(void *p) old_oid, done); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - if (create) { - if (check_simultaneous_create(s, aio_req)) { - goto done; - } - } - add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, acb->aiocb_type); done: @@ -2109,6 +2158,20 @@ out: return 1; } +static bool check_overwrapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb) +{ + SheepdogAIOCB *cb; + + QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) { + if (AIOCBOverwrapping(aiocb, cb)) { + return true; + } + } + + QLIST_INSERT_HEAD(&s->inflight_aiocb_head, aiocb, aiocb_siblings); + return false; +} + static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { @@ -2117,7 +2180,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE; BDRVSheepdogState *s = bs->opaque; - if (bs->growable && offset > s->inode.vdi_size) { + if (offset > s->inode.vdi_size) { ret = sd_truncate(bs, offset); if (ret < 0) { return ret; @@ -2128,14 +2191,25 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, acb->aio_done_func = sd_write_done; acb->aiocb_type = AIOCB_WRITE_UDATA; +retry: + if (check_overwrapping_aiocb(s, acb)) { + qemu_co_queue_wait(&s->overwrapping_queue); + goto retry; + } + ret = sd_co_rw_vector(acb); if (ret <= 0) { + QLIST_REMOVE(acb, aiocb_siblings); + qemu_co_queue_restart_all(&s->overwrapping_queue); qemu_aio_unref(acb); return ret; } qemu_coroutine_yield(); + QLIST_REMOVE(acb, aiocb_siblings); + qemu_co_queue_restart_all(&s->overwrapping_queue); + return acb->ret; } @@ -2144,19 +2218,30 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num, { SheepdogAIOCB *acb; int ret; + BDRVSheepdogState *s = bs->opaque; acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors); acb->aiocb_type = AIOCB_READ_UDATA; acb->aio_done_func = sd_finish_aiocb; +retry: + if (check_overwrapping_aiocb(s, acb)) { + qemu_co_queue_wait(&s->overwrapping_queue); + goto retry; + } + ret = sd_co_rw_vector(acb); if (ret <= 0) { + QLIST_REMOVE(acb, aiocb_siblings); + qemu_co_queue_restart_all(&s->overwrapping_queue); qemu_aio_unref(acb); return ret; } qemu_coroutine_yield(); + QLIST_REMOVE(acb, aiocb_siblings); + qemu_co_queue_restart_all(&s->overwrapping_queue); return acb->ret; } @@ -2218,8 +2303,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) /* refresh inode. */ fd = connect_to_sdog(s, &local_err); if (fd < 0) { - error_report("%s", error_get_pretty(local_err));; - error_free(local_err); + error_report_err(local_err); ret = fd; goto cleanup; } @@ -2234,10 +2318,9 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) ret = do_sd_create(s, &new_vid, 1, &local_err); if (ret < 0) { - error_report("%s", error_get_pretty(local_err));; + error_report("failed to create inode for snapshot: %s", + error_get_pretty(local_err)); error_free(local_err); - error_report("failed to create inode for snapshot. %s", - strerror(errno)); goto cleanup; } @@ -2336,8 +2419,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) fd = connect_to_sdog(s, &local_err); if (fd < 0) { - error_report("%s", error_get_pretty(local_err));; - error_free(local_err); + error_report_err(local_err); ret = fd; goto out; } @@ -2366,8 +2448,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) fd = connect_to_sdog(s, &local_err); if (fd < 0) { - error_report("%s", error_get_pretty(local_err));; - error_free(local_err); + error_report_err(local_err); ret = fd; goto out; } @@ -2426,19 +2507,19 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, uint64_t offset; uint32_t vdi_index; uint32_t vdi_id = load ? s->inode.parent_vdi_id : s->inode.vdi_id; + uint32_t object_size = (UINT32_C(1) << s->inode.block_size_shift); fd = connect_to_sdog(s, &local_err); if (fd < 0) { - error_report("%s", error_get_pretty(local_err));; - error_free(local_err); + error_report_err(local_err); return fd; } while (remaining) { - vdi_index = pos / SD_DATA_OBJ_SIZE; - offset = pos % SD_DATA_OBJ_SIZE; + vdi_index = pos / object_size; + offset = pos % object_size; - data_len = MIN(remaining, SD_DATA_OBJ_SIZE - offset); + data_len = MIN(remaining, object_size - offset); vmstate_oid = vid_to_vmstate_oid(vdi_id, vdi_index); @@ -2508,14 +2589,25 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num, acb->aiocb_type = AIOCB_DISCARD_OBJ; acb->aio_done_func = sd_finish_aiocb; +retry: + if (check_overwrapping_aiocb(s, acb)) { + qemu_co_queue_wait(&s->overwrapping_queue); + goto retry; + } + ret = sd_co_rw_vector(acb); if (ret <= 0) { + QLIST_REMOVE(acb, aiocb_siblings); + qemu_co_queue_restart_all(&s->overwrapping_queue); qemu_aio_unref(acb); return ret; } qemu_coroutine_yield(); + QLIST_REMOVE(acb, aiocb_siblings); + qemu_co_queue_restart_all(&s->overwrapping_queue); + return acb->ret; } @@ -2525,10 +2617,11 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, { BDRVSheepdogState *s = bs->opaque; SheepdogInode *inode = &s->inode; + uint32_t object_size = (UINT32_C(1) << inode->block_size_shift); uint64_t offset = sector_num * BDRV_SECTOR_SIZE; - unsigned long start = offset / SD_DATA_OBJ_SIZE, + unsigned long start = offset / object_size, end = DIV_ROUND_UP((sector_num + nb_sectors) * - BDRV_SECTOR_SIZE, SD_DATA_OBJ_SIZE); + BDRV_SECTOR_SIZE, object_size); unsigned long idx; int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; @@ -2547,7 +2640,7 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, } } - *pnum = (idx - start) * SD_DATA_OBJ_SIZE / BDRV_SECTOR_SIZE; + *pnum = (idx - start) * object_size / BDRV_SECTOR_SIZE; if (*pnum > nb_sectors) { *pnum = nb_sectors; } @@ -2558,14 +2651,15 @@ static int64_t sd_get_allocated_file_size(BlockDriverState *bs) { BDRVSheepdogState *s = bs->opaque; SheepdogInode *inode = &s->inode; - unsigned long i, last = DIV_ROUND_UP(inode->vdi_size, SD_DATA_OBJ_SIZE); + uint32_t object_size = (UINT32_C(1) << inode->block_size_shift); + unsigned long i, last = DIV_ROUND_UP(inode->vdi_size, object_size); uint64_t size = 0; for (i = 0; i < last; i++) { if (inode->data_vdi_id[i] == 0) { continue; } - size += SD_DATA_OBJ_SIZE; + size += object_size; } return size; } @@ -2594,6 +2688,11 @@ static QemuOptsList sd_create_opts = { .type = QEMU_OPT_STRING, .help = "Redundancy of the image" }, + { + .name = BLOCK_OPT_OBJECT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Object size of the image" + }, { /* end of list */ } } };