]> Git Repo - qemu.git/blobdiff - block/sheepdog.c
util: move declarations out of qemu-common.h
[qemu.git] / block / sheepdog.c
index c14172cfa6c404f95d4e5ace10a288cf5d084dab..48fc16542294cbdb14280922ba42f5821f7f6112 100644 (file)
  * GNU GPL, version 2 or (at your option) any later version.
  */
 
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu/uri.h"
 #include "qemu/error-report.h"
 #include "qemu/sockets.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qemu/bitops.h"
+#include "qemu/cutils.h"
 
 #define SD_PROTO_VER 0x01
 
@@ -28,7 +31,6 @@
 #define SD_OP_READ_OBJ       0x02
 #define SD_OP_WRITE_OBJ      0x03
 /* 0x04 is used internally by Sheepdog */
-#define SD_OP_DISCARD_OBJ    0x05
 
 #define SD_OP_NEW_VDI        0x11
 #define SD_OP_LOCK_VDI       0x12
@@ -284,6 +286,12 @@ static inline bool is_snapshot(struct SheepdogInode *inode)
     return !!inode->snap_ctime;
 }
 
+static inline size_t count_data_objs(const struct SheepdogInode *inode)
+{
+    return DIV_ROUND_UP(inode->vdi_size,
+                        (1UL << inode->block_size_shift));
+}
+
 #undef DPRINTF
 #ifdef DEBUG_SDOG
 #define DPRINTF(fmt, args...)                                       \
@@ -318,6 +326,10 @@ enum AIOCBState {
     AIOCB_DISCARD_OBJ,
 };
 
+#define AIOCBOverlapping(x, y)                                 \
+    (!(x->max_affect_data_idx < y->min_affect_data_idx          \
+       || y->max_affect_data_idx < x->min_affect_data_idx))
+
 struct SheepdogAIOCB {
     BlockAIOCB common;
 
@@ -334,6 +346,20 @@ struct SheepdogAIOCB {
 
     bool cancelable;
     int nr_pending;
+
+    uint32_t min_affect_data_idx;
+    uint32_t max_affect_data_idx;
+
+    /*
+     * The difference between affect_data_idx and dirty_data_idx:
+     * affect_data_idx represents range of index of all request types.
+     * dirty_data_idx represents range of index updated by COW requests.
+     * dirty_data_idx is used for updating an inode object.
+     */
+    uint32_t min_dirty_data_idx;
+    uint32_t max_dirty_data_idx;
+
+    QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings;
 };
 
 typedef struct BDRVSheepdogState {
@@ -342,9 +368,6 @@ typedef struct BDRVSheepdogState {
 
     SheepdogInode inode;
 
-    uint32_t min_dirty_data_idx;
-    uint32_t max_dirty_data_idx;
-
     char name[SD_MAX_VDI_LEN];
     bool is_snapshot;
     uint32_t cache_flags;
@@ -362,10 +385,17 @@ typedef struct BDRVSheepdogState {
 
     /* Every aio request must be linked to either of these queues. */
     QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
-    QLIST_HEAD(pending_aio_head, AIOReq) pending_aio_head;
     QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;
+
+    CoQueue overlapping_queue;
+    QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
 } BDRVSheepdogState;
 
+typedef struct BDRVSheepdogReopenState {
+    int fd;
+    int cache_flags;
+} BDRVSheepdogReopenState;
+
 static const char * sd_strerror(int err)
 {
     int i;
@@ -498,13 +528,7 @@ static void sd_aio_cancel(BlockAIOCB *blockacb)
     AIOReq *aioreq, *next;
 
     if (sd_acb_cancelable(acb)) {
-        /* Remove outstanding requests from pending and failed queues.  */
-        QLIST_FOREACH_SAFE(aioreq, &s->pending_aio_head, aio_siblings,
-                           next) {
-            if (aioreq->aiocb == acb) {
-                free_aio_req(s, aioreq);
-            }
-        }
+        /* Remove outstanding requests from failed queue.  */
         QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings,
                            next) {
             if (aioreq->aiocb == acb) {
@@ -529,6 +553,10 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
                                    int64_t sector_num, int nb_sectors)
 {
     SheepdogAIOCB *acb;
+    uint32_t object_size;
+    BDRVSheepdogState *s = bs->opaque;
+
+    object_size = (UINT32_C(1) << s->inode.block_size_shift);
 
     acb = qemu_aio_get(&sd_aiocb_info, bs, NULL, NULL);
 
@@ -542,6 +570,14 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
     acb->coroutine = qemu_coroutine_self();
     acb->ret = 0;
     acb->nr_pending = 0;
+
+    acb->min_affect_data_idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size;
+    acb->max_affect_data_idx = (acb->sector_num * BDRV_SECTOR_SIZE +
+                              acb->nb_sectors * BDRV_SECTOR_SIZE) / object_size;
+
+    acb->min_dirty_data_idx = UINT32_MAX;
+    acb->max_dirty_data_idx = 0;
+
     return acb;
 }
 
@@ -581,14 +617,13 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
     ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
     if (ret != sizeof(*hdr)) {
         error_report("failed to send a req, %s", strerror(errno));
-        ret = -socket_error();
-        return ret;
+        return -errno;
     }
 
     ret = qemu_co_send(sockfd, data, *wlen);
     if (ret != *wlen) {
-        ret = -socket_error();
         error_report("failed to send a req, %s", strerror(errno));
+        return -errno;
     }
 
     return ret;
@@ -624,14 +659,16 @@ static coroutine_fn void do_co_req(void *opaque)
     unsigned int *rlen = srco->rlen;
 
     co = qemu_coroutine_self();
-    aio_set_fd_handler(srco->aio_context, sockfd, NULL, restart_co_req, co);
+    aio_set_fd_handler(srco->aio_context, sockfd, false,
+                       NULL, restart_co_req, co);
 
     ret = send_co_req(sockfd, hdr, data, wlen);
     if (ret < 0) {
         goto out;
     }
 
-    aio_set_fd_handler(srco->aio_context, sockfd, restart_co_req, NULL, co);
+    aio_set_fd_handler(srco->aio_context, sockfd, false,
+                       restart_co_req, NULL, co);
 
     ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
     if (ret != sizeof(*hdr)) {
@@ -656,7 +693,8 @@ static coroutine_fn void do_co_req(void *opaque)
 out:
     /* there is at most one request for this sockfd, so it is safe to
      * set each handler to NULL. */
-    aio_set_fd_handler(srco->aio_context, sockfd, NULL, NULL, NULL);
+    aio_set_fd_handler(srco->aio_context, sockfd, false,
+                       NULL, NULL, NULL);
 
     srco->ret = ret;
     srco->finished = true;
@@ -703,44 +741,13 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag);
 static int get_sheep_fd(BDRVSheepdogState *s, Error **errp);
 static void co_write_request(void *opaque);
 
-static AIOReq *find_pending_req(BDRVSheepdogState *s, uint64_t oid)
-{
-    AIOReq *aio_req;
-
-    QLIST_FOREACH(aio_req, &s->pending_aio_head, aio_siblings) {
-        if (aio_req->oid == oid) {
-            return aio_req;
-        }
-    }
-
-    return NULL;
-}
-
-/*
- * This function searchs pending requests to the object `oid', and
- * sends them.
- */
-static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid)
-{
-    AIOReq *aio_req;
-    SheepdogAIOCB *acb;
-
-    while ((aio_req = find_pending_req(s, oid)) != NULL) {
-        acb = aio_req->aiocb;
-        /* move aio_req from pending list to inflight one */
-        QLIST_REMOVE(aio_req, aio_siblings);
-        QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
-                        acb->aiocb_type);
-    }
-}
-
 static coroutine_fn void reconnect_to_sdog(void *opaque)
 {
     BDRVSheepdogState *s = opaque;
     AIOReq *aio_req, *next;
 
-    aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
+                       NULL, NULL);
     close(s->fd);
     s->fd = -1;
 
@@ -837,15 +844,9 @@ static void coroutine_fn aio_read_response(void *opaque)
              */
             if (rsp.result == SD_RES_SUCCESS) {
                 s->inode.data_vdi_id[idx] = s->inode.vdi_id;
-                s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx);
-                s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx);
+                acb->max_dirty_data_idx = MAX(idx, acb->max_dirty_data_idx);
+                acb->min_dirty_data_idx = MIN(idx, acb->min_dirty_data_idx);
             }
-            /*
-             * Some requests may be blocked because simultaneous
-             * create requests are not allowed, so we search the
-             * pending requests here.
-             */
-            send_pending_req(s, aio_req->oid);
         }
         break;
     case AIOCB_READ_UDATA:
@@ -871,10 +872,6 @@ static void coroutine_fn aio_read_response(void *opaque)
             rsp.result = SD_RES_SUCCESS;
             s->discard_supported = false;
             break;
-        case SD_RES_SUCCESS:
-            idx = data_oid_to_idx(aio_req->oid);
-            s->inode.data_vdi_id[idx] = 0;
-            break;
         default:
             break;
         }
@@ -953,7 +950,8 @@ static int get_sheep_fd(BDRVSheepdogState *s, Error **errp)
         return fd;
     }
 
-    aio_set_fd_handler(s->aio_context, fd, co_read_response, NULL, s);
+    aio_set_fd_handler(s->aio_context, fd, false,
+                       co_read_response, NULL, s);
     return fd;
 }
 
@@ -1189,7 +1187,13 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
         hdr.flags = SD_FLAG_CMD_WRITE | flags;
         break;
     case AIOCB_DISCARD_OBJ:
-        hdr.opcode = SD_OP_DISCARD_OBJ;
+        hdr.opcode = SD_OP_WRITE_OBJ;
+        hdr.flags = SD_FLAG_CMD_WRITE | flags;
+        s->inode.data_vdi_id[data_oid_to_idx(oid)] = 0;
+        offset = offsetof(SheepdogInode,
+                          data_vdi_id[data_oid_to_idx(oid)]);
+        oid = vid_to_vdi_oid(s->inode.vdi_id);
+        wlen = datalen = sizeof(uint32_t);
         break;
     }
 
@@ -1208,7 +1212,7 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
 
     qemu_co_mutex_lock(&s->lock);
     s->co_send = qemu_coroutine_self();
-    aio_set_fd_handler(s->aio_context, s->fd,
+    aio_set_fd_handler(s->aio_context, s->fd, false,
                        co_read_response, co_write_request, s);
     socket_set_cork(s->fd, 1);
 
@@ -1227,7 +1231,8 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
     }
 out:
     socket_set_cork(s->fd, 0);
-    aio_set_fd_handler(s->aio_context, s->fd, co_read_response, NULL, s);
+    aio_set_fd_handler(s->aio_context, s->fd, false,
+                       co_read_response, NULL, s);
     s->co_send = NULL;
     qemu_co_mutex_unlock(&s->lock);
 }
@@ -1341,30 +1346,6 @@ out:
     return ret;
 }
 
-/* Return true if the specified request is linked to the pending list. */
-static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req)
-{
-    AIOReq *areq;
-    QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) {
-        if (areq != aio_req && areq->oid == aio_req->oid) {
-            /*
-             * Sheepdog cannot handle simultaneous create requests to the same
-             * object, so we cannot send the request until the previous request
-             * finishes.
-             */
-            DPRINTF("simultaneous create to %" PRIx64 "\n", aio_req->oid);
-            aio_req->flags = 0;
-            aio_req->base_oid = 0;
-            aio_req->create = false;
-            QLIST_REMOVE(aio_req, aio_siblings);
-            QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings);
-            return true;
-        }
-    }
-
-    return false;
-}
-
 static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
 {
     SheepdogAIOCB *acb = aio_req->aiocb;
@@ -1379,10 +1360,6 @@ static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
             goto out;
         }
 
-        if (check_simultaneous_create(s, aio_req)) {
-            return;
-        }
-
         if (s->inode.data_vdi_id[idx]) {
             aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx);
             aio_req->flags |= SD_FLAG_CMD_COW;
@@ -1405,7 +1382,8 @@ static void sd_detach_aio_context(BlockDriverState *bs)
 {
     BDRVSheepdogState *s = bs->opaque;
 
-    aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
+                       NULL, NULL);
 }
 
 static void sd_attach_aio_context(BlockDriverState *bs,
@@ -1414,7 +1392,8 @@ static void sd_attach_aio_context(BlockDriverState *bs,
     BDRVSheepdogState *s = bs->opaque;
 
     s->aio_context = new_context;
-    aio_set_fd_handler(new_context, s->fd, co_read_response, NULL, s);
+    aio_set_fd_handler(new_context, s->fd, false,
+                       co_read_response, NULL, s);
 }
 
 /* TODO Convert to fine grained options */
@@ -1458,8 +1437,8 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
     filename = qemu_opt_get(opts, "filename");
 
     QLIST_INIT(&s->inflight_aio_head);
-    QLIST_INIT(&s->pending_aio_head);
     QLIST_INIT(&s->failed_aio_head);
+    QLIST_INIT(&s->inflight_aiocb_head);
     s->fd = -1;
 
     memset(vdi, 0, sizeof(vdi));
@@ -1518,17 +1497,17 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     memcpy(&s->inode, buf, sizeof(s->inode));
-    s->min_dirty_data_idx = UINT32_MAX;
-    s->max_dirty_data_idx = 0;
 
     bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE;
     pstrcpy(s->name, sizeof(s->name), vdi);
     qemu_co_mutex_init(&s->lock);
+    qemu_co_queue_init(&s->overlapping_queue);
     qemu_opts_del(opts);
     g_free(buf);
     return 0;
 out:
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
+                       false, NULL, NULL, NULL);
     if (s->fd >= 0) {
         closesocket(s->fd);
     }
@@ -1537,6 +1516,70 @@ out:
     return ret;
 }
 
+static int sd_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue,
+                             Error **errp)
+{
+    BDRVSheepdogState *s = state->bs->opaque;
+    BDRVSheepdogReopenState *re_s;
+    int ret = 0;
+
+    re_s = state->opaque = g_new0(BDRVSheepdogReopenState, 1);
+
+    re_s->cache_flags = SD_FLAG_CMD_CACHE;
+    if (state->flags & BDRV_O_NOCACHE) {
+        re_s->cache_flags = SD_FLAG_CMD_DIRECT;
+    }
+
+    re_s->fd = get_sheep_fd(s, errp);
+    if (re_s->fd < 0) {
+        ret = re_s->fd;
+        return ret;
+    }
+
+    return ret;
+}
+
+static void sd_reopen_commit(BDRVReopenState *state)
+{
+    BDRVSheepdogReopenState *re_s = state->opaque;
+    BDRVSheepdogState *s = state->bs->opaque;
+
+    if (s->fd) {
+        aio_set_fd_handler(s->aio_context, s->fd, false,
+                           NULL, NULL, NULL);
+        closesocket(s->fd);
+    }
+
+    s->fd = re_s->fd;
+    s->cache_flags = re_s->cache_flags;
+
+    g_free(state->opaque);
+    state->opaque = NULL;
+
+    return;
+}
+
+static void sd_reopen_abort(BDRVReopenState *state)
+{
+    BDRVSheepdogReopenState *re_s = state->opaque;
+    BDRVSheepdogState *s = state->bs->opaque;
+
+    if (re_s == NULL) {
+        return;
+    }
+
+    if (re_s->fd) {
+        aio_set_fd_handler(s->aio_context, re_s->fd, false,
+                           NULL, NULL, NULL);
+        closesocket(re_s->fd);
+    }
+
+    g_free(state->opaque);
+    state->opaque = NULL;
+
+    return;
+}
+
 static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
                         Error **errp)
 {
@@ -1595,7 +1638,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
 
 static int sd_prealloc(const char *filename, Error **errp)
 {
-    BlockDriverState *bs = NULL;
+    BlockBackend *blk = NULL;
     BDRVSheepdogState *base = NULL;
     unsigned long buf_size;
     uint32_t idx, max_idx;
@@ -1604,19 +1647,23 @@ static int sd_prealloc(const char *filename, Error **errp)
     void *buf = NULL;
     int ret;
 
-    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
-                    NULL, errp);
-    if (ret < 0) {
+    blk = blk_new_open(filename, NULL, NULL,
+                       BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+                       errp);
+    if (blk == NULL) {
+        ret = -EIO;
         goto out_with_err_set;
     }
 
-    vdi_size = bdrv_getlength(bs);
+    blk_set_allow_write_beyond_eof(blk, true);
+
+    vdi_size = blk_getlength(blk);
     if (vdi_size < 0) {
         ret = vdi_size;
         goto out;
     }
 
-    base = bs->opaque;
+    base = blk_bs(blk)->opaque;
     object_size = (UINT32_C(1) << base->inode.block_size_shift);
     buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
     buf = g_malloc0(buf_size);
@@ -1628,23 +1675,24 @@ static int sd_prealloc(const char *filename, Error **errp)
          * The created image can be a cloned image, so we need to read
          * a data from the source image.
          */
-        ret = bdrv_pread(bs, idx * buf_size, buf, buf_size);
+        ret = blk_pread(blk, idx * buf_size, buf, buf_size);
         if (ret < 0) {
             goto out;
         }
-        ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size);
+        ret = blk_pwrite(blk, idx * buf_size, buf, buf_size);
         if (ret < 0) {
             goto out;
         }
     }
 
+    ret = 0;
 out:
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Can't pre-allocate");
     }
 out_with_err_set:
-    if (bs) {
-        bdrv_unref(bs);
+    if (blk) {
+        blk_unref(blk);
     }
     g_free(buf);
 
@@ -1716,7 +1764,7 @@ static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt)
         if ((object_size - 1) & object_size) {    /* not a power of 2? */
             return -EINVAL;
         }
-        obj_order = ffs(object_size) - 1;
+        obj_order = ctz32(object_size);
         if (obj_order < 20 || obj_order > 31) {
             return -EINVAL;
         }
@@ -1784,7 +1832,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
     }
 
     if (backing_file) {
-        BlockDriverState *bs;
+        BlockBackend *blk;
         BDRVSheepdogState *base;
         BlockDriver *drv;
 
@@ -1796,23 +1844,23 @@ static int sd_create(const char *filename, QemuOpts *opts,
             goto out;
         }
 
-        bs = NULL;
-        ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, NULL,
-                        errp);
-        if (ret < 0) {
+        blk = blk_new_open(backing_file, NULL, NULL,
+                           BDRV_O_PROTOCOL | BDRV_O_CACHE_WB, errp);
+        if (blk == NULL) {
+            ret = -EIO;
             goto out;
         }
 
-        base = bs->opaque;
+        base = blk_bs(blk)->opaque;
 
         if (!is_snapshot(&base->inode)) {
             error_setg(errp, "cannot clone from a non snapshot vdi");
-            bdrv_unref(bs);
+            blk_unref(blk);
             ret = -EINVAL;
             goto out;
         }
         s->inode.vdi_id = base->inode.vdi_id;
-        bdrv_unref(bs);
+        blk_unref(blk);
     }
 
     s->aio_context = qemu_get_aio_context();
@@ -1827,8 +1875,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
 
         fd = connect_to_sdog(s, &local_err);
         if (fd < 0) {
-            error_report("%s", error_get_pretty(local_err));
-            error_free(local_err);
+            error_report_err(local_err);
             ret = -EIO;
             goto out;
         }
@@ -1912,7 +1959,8 @@ static void sd_close(BlockDriverState *bs)
         error_report("%s, %s", sd_strerror(rsp->result), s->name);
     }
 
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
+                       false, NULL, NULL, NULL);
     closesocket(s->fd);
     g_free(s->host_spec);
 }
@@ -1974,16 +2022,16 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
     AIOReq *aio_req;
     uint32_t offset, data_len, mn, mx;
 
-    mn = s->min_dirty_data_idx;
-    mx = s->max_dirty_data_idx;
+    mn = acb->min_dirty_data_idx;
+    mx = acb->max_dirty_data_idx;
     if (mn <= mx) {
         /* we need to update the vdi object. */
         offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) +
             mn * sizeof(s->inode.data_vdi_id[0]);
         data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]);
 
-        s->min_dirty_data_idx = UINT32_MAX;
-        s->max_dirty_data_idx = 0;
+        acb->min_dirty_data_idx = UINT32_MAX;
+        acb->max_dirty_data_idx = 0;
 
         iov.iov_base = &s->inode;
         iov.iov_len = sizeof(s->inode);
@@ -2192,15 +2240,11 @@ static int coroutine_fn sd_co_rw_vector(void *p)
         }
 
         aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create,
-                                old_oid, done);
+                                old_oid,
+                                acb->aiocb_type == AIOCB_DISCARD_OBJ ?
+                                0 : done);
         QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
 
-        if (create) {
-            if (check_simultaneous_create(s, aio_req)) {
-                goto done;
-            }
-        }
-
         add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
                         acb->aiocb_type);
     done:
@@ -2215,6 +2259,20 @@ out:
     return 1;
 }
 
+static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
+{
+    SheepdogAIOCB *cb;
+
+    QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
+        if (AIOCBOverlapping(aiocb, cb)) {
+            return true;
+        }
+    }
+
+    QLIST_INSERT_HEAD(&s->inflight_aiocb_head, aiocb, aiocb_siblings);
+    return false;
+}
+
 static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
                         int nb_sectors, QEMUIOVector *qiov)
 {
@@ -2234,14 +2292,25 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
     acb->aio_done_func = sd_write_done;
     acb->aiocb_type = AIOCB_WRITE_UDATA;
 
+retry:
+    if (check_overlapping_aiocb(s, acb)) {
+        qemu_co_queue_wait(&s->overlapping_queue);
+        goto retry;
+    }
+
     ret = sd_co_rw_vector(acb);
     if (ret <= 0) {
+        QLIST_REMOVE(acb, aiocb_siblings);
+        qemu_co_queue_restart_all(&s->overlapping_queue);
         qemu_aio_unref(acb);
         return ret;
     }
 
     qemu_coroutine_yield();
 
+    QLIST_REMOVE(acb, aiocb_siblings);
+    qemu_co_queue_restart_all(&s->overlapping_queue);
+
     return acb->ret;
 }
 
@@ -2250,19 +2319,30 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
 {
     SheepdogAIOCB *acb;
     int ret;
+    BDRVSheepdogState *s = bs->opaque;
 
     acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
     acb->aiocb_type = AIOCB_READ_UDATA;
     acb->aio_done_func = sd_finish_aiocb;
 
+retry:
+    if (check_overlapping_aiocb(s, acb)) {
+        qemu_co_queue_wait(&s->overlapping_queue);
+        goto retry;
+    }
+
     ret = sd_co_rw_vector(acb);
     if (ret <= 0) {
+        QLIST_REMOVE(acb, aiocb_siblings);
+        qemu_co_queue_restart_all(&s->overlapping_queue);
         qemu_aio_unref(acb);
         return ret;
     }
 
     qemu_coroutine_yield();
 
+    QLIST_REMOVE(acb, aiocb_siblings);
+    qemu_co_queue_restart_all(&s->overlapping_queue);
     return acb->ret;
 }
 
@@ -2339,8 +2419,8 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 
     ret = do_sd_create(s, &new_vid, 1, &local_err);
     if (ret < 0) {
-        error_report("failed to create inode for snapshot: %s",
-                     error_get_pretty(local_err));
+        error_reportf_err(local_err,
+                          "failed to create inode for snapshot: ");
         goto cleanup;
     }
 
@@ -2411,13 +2491,131 @@ out:
     return ret;
 }
 
+#define NR_BATCHED_DISCARD 128
+
+static bool remove_objects(BDRVSheepdogState *s)
+{
+    int fd, i = 0, nr_objs = 0;
+    Error *local_err = NULL;
+    int ret = 0;
+    bool result = true;
+    SheepdogInode *inode = &s->inode;
+
+    fd = connect_to_sdog(s, &local_err);
+    if (fd < 0) {
+        error_report_err(local_err);
+        return false;
+    }
+
+    nr_objs = count_data_objs(inode);
+    while (i < nr_objs) {
+        int start_idx, nr_filled_idx;
+
+        while (i < nr_objs && !inode->data_vdi_id[i]) {
+            i++;
+        }
+        start_idx = i;
+
+        nr_filled_idx = 0;
+        while (i < nr_objs && nr_filled_idx < NR_BATCHED_DISCARD) {
+            if (inode->data_vdi_id[i]) {
+                inode->data_vdi_id[i] = 0;
+                nr_filled_idx++;
+            }
+
+            i++;
+        }
+
+        ret = write_object(fd, s->aio_context,
+                           (char *)&inode->data_vdi_id[start_idx],
+                           vid_to_vdi_oid(s->inode.vdi_id), inode->nr_copies,
+                           (i - start_idx) * sizeof(uint32_t),
+                           offsetof(struct SheepdogInode,
+                                    data_vdi_id[start_idx]),
+                           false, s->cache_flags);
+        if (ret < 0) {
+            error_report("failed to discard snapshot inode.");
+            result = false;
+            goto out;
+        }
+    }
+
+out:
+    closesocket(fd);
+    return result;
+}
+
 static int sd_snapshot_delete(BlockDriverState *bs,
                               const char *snapshot_id,
                               const char *name,
                               Error **errp)
 {
-    /* FIXME: Delete specified snapshot id.  */
-    return 0;
+    unsigned long snap_id = 0;
+    char snap_tag[SD_MAX_VDI_TAG_LEN];
+    Error *local_err = NULL;
+    int fd, ret;
+    char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
+    BDRVSheepdogState *s = bs->opaque;
+    unsigned int wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN, rlen = 0;
+    uint32_t vid;
+    SheepdogVdiReq hdr = {
+        .opcode = SD_OP_DEL_VDI,
+        .data_length = wlen,
+        .flags = SD_FLAG_CMD_WRITE,
+    };
+    SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
+
+    if (!remove_objects(s)) {
+        return -1;
+    }
+
+    memset(buf, 0, sizeof(buf));
+    memset(snap_tag, 0, sizeof(snap_tag));
+    pstrcpy(buf, SD_MAX_VDI_LEN, s->name);
+    ret = qemu_strtoul(snapshot_id, NULL, 10, &snap_id);
+    if (ret || snap_id > UINT32_MAX) {
+        error_setg(errp, "Invalid snapshot ID: %s",
+                         snapshot_id ? snapshot_id : "<null>");
+        return -EINVAL;
+    }
+
+    if (snap_id) {
+        hdr.snapid = (uint32_t) snap_id;
+    } else {
+        pstrcpy(snap_tag, sizeof(snap_tag), snapshot_id);
+        pstrcpy(buf + SD_MAX_VDI_LEN, SD_MAX_VDI_TAG_LEN, snap_tag);
+    }
+
+    ret = find_vdi_name(s, s->name, snap_id, snap_tag, &vid, true,
+                        &local_err);
+    if (ret) {
+        return ret;
+    }
+
+    fd = connect_to_sdog(s, &local_err);
+    if (fd < 0) {
+        error_report_err(local_err);
+        return -1;
+    }
+
+    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+                 buf, &wlen, &rlen);
+    closesocket(fd);
+    if (ret) {
+        return ret;
+    }
+
+    switch (rsp->result) {
+    case SD_RES_NO_VDI:
+        error_report("%s was already deleted", s->name);
+    case SD_RES_SUCCESS:
+        break;
+    default:
+        error_report("%s, %s", sd_strerror(rsp->result), s->name);
+        return -1;
+    }
+
+    return ret;
 }
 
 static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
@@ -2597,32 +2795,51 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
                                       int nb_sectors)
 {
     SheepdogAIOCB *acb;
-    QEMUIOVector dummy;
     BDRVSheepdogState *s = bs->opaque;
     int ret;
+    QEMUIOVector discard_iov;
+    struct iovec iov;
+    uint32_t zero = 0;
 
     if (!s->discard_supported) {
             return 0;
     }
 
-    acb = sd_aio_setup(bs, &dummy, sector_num, nb_sectors);
+    memset(&discard_iov, 0, sizeof(discard_iov));
+    memset(&iov, 0, sizeof(iov));
+    iov.iov_base = &zero;
+    iov.iov_len = sizeof(zero);
+    discard_iov.iov = &iov;
+    discard_iov.niov = 1;
+    acb = sd_aio_setup(bs, &discard_iov, sector_num, nb_sectors);
     acb->aiocb_type = AIOCB_DISCARD_OBJ;
     acb->aio_done_func = sd_finish_aiocb;
 
+retry:
+    if (check_overlapping_aiocb(s, acb)) {
+        qemu_co_queue_wait(&s->overlapping_queue);
+        goto retry;
+    }
+
     ret = sd_co_rw_vector(acb);
     if (ret <= 0) {
+        QLIST_REMOVE(acb, aiocb_siblings);
+        qemu_co_queue_restart_all(&s->overlapping_queue);
         qemu_aio_unref(acb);
         return ret;
     }
 
     qemu_coroutine_yield();
 
+    QLIST_REMOVE(acb, aiocb_siblings);
+    qemu_co_queue_restart_all(&s->overlapping_queue);
+
     return acb->ret;
 }
 
 static coroutine_fn int64_t
 sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
-                       int *pnum)
+                       int *pnum, BlockDriverState **file)
 {
     BDRVSheepdogState *s = bs->opaque;
     SheepdogInode *inode = &s->inode;
@@ -2653,6 +2870,9 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
     if (*pnum > nb_sectors) {
         *pnum = nb_sectors;
     }
+    if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
+        *file = bs;
+    }
     return ret;
 }
 
@@ -2712,6 +2932,9 @@ static BlockDriver bdrv_sheepdog = {
     .instance_size  = sizeof(BDRVSheepdogState),
     .bdrv_needs_filename = true,
     .bdrv_file_open = sd_open,
+    .bdrv_reopen_prepare    = sd_reopen_prepare,
+    .bdrv_reopen_commit     = sd_reopen_commit,
+    .bdrv_reopen_abort      = sd_reopen_abort,
     .bdrv_close     = sd_close,
     .bdrv_create    = sd_create,
     .bdrv_has_zero_init = bdrv_has_zero_init_1,
@@ -2745,6 +2968,9 @@ static BlockDriver bdrv_sheepdog_tcp = {
     .instance_size  = sizeof(BDRVSheepdogState),
     .bdrv_needs_filename = true,
     .bdrv_file_open = sd_open,
+    .bdrv_reopen_prepare    = sd_reopen_prepare,
+    .bdrv_reopen_commit     = sd_reopen_commit,
+    .bdrv_reopen_abort      = sd_reopen_abort,
     .bdrv_close     = sd_close,
     .bdrv_create    = sd_create,
     .bdrv_has_zero_init = bdrv_has_zero_init_1,
@@ -2778,6 +3004,9 @@ static BlockDriver bdrv_sheepdog_unix = {
     .instance_size  = sizeof(BDRVSheepdogState),
     .bdrv_needs_filename = true,
     .bdrv_file_open = sd_open,
+    .bdrv_reopen_prepare    = sd_reopen_prepare,
+    .bdrv_reopen_commit     = sd_reopen_commit,
+    .bdrv_reopen_abort      = sd_reopen_abort,
     .bdrv_close     = sd_close,
     .bdrv_create    = sd_create,
     .bdrv_has_zero_init = bdrv_has_zero_init_1,
This page took 0.050278 seconds and 4 git commands to generate.