X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/e376a788ae130454ad5e797f60cb70d0308babb6..016c7182315b1f842ac351fae86041d2c8fe4596:/block/raw-posix.c diff --git a/block/raw-posix.c b/block/raw-posix.c index abfedbea73..657af95637 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -22,14 +22,13 @@ * THE SOFTWARE. */ #include "qemu-common.h" -#include "qemu-timer.h" -#include "qemu-char.h" -#include "qemu-log.h" -#include "block_int.h" -#include "module.h" +#include "qemu/timer.h" +#include "qemu/log.h" +#include "block/block_int.h" +#include "qemu/module.h" #include "trace.h" -#include "thread-pool.h" -#include "iov.h" +#include "block/thread-pool.h" +#include "qemu/iov.h" #include "raw-aio.h" #if defined(__APPLE__) && (__MACH__) @@ -60,6 +59,9 @@ #ifdef CONFIG_FIEMAP #include #endif +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE +#include +#endif #if defined (__FreeBSD__) || defined(__FreeBSD_kernel__) #include #include @@ -139,6 +141,7 @@ typedef struct BDRVRawState { #ifdef CONFIG_XFS bool is_xfs : 1; #endif + bool has_discard : 1; } BDRVRawState; typedef struct BDRVRawReopenState { @@ -160,7 +163,7 @@ typedef struct RawPosixAIOData { void *aio_ioctl_buf; }; int aio_niov; - size_t aio_nbytes; + uint64_t aio_nbytes; #define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */ off_t aio_offset; int aio_type; @@ -290,6 +293,7 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, } #endif + s->has_discard = 1; #ifdef CONFIG_XFS if (platform_test_xfs_fd(s->fd)) { s->is_xfs = 1; @@ -431,22 +435,6 @@ static void raw_reopen_abort(BDRVReopenState *state) #endif */ -/* - * Check if all memory in this vector is sector aligned. - */ -static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) -{ - int i; - - for (i = 0; i < qiov->niov; i++) { - if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) { - return 0; - } - } - - return 1; -} - static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb) { int ret; @@ -456,15 +444,7 @@ static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb) return -errno; } - /* - * This looks weird, but the aio code only considers a request - * successful if it has written the full number of bytes. - * - * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command, - * so in fact we return the ioctl command here to make posix_aio_read() - * happy.. - */ - return aiocb->aio_nbytes; + return 0; } static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb) @@ -643,6 +623,72 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) return nbytes; } +#ifdef CONFIG_XFS +static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes) +{ + struct xfs_flock64 fl; + + memset(&fl, 0, sizeof(fl)); + fl.l_whence = SEEK_SET; + fl.l_start = offset; + fl.l_len = bytes; + + if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) { + DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno)); + return -errno; + } + + return 0; +} +#endif + +static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb) +{ + int ret = -EOPNOTSUPP; + BDRVRawState *s = aiocb->bs->opaque; + + if (s->has_discard == 0) { + return 0; + } + + if (aiocb->aio_type & QEMU_AIO_BLKDEV) { +#ifdef BLKDISCARD + do { + uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes }; + if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) { + return 0; + } + } while (errno == EINTR); + + ret = -errno; +#endif + } else { +#ifdef CONFIG_XFS + if (s->is_xfs) { + return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes); + } +#endif + +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + do { + if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + aiocb->aio_offset, aiocb->aio_nbytes) == 0) { + return 0; + } + } while (errno == EINTR); + + ret = -errno; +#endif + } + + if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP || + ret == -ENOTTY) { + s->has_discard = 0; + ret = 0; + } + return ret; +} + static int aio_worker(void *arg) { RawPosixAIOData *aiocb = arg; @@ -677,6 +723,9 @@ static int aio_worker(void *arg) case QEMU_AIO_IOCTL: ret = handle_aiocb_ioctl(aiocb); break; + case QEMU_AIO_DISCARD: + ret = handle_aiocb_discard(aiocb); + break; default: fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); ret = -EINVAL; @@ -723,7 +772,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs, * driver that it needs to copy the buffer. */ if ((bs->open_flags & BDRV_O_NOCACHE)) { - if (!qiov_is_aligned(bs, qiov)) { + if (!bdrv_qiov_is_aligned(bs, qiov)) { type |= QEMU_AIO_MISALIGNED; #ifdef CONFIG_LINUX_AIO } else if (s->use_aio) { @@ -1077,37 +1126,14 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs, } } -#ifdef CONFIG_XFS -static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors) +static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) { - struct xfs_flock64 fl; - - memset(&fl, 0, sizeof(fl)); - fl.l_whence = SEEK_SET; - fl.l_start = sector_num << 9; - fl.l_len = (int64_t)nb_sectors << 9; - - if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) { - DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno)); - return -errno; - } - - return 0; -} -#endif - -static coroutine_fn int raw_co_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) -{ -#ifdef CONFIG_XFS BDRVRawState *s = bs->opaque; - if (s->is_xfs) { - return xfs_discard(s, sector_num, nb_sectors); - } -#endif - - return 0; + return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors, + cb, opaque, QEMU_AIO_DISCARD); } static QEMUOptionParameter raw_create_options[] = { @@ -1130,12 +1156,12 @@ static BlockDriver bdrv_file = { .bdrv_reopen_abort = raw_reopen_abort, .bdrv_close = raw_close, .bdrv_create = raw_create, - .bdrv_co_discard = raw_co_discard, .bdrv_co_is_allocated = raw_co_is_allocated, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, + .bdrv_aio_discard = raw_aio_discard, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, @@ -1364,6 +1390,19 @@ static int fd_open(BlockDriverState *bs) #endif /* !linux && !FreeBSD */ +static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BDRVRawState *s = bs->opaque; + + if (fd_open(bs) < 0) { + return NULL; + } + return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors, + cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); +} + static int hdev_create(const char *filename, QEMUOptionParameter *options) { int fd; @@ -1416,6 +1455,7 @@ static BlockDriver bdrv_host_device = { .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, + .bdrv_aio_discard = hdev_aio_discard, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, @@ -1777,6 +1817,40 @@ static BlockDriver bdrv_host_cdrom = { }; #endif /* __FreeBSD__ */ +#ifdef CONFIG_LINUX_AIO +/** + * Return the file descriptor for Linux AIO + * + * This function is a layering violation and should be removed when it becomes + * possible to call the block layer outside the global mutex. It allows the + * caller to hijack the file descriptor so I/O can be performed outside the + * block layer. + */ +int raw_get_aio_fd(BlockDriverState *bs) +{ + BDRVRawState *s; + + if (!bs->drv) { + return -ENOMEDIUM; + } + + if (bs->drv == bdrv_find_format("raw")) { + bs = bs->file; + } + + /* raw-posix has several protocols so just check for raw_aio_readv */ + if (bs->drv->bdrv_aio_readv != raw_aio_readv) { + return -ENOTSUP; + } + + s = bs->opaque; + if (!s->use_aio) { + return -ENOTSUP; + } + return s->fd; +} +#endif /* CONFIG_LINUX_AIO */ + static void bdrv_file_init(void) { /*