*/
#include "qemu/osdep.h"
+#include "qemu-common.h"
#include "qapi/error.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include <xfs/xfs.h>
#endif
-//#define DEBUG_BLOCK
-
-#ifdef DEBUG_BLOCK
-# define DEBUG_BLOCK_PRINT 1
-#else
-# define DEBUG_BLOCK_PRINT 0
-#endif
-#define DPRINTF(fmt, ...) \
-do { \
- if (DEBUG_BLOCK_PRINT) { \
- printf(fmt, ## __VA_ARGS__); \
- } \
-} while (0)
+#include "trace.h"
/* OS X does not have O_DSYNC */
#ifndef O_DSYNC
uint64_t locked_perm;
uint64_t locked_shared_perm;
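+    /*
+     * perm_change_fd/perm_change_flags: new fd and open flags to switch to
+     * once a pending permission change (reopen or dynamic auto-read-only)
+     * is committed; reopen_state: the in-flight reopen, if any.
+     */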
+ int perm_change_fd;
+ int perm_change_flags;
+ BDRVReopenState *reopen_state;
+
#ifdef CONFIG_XFS
bool is_xfs:1;
#endif
bool page_cache_inconsistent:1;
bool has_fallocate;
bool needs_alignment;
+ bool drop_cache;
bool check_cache_dropped;
PRManager *pr_mgr;
typedef struct BDRVRawReopenState {
int fd;
int open_flags;
+ bool drop_cache;
bool check_cache_dropped;
} BDRVRawReopenState;
}
}
-static void raw_parse_flags(int bdrv_flags, int *open_flags)
+static void raw_parse_flags(int bdrv_flags, int *open_flags, bool has_writers)
{
+ bool read_write = false;
assert(open_flags != NULL);
*open_flags |= O_BINARY;
*open_flags &= ~O_ACCMODE;
- if (bdrv_flags & BDRV_O_RDWR) {
+
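+    /*
+     * With BDRV_O_AUTO_RDONLY, only open the file read-write if some user
+     * of the node actually needs write access; otherwise it can be opened
+     * (or reopened) read-only.
+     */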
+ if (bdrv_flags & BDRV_O_AUTO_RDONLY) {
+ read_write = has_writers;
+ } else if (bdrv_flags & BDRV_O_RDWR) {
+ read_write = true;
+ }
+
+ if (read_write) {
*open_flags |= O_RDWR;
} else {
*open_flags |= O_RDONLY;
.type = QEMU_OPT_STRING,
.help = "id of persistent reservation manager object (default: none)",
},
+#if defined(__linux__)
+ {
+ .name = "drop-cache",
+ .type = QEMU_OPT_BOOL,
+ .help = "invalidate page cache during live migration (default: on)",
+ },
+#endif
{
.name = "x-check-cache-dropped",
.type = QEMU_OPT_BOOL,
},
};
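+/* Runtime options that may be modified during bdrv_reopen() */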
+static const char *const mutable_opts[] = { "x-check-cache-dropped", NULL };
+
static int raw_open_common(BlockDriverState *bs, QDict *options,
int bdrv_flags, int open_flags,
bool device, Error **errp)
}
}
+ s->drop_cache = qemu_opt_get_bool(opts, "drop-cache", true);
s->check_cache_dropped = qemu_opt_get_bool(opts, "x-check-cache-dropped",
false);
s->open_flags = open_flags;
- raw_parse_flags(bdrv_flags, &s->open_flags);
+ raw_parse_flags(bdrv_flags, &s->open_flags, false);
s->fd = -1;
fd = qemu_open(filename, s->open_flags, 0644);
ret = fd < 0 ? -errno : 0;
- if (ret == -EACCES || ret == -EROFS) {
- /* Try to degrade to read-only, but if it doesn't work, still use the
- * normal error message. */
- if (bdrv_apply_auto_read_only(bs, NULL, NULL) == 0) {
- bdrv_flags &= ~BDRV_O_RDWR;
- raw_parse_flags(bdrv_flags, &s->open_flags);
- assert(!(s->open_flags & O_CREAT));
- fd = qemu_open(filename, s->open_flags);
- ret = fd < 0 ? -errno : 0;
- }
- }
-
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not open '%s'", filename);
if (ret == -EROFS) {
}
#endif
- bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
+ bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
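+    /* With BDRV_REQ_NO_FALLBACK, the caller requires that we do not fall
+     * back to writing explicit zero buffers; handle_aiocb_write_zeroes
+     * honours this by skipping ioctls that may use a slow kernel fallback. */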
ret = 0;
fail:
if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
switch (op) {
case RAW_PL_PREPARE:
+ if ((s->perm | new_perm) == s->perm &&
+ (s->shared_perm & new_shared) == s->shared_perm)
+ {
+        /*
+         * We are only going to unlock bytes, which should not fail. If it
+         * does fail for some filesystem-dependent reason unrelated to
+         * permissions (this occurs sometimes on NFS and leads to an abort
+         * in bdrv_replace_child), no check here can prevent it. Such errors
+         * are ignored in ABORT and COMMIT anyway.
+         */
+ return 0;
+ }
ret = raw_apply_lock_bytes(s, s->fd, s->perm | new_perm,
~s->shared_perm | ~new_shared,
false, errp);
return ret;
}
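+/*
+ * Return a file descriptor that matches the requested open flags for @bs:
+ * the current fd if it already matches (unless @force_dup), a dup()ed fd
+ * fixed up with fcntl(F_SETFL) if only fcntl-changeable flags differ, or a
+ * newly opened fd otherwise. Returns -1 and sets @errp on failure.
+ */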
+static int raw_reconfigure_getfd(BlockDriverState *bs, int flags,
+ int *open_flags, uint64_t perm, bool force_dup,
+ Error **errp)
+{
+ BDRVRawState *s = bs->opaque;
+ int fd = -1;
+ int ret;
+ bool has_writers = perm &
+ (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED | BLK_PERM_RESIZE);
+ int fcntl_flags = O_APPEND | O_NONBLOCK;
+#ifdef O_NOATIME
+ fcntl_flags |= O_NOATIME;
+#endif
+
+ *open_flags = 0;
+ if (s->type == FTYPE_CD) {
+ *open_flags |= O_NONBLOCK;
+ }
+
+ raw_parse_flags(flags, open_flags, has_writers);
+
+#ifdef O_ASYNC
+ /* Not all operating systems have O_ASYNC, and those that don't
+     * will not let us track the state into *open_flags (typically
+ * you achieve the same effect with an ioctl, for example I_SETSIG
+ * on Solaris). But we do not use O_ASYNC, so that's fine.
+ */
+ assert((s->open_flags & O_ASYNC) == 0);
+#endif
+
+ if (!force_dup && *open_flags == s->open_flags) {
+ /* We're lucky, the existing fd is fine */
+ return s->fd;
+ }
+
+ if ((*open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
+ /* dup the original fd */
+ fd = qemu_dup(s->fd);
+ if (fd >= 0) {
+ ret = fcntl_setfl(fd, *open_flags);
+ if (ret) {
+ qemu_close(fd);
+ fd = -1;
+ }
+ }
+ }
+
+ /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
+ if (fd == -1) {
+ const char *normalized_filename = bs->filename;
+ ret = raw_normalize_devicepath(&normalized_filename, errp);
+ if (ret >= 0) {
+ assert(!(*open_flags & O_CREAT));
+ fd = qemu_open(normalized_filename, *open_flags);
+ if (fd == -1) {
+ error_setg_errno(errp, errno, "Could not reopen file");
+ return -1;
+ }
+ }
+ }
+
+ return fd;
+}
+
static int raw_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
BDRVRawState *s;
BDRVRawReopenState *rs;
QemuOpts *opts;
- int ret = 0;
+ int ret;
Error *local_err = NULL;
assert(state != NULL);
state->opaque = g_new0(BDRVRawReopenState, 1);
rs = state->opaque;
- rs->fd = -1;
/* Handle options changes */
opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
goto out;
}
+ rs->drop_cache = qemu_opt_get_bool_del(opts, "drop-cache", true);
rs->check_cache_dropped =
qemu_opt_get_bool_del(opts, "x-check-cache-dropped", false);
* bdrv_reopen_prepare() will detect changes and complain. */
qemu_opts_to_qdict(opts, state->options);
- if (s->type == FTYPE_CD) {
- rs->open_flags |= O_NONBLOCK;
- }
-
- raw_parse_flags(state->flags, &rs->open_flags);
-
- int fcntl_flags = O_APPEND | O_NONBLOCK;
-#ifdef O_NOATIME
- fcntl_flags |= O_NOATIME;
-#endif
-
-#ifdef O_ASYNC
- /* Not all operating systems have O_ASYNC, and those that don't
- * will not let us track the state into rs->open_flags (typically
- * you achieve the same effect with an ioctl, for example I_SETSIG
- * on Solaris). But we do not use O_ASYNC, so that's fine.
- */
- assert((s->open_flags & O_ASYNC) == 0);
-#endif
-
- if ((rs->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
- /* dup the original fd */
- rs->fd = qemu_dup(s->fd);
- if (rs->fd >= 0) {
- ret = fcntl_setfl(rs->fd, rs->open_flags);
- if (ret) {
- qemu_close(rs->fd);
- rs->fd = -1;
- }
- }
- }
-
- /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
- if (rs->fd == -1) {
- const char *normalized_filename = state->bs->filename;
- ret = raw_normalize_devicepath(&normalized_filename, errp);
- if (ret >= 0) {
- assert(!(rs->open_flags & O_CREAT));
- rs->fd = qemu_open(normalized_filename, rs->open_flags);
- if (rs->fd == -1) {
- error_setg_errno(errp, errno, "Could not reopen file");
- ret = -1;
- }
- }
+ rs->fd = raw_reconfigure_getfd(state->bs, state->flags, &rs->open_flags,
+ state->perm, true, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -1;
+ goto out;
}
    /* Fail reopen_prepare() already if we can't get a working O_DIRECT
if (rs->fd != -1) {
raw_probe_alignment(state->bs, rs->fd, &local_err);
if (local_err) {
- qemu_close(rs->fd);
- rs->fd = -1;
error_propagate(errp, local_err);
ret = -EINVAL;
+ goto out_fd;
}
}
+ s->reopen_state = state;
+ ret = 0;
+out_fd:
+ if (ret < 0) {
+ qemu_close(rs->fd);
+ rs->fd = -1;
+ }
out:
qemu_opts_del(opts);
return ret;
{
BDRVRawReopenState *rs = state->opaque;
BDRVRawState *s = state->bs->opaque;
- Error *local_err = NULL;
+ s->drop_cache = rs->drop_cache;
s->check_cache_dropped = rs->check_cache_dropped;
s->open_flags = rs->open_flags;
- /* Copy locks to the new fd before closing the old one. */
- raw_apply_lock_bytes(NULL, rs->fd, s->locked_perm,
- s->locked_shared_perm, false, &local_err);
- if (local_err) {
- /* shouldn't fail in a sane host, but report it just in case. */
- error_report_err(local_err);
- }
qemu_close(s->fd);
s->fd = rs->fd;
g_free(state->opaque);
state->opaque = NULL;
+
+ assert(s->reopen_state == state);
+ s->reopen_state = NULL;
}
static void raw_reopen_abort(BDRVReopenState *state)
{
BDRVRawReopenState *rs = state->opaque;
+ BDRVRawState *s = state->bs->opaque;
/* nothing to do if NULL, we didn't get far enough */
if (rs == NULL) {
}
g_free(state->opaque);
state->opaque = NULL;
+
+ assert(s->reopen_state == state);
+ s->reopen_state = NULL;
}
static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
}
#endif
-static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
+#if defined(__linux__)
+static int handle_aiocb_ioctl(void *opaque)
{
+ RawPosixAIOData *aiocb = opaque;
int ret;
ret = ioctl(aiocb->aio_fildes, aiocb->ioctl.cmd, aiocb->ioctl.buf);
return 0;
}
+#endif /* linux */
-static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
+static int handle_aiocb_flush(void *opaque)
{
+ RawPosixAIOData *aiocb = opaque;
BDRVRawState *s = aiocb->bs->opaque;
int ret;
return offset;
}
-static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
+static int handle_aiocb_rw(void *opaque)
{
+ RawPosixAIOData *aiocb = opaque;
ssize_t nbytes;
char *buf;
* we can just use plain pread/pwrite without any problems.
*/
if (aiocb->io.niov == 1) {
- return handle_aiocb_rw_linear(aiocb, aiocb->io.iov->iov_base);
+ nbytes = handle_aiocb_rw_linear(aiocb, aiocb->io.iov->iov_base);
+ goto out;
}
/*
* We have more than one iovec, and all are properly aligned.
nbytes = handle_aiocb_rw_vector(aiocb);
if (nbytes == aiocb->aio_nbytes ||
(nbytes < 0 && nbytes != -ENOSYS)) {
- return nbytes;
+ goto out;
}
preadv_present = false;
}
*/
buf = qemu_try_blockalign(aiocb->bs, aiocb->aio_nbytes);
if (buf == NULL) {
- return -ENOMEM;
+ nbytes = -ENOMEM;
+ goto out;
}
if (aiocb->aio_type & QEMU_AIO_WRITE) {
}
qemu_vfree(buf);
- return nbytes;
+out:
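+    /*
+     * The caller expects 0 for full success. A short write is an error;
+     * a short read zero-pads the remaining buffer and still succeeds.
+     * Negative values are errno codes from the I/O above.
+     */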
+ if (nbytes == aiocb->aio_nbytes) {
+ return 0;
+ } else if (nbytes >= 0 && nbytes < aiocb->aio_nbytes) {
+ if (aiocb->aio_type & QEMU_AIO_WRITE) {
+ return -EINVAL;
+ } else {
+ iov_memset(aiocb->io.iov, aiocb->io.niov, nbytes,
+ 0, aiocb->aio_nbytes - nbytes);
+ return 0;
+ }
+ } else {
+ assert(nbytes < 0);
+ return nbytes;
+ }
}
#ifdef CONFIG_XFS
static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
+ int64_t len;
struct xfs_flock64 fl;
int err;
+ len = lseek(s->fd, 0, SEEK_END);
+ if (len < 0) {
+ return -errno;
+ }
+
+ if (offset + bytes > len) {
+ /* XFS_IOC_ZERO_RANGE does not increase the file length */
+ if (ftruncate(s->fd, offset + bytes) < 0) {
+ return -errno;
+ }
+ }
+
memset(&fl, 0, sizeof(fl));
fl.l_whence = SEEK_SET;
fl.l_start = offset;
if (xfsctl(NULL, s->fd, XFS_IOC_ZERO_RANGE, &fl) < 0) {
err = errno;
- DPRINTF("cannot write zero range (%s)\n", strerror(errno));
+ trace_file_xfs_write_zeroes(strerror(errno));
return -err;
}
if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
err = errno;
- DPRINTF("cannot punch hole (%s)\n", strerror(errno));
+ trace_file_xfs_discard(strerror(errno));
return -err;
}
}
#ifdef BLKZEROOUT
- do {
- uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
- if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
- return 0;
- }
- } while (errno == EINTR);
+ /* The BLKZEROOUT implementation in the kernel doesn't set
+ * BLKDEV_ZERO_NOFALLBACK, so we can't call this if we have to avoid slow
+ * fallbacks. */
+ if (!(aiocb->aio_type & QEMU_AIO_NO_FALLBACK)) {
+ do {
+ uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
+ if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
+ return 0;
+ }
+ } while (errno == EINTR);
- ret = translate_err(-errno);
+ ret = translate_err(-errno);
+ }
#endif
if (ret == -ENOTSUP) {
return ret;
}
-static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
+static int handle_aiocb_write_zeroes(void *opaque)
{
+ RawPosixAIOData *aiocb = opaque;
#if defined(CONFIG_FALLOCATE) || defined(CONFIG_XFS)
BDRVRawState *s = aiocb->bs->opaque;
#endif
return -ENOTSUP;
}
-static ssize_t handle_aiocb_write_zeroes_unmap(RawPosixAIOData *aiocb)
+static int handle_aiocb_write_zeroes_unmap(void *opaque)
{
+ RawPosixAIOData *aiocb = opaque;
BDRVRawState *s G_GNUC_UNUSED = aiocb->bs->opaque;
int ret;
}
#endif
-static ssize_t handle_aiocb_copy_range(RawPosixAIOData *aiocb)
+static int handle_aiocb_copy_range(void *opaque)
{
+ RawPosixAIOData *aiocb = opaque;
uint64_t bytes = aiocb->aio_nbytes;
off_t in_off = aiocb->aio_offset;
off_t out_off = aiocb->copy_range.aio_offset2;
return 0;
}
-static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
+static int handle_aiocb_discard(void *opaque)
{
+ RawPosixAIOData *aiocb = opaque;
int ret = -EOPNOTSUPP;
BDRVRawState *s = aiocb->bs->opaque;
return result;
}
-static int aio_worker(void *arg)
-{
- RawPosixAIOData *aiocb = arg;
- ssize_t ret = 0;
-
- switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
- case QEMU_AIO_READ:
- ret = handle_aiocb_rw(aiocb);
- if (ret >= 0 && ret < aiocb->aio_nbytes) {
- iov_memset(aiocb->io.iov, aiocb->io.niov, ret,
- 0, aiocb->aio_nbytes - ret);
-
- ret = aiocb->aio_nbytes;
- }
- if (ret == aiocb->aio_nbytes) {
- ret = 0;
- } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
- ret = -EINVAL;
- }
- break;
- case QEMU_AIO_WRITE:
- ret = handle_aiocb_rw(aiocb);
- if (ret == aiocb->aio_nbytes) {
- ret = 0;
- } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
- ret = -EINVAL;
- }
- break;
- case QEMU_AIO_FLUSH:
- ret = handle_aiocb_flush(aiocb);
- break;
- case QEMU_AIO_IOCTL:
- ret = handle_aiocb_ioctl(aiocb);
- break;
- case QEMU_AIO_DISCARD:
- ret = handle_aiocb_discard(aiocb);
- break;
- case QEMU_AIO_WRITE_ZEROES:
- ret = handle_aiocb_write_zeroes(aiocb);
- break;
- case QEMU_AIO_WRITE_ZEROES | QEMU_AIO_DISCARD:
- ret = handle_aiocb_write_zeroes_unmap(aiocb);
- break;
- case QEMU_AIO_COPY_RANGE:
- ret = handle_aiocb_copy_range(aiocb);
- break;
- case QEMU_AIO_TRUNCATE:
- g_assert_not_reached();
- default:
- error_report("invalid aio request (0x%x)", aiocb->aio_type);
- ret = -EINVAL;
- break;
- }
-
- g_free(aiocb);
- return ret;
-}
-
static int coroutine_fn raw_thread_pool_submit(BlockDriverState *bs,
ThreadPoolFunc func, void *arg)
{
return thread_pool_submit_co(pool, func, arg);
}
-static int paio_submit_co_full(BlockDriverState *bs, int fd,
- int64_t offset, int fd2, int64_t offset2,
- QEMUIOVector *qiov,
- int bytes, int type)
-{
- RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
-
- acb->bs = bs;
- acb->aio_type = type;
- acb->aio_fildes = fd;
-
- acb->aio_nbytes = bytes;
- acb->aio_offset = offset;
-
- if (qiov) {
- acb->io.iov = qiov->iov;
- acb->io.niov = qiov->niov;
- assert(qiov->size == bytes);
- } else {
- acb->copy_range.aio_fd2 = fd2;
- acb->copy_range.aio_offset2 = offset2;
- }
-
- trace_file_paio_submit_co(offset, bytes, type);
- return raw_thread_pool_submit(bs, aio_worker, acb);
-}
-
-static inline int paio_submit_co(BlockDriverState *bs, int fd,
- int64_t offset, QEMUIOVector *qiov,
- int bytes, int type)
-{
- return paio_submit_co_full(bs, fd, offset, -1, 0, qiov, bytes, type);
-}
-
static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int type)
{
BDRVRawState *s = bs->opaque;
+ RawPosixAIOData acb;
if (fd_open(bs) < 0)
return -EIO;
}
}
- return paio_submit_co(bs, s->fd, offset, qiov, bytes, type);
+ acb = (RawPosixAIOData) {
+ .bs = bs,
+ .aio_fildes = s->fd,
+ .aio_type = type,
+ .aio_offset = offset,
+ .aio_nbytes = bytes,
+ .io = {
+ .iov = qiov->iov,
+ .niov = qiov->niov,
+ },
+ };
+
+ assert(qiov->size == bytes);
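+    /* The acb can live on this coroutine's stack: raw_thread_pool_submit()
+     * only returns after the worker function has completed. */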
+ return raw_thread_pool_submit(bs, handle_aiocb_rw, &acb);
}
static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
static int raw_co_flush_to_disk(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
+ RawPosixAIOData acb;
int ret;
ret = fd_open(bs);
return ret;
}
- return paio_submit_co(bs, s->fd, 0, NULL, 0, QEMU_AIO_FLUSH);
+ acb = (RawPosixAIOData) {
+ .bs = bs,
+ .aio_fildes = s->fd,
+ .aio_type = QEMU_AIO_FLUSH,
+ };
+
+ return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb);
}
static void raw_aio_attach_aio_context(BlockDriverState *bs,
#endif
if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
#ifdef DIOCGMEDIASIZE
- if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
+    if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
#elif defined(DIOCGPART)
{
struct partinfo pi;
off_t data = 0, hole = 0;
int ret;
+ assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
+
ret = fd_open(bs);
if (ret < 0) {
return ret;
/* On a data extent, compute bytes to the end of the extent,
* possibly including a partial sector at EOF. */
*pnum = MIN(bytes, hole - offset);
+
+ /*
+ * We are not allowed to return partial sectors, though, so
+ * round up if necessary.
+ */
+ if (!QEMU_IS_ALIGNED(*pnum, bs->bl.request_alignment)) {
+ int64_t file_length = raw_getlength(bs);
+ if (file_length > 0) {
+ /* Ignore errors, this is just a safeguard */
+ assert(hole == file_length);
+ }
+ *pnum = ROUND_UP(*pnum, bs->bl.request_alignment);
+ }
+
ret = BDRV_BLOCK_DATA;
} else {
/* On a hole, compute bytes to the beginning of the next extent. */
return;
}
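+    /* If drop-cache=off, keep the host page cache across invalidation
+     * requests (e.g. incoming migration). */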
+ if (!s->drop_cache) {
+ return;
+ }
+
if (s->open_flags & O_DIRECT) {
return; /* No host kernel page cache */
}
}
static coroutine_fn int
-raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
+raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int bytes, bool blkdev)
{
BDRVRawState *s = bs->opaque;
+ RawPosixAIOData acb;
+
+ acb = (RawPosixAIOData) {
+ .bs = bs,
+ .aio_fildes = s->fd,
+ .aio_type = QEMU_AIO_DISCARD,
+ .aio_offset = offset,
+ .aio_nbytes = bytes,
+ };
+
+ if (blkdev) {
+ acb.aio_type |= QEMU_AIO_BLKDEV;
+ }
- return paio_submit_co(bs, s->fd, offset, NULL, bytes, QEMU_AIO_DISCARD);
+ return raw_thread_pool_submit(bs, handle_aiocb_discard, &acb);
}
-static int coroutine_fn raw_co_pwrite_zeroes(
- BlockDriverState *bs, int64_t offset,
- int bytes, BdrvRequestFlags flags)
+static coroutine_fn int
+raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
+{
+ return raw_do_pdiscard(bs, offset, bytes, false);
+}
+
+static int coroutine_fn
+raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
+ BdrvRequestFlags flags, bool blkdev)
{
BDRVRawState *s = bs->opaque;
- int operation = QEMU_AIO_WRITE_ZEROES;
+ RawPosixAIOData acb;
+ ThreadPoolFunc *handler;
+
+ acb = (RawPosixAIOData) {
+ .bs = bs,
+ .aio_fildes = s->fd,
+ .aio_type = QEMU_AIO_WRITE_ZEROES,
+ .aio_offset = offset,
+ .aio_nbytes = bytes,
+ };
+
+ if (blkdev) {
+ acb.aio_type |= QEMU_AIO_BLKDEV;
+ }
+ if (flags & BDRV_REQ_NO_FALLBACK) {
+ acb.aio_type |= QEMU_AIO_NO_FALLBACK;
+ }
if (flags & BDRV_REQ_MAY_UNMAP) {
- operation |= QEMU_AIO_DISCARD;
+ acb.aio_type |= QEMU_AIO_DISCARD;
+ handler = handle_aiocb_write_zeroes_unmap;
+ } else {
+ handler = handle_aiocb_write_zeroes;
}
- return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation);
+ return raw_thread_pool_submit(bs, handler, &acb);
+}
+
+static int coroutine_fn raw_co_pwrite_zeroes(
+ BlockDriverState *bs, int64_t offset,
+ int bytes, BdrvRequestFlags flags)
+{
+ return raw_do_pwrite_zeroes(bs, offset, bytes, flags, false);
}
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
.name = BLOCK_OPT_PREALLOC,
.type = QEMU_OPT_STRING,
- .help = "Preallocation mode (allowed values: off, falloc, full)"
+ .help = "Preallocation mode (allowed values: off"
+#ifdef CONFIG_POSIX_FALLOCATE
+ ", falloc"
+#endif
+ ", full)"
},
{ /* end of list */ }
}
static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
Error **errp)
{
- return raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, errp);
+ BDRVRawState *s = bs->opaque;
+ BDRVRawReopenState *rs = NULL;
+ int open_flags;
+ int ret;
+
+ if (s->perm_change_fd) {
+ /*
+ * In the context of reopen, this function may be called several times
+ * (directly and recursively while change permissions of the parent).
+ * This is even true for children that don't inherit from the original
+ * reopen node, so s->reopen_state is not set.
+ *
+ * Ignore all but the first call.
+ */
+ return 0;
+ }
+
+ if (s->reopen_state) {
+ /* We already have a new file descriptor to set permissions for */
+ assert(s->reopen_state->perm == perm);
+ assert(s->reopen_state->shared_perm == shared);
+ rs = s->reopen_state->opaque;
+ s->perm_change_fd = rs->fd;
+ s->perm_change_flags = rs->open_flags;
+ } else {
+ /* We may need a new fd if auto-read-only switches the mode */
+ ret = raw_reconfigure_getfd(bs, bs->open_flags, &open_flags, perm,
+ false, errp);
+ if (ret < 0) {
+ return ret;
+ } else if (ret != s->fd) {
+ s->perm_change_fd = ret;
+ s->perm_change_flags = open_flags;
+ }
+ }
+
+    /* Prepare permissions on the old fd to avoid conflicts between old and
+     * new, but keep everything locked that the new fd will need. */
+ ret = raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, errp);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Copy locks to the new fd */
+ if (s->perm_change_fd) {
+ ret = raw_apply_lock_bytes(NULL, s->perm_change_fd, perm, ~shared,
+ false, errp);
+ if (ret < 0) {
+ raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
+ goto fail;
+ }
+ }
+ return 0;
+
+fail:
+ if (s->perm_change_fd && !s->reopen_state) {
+ qemu_close(s->perm_change_fd);
+ }
+ s->perm_change_fd = 0;
+ return ret;
}
static void raw_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared)
{
BDRVRawState *s = bs->opaque;
+
+ /* For reopen, we have already switched to the new fd (.bdrv_set_perm is
+ * called after .bdrv_reopen_commit) */
+ if (s->perm_change_fd && s->fd != s->perm_change_fd) {
+ qemu_close(s->fd);
+ s->fd = s->perm_change_fd;
+ s->open_flags = s->perm_change_flags;
+ }
+ s->perm_change_fd = 0;
+
raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL);
s->perm = perm;
s->shared_perm = shared;
static void raw_abort_perm_update(BlockDriverState *bs)
{
+ BDRVRawState *s = bs->opaque;
+
+ /* For reopen, .bdrv_reopen_abort is called afterwards and will close
+ * the file descriptor. */
+ if (s->perm_change_fd && !s->reopen_state) {
+ qemu_close(s->perm_change_fd);
+ }
+ s->perm_change_fd = 0;
+
raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
}
BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags)
{
+ RawPosixAIOData acb;
BDRVRawState *s = bs->opaque;
BDRVRawState *src_s;
if (fd_open(src->bs) < 0 || fd_open(dst->bs) < 0) {
return -EIO;
}
- return paio_submit_co_full(bs, src_s->fd, src_offset, s->fd, dst_offset,
- NULL, bytes, QEMU_AIO_COPY_RANGE);
+
+ acb = (RawPosixAIOData) {
+ .bs = bs,
+ .aio_type = QEMU_AIO_COPY_RANGE,
+ .aio_fildes = src_s->fd,
+ .aio_offset = src_offset,
+ .aio_nbytes = bytes,
+ .copy_range = {
+ .aio_fd2 = s->fd,
+ .aio_offset2 = dst_offset,
+ },
+ };
+
+ return raw_thread_pool_submit(bs, handle_aiocb_copy_range, &acb);
}
BlockDriver bdrv_file = {
.bdrv_set_perm = raw_set_perm,
.bdrv_abort_perm_update = raw_abort_perm_update,
.create_opts = &raw_create_opts,
+ .mutable_opts = mutable_opts,
};
/***********************************************/
/* If a match was found, leave the loop */
if (*mediaIterator != 0) {
- DPRINTF("Matching using %s\n", matching_array[index]);
+ trace_file_FindEjectableOpticalMedia(matching_array[index]);
mediaType = g_strdup(matching_array[index]);
break;
}
if (partition_found == false) {
error_setg(errp, "Failed to find a working partition on disc");
} else {
- DPRINTF("Using %s as optical disc\n", test_partition);
+ trace_file_setup_cdrom(test_partition);
pstrcpy(bsd_path, MAXPATHLEN, test_partition);
}
return partition_found;
ret = ioctl(s->fd, SG_GET_SCSI_ID, &scsiid);
if (ret >= 0) {
- DPRINTF("SG device found: type=%d, version=%d\n",
- scsiid.scsi_type, sg_version);
+ trace_file_hdev_is_sg(scsiid.scsi_type, sg_version);
return true;
}
}
#if defined(__linux__)
-
-static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
- unsigned long int req, void *buf,
- BlockCompletionFunc *cb, void *opaque)
+static int coroutine_fn
+hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
BDRVRawState *s = bs->opaque;
- RawPosixAIOData *acb;
- ThreadPool *pool;
+ RawPosixAIOData acb;
+ int ret;
- if (fd_open(bs) < 0)
- return NULL;
+ ret = fd_open(bs);
+ if (ret < 0) {
+ return ret;
+ }
if (req == SG_IO && s->pr_mgr) {
struct sg_io_hdr *io_hdr = buf;
if (io_hdr->cmdp[0] == PERSISTENT_RESERVE_OUT ||
io_hdr->cmdp[0] == PERSISTENT_RESERVE_IN) {
return pr_manager_execute(s->pr_mgr, bdrv_get_aio_context(bs),
- s->fd, io_hdr, cb, opaque);
+ s->fd, io_hdr);
}
}
- acb = g_new(RawPosixAIOData, 1);
- acb->bs = bs;
- acb->aio_type = QEMU_AIO_IOCTL;
- acb->aio_fildes = s->fd;
- acb->aio_offset = 0;
- acb->ioctl.buf = buf;
- acb->ioctl.cmd = req;
- pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
- return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
+ acb = (RawPosixAIOData) {
+ .bs = bs,
+ .aio_type = QEMU_AIO_IOCTL,
+ .aio_fildes = s->fd,
+ .aio_offset = 0,
+ .ioctl = {
+ .buf = buf,
+ .cmd = req,
+ },
+ };
+
+ return raw_thread_pool_submit(bs, handle_aiocb_ioctl, &acb);
}
#endif /* linux */
static coroutine_fn int
hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
{
- BDRVRawState *s = bs->opaque;
int ret;
ret = fd_open(bs);
if (ret < 0) {
return ret;
}
- return paio_submit_co(bs, s->fd, offset, NULL, bytes,
- QEMU_AIO_DISCARD | QEMU_AIO_BLKDEV);
+ return raw_do_pdiscard(bs, offset, bytes, true);
}
static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes, BdrvRequestFlags flags)
{
- BDRVRawState *s = bs->opaque;
- int operation = QEMU_AIO_WRITE_ZEROES | QEMU_AIO_BLKDEV;
int rc;
rc = fd_open(bs);
return rc;
}
- if (flags & BDRV_REQ_MAY_UNMAP) {
- operation |= QEMU_AIO_DISCARD;
- }
-
- return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation);
+ return raw_do_pwrite_zeroes(bs, offset, bytes, flags, true);
}
static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts,
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_co_create_opts = hdev_co_create_opts,
.create_opts = &raw_create_opts,
+ .mutable_opts = mutable_opts,
.bdrv_co_invalidate_cache = raw_co_invalidate_cache,
.bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
/* generic scsi device */
#ifdef __linux__
- .bdrv_aio_ioctl = hdev_aio_ioctl,
+ .bdrv_co_ioctl = hdev_co_ioctl,
#endif
};
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_co_create_opts = hdev_co_create_opts,
.create_opts = &raw_create_opts,
+ .mutable_opts = mutable_opts,
.bdrv_co_invalidate_cache = raw_co_invalidate_cache,
.bdrv_lock_medium = cdrom_lock_medium,
/* generic scsi device */
- .bdrv_aio_ioctl = hdev_aio_ioctl,
+ .bdrv_co_ioctl = hdev_co_ioctl,
};
#endif /* __linux__ */
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_co_create_opts = hdev_co_create_opts,
.create_opts = &raw_create_opts,
+ .mutable_opts = mutable_opts,
.bdrv_co_preadv = raw_co_preadv,
.bdrv_co_pwritev = raw_co_pwritev,