* THE SOFTWARE.
*/
#include "qemu-common.h"
-#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
#include "qemu-timer.h"
-#include "exec-all.h"
#include "qemu-char.h"
-#endif
#include "block_int.h"
-#include "compatfd.h"
#include <assert.h>
#ifdef CONFIG_AIO
-#include <aio.h>
+#include "posix-aio-compat.h"
#endif
#ifdef CONFIG_COCOA
//#define DEBUG_FLOPPY
//#define DEBUG_BLOCK
-#if defined(DEBUG_BLOCK) && !defined(QEMU_IMG) && !defined(QEMU_NBD)
-#define DEBUG_BLOCK_PRINT(formatCstr, args...) do { if (loglevel != 0) \
- { fprintf(logfile, formatCstr, ##args); fflush(logfile); } } while (0)
+#if defined(DEBUG_BLOCK)
+#define DEBUG_BLOCK_PRINT(formatCstr, args...) do { if (qemu_log_enabled()) \
+ { qemu_log(formatCstr, ##args); qemu_log_flush(); } } while (0)
#else
#define DEBUG_BLOCK_PRINT(formatCstr, args...)
#endif
+/* OS X does not have O_DSYNC */
+#ifndef O_DSYNC
+#define O_DSYNC O_SYNC
+#endif
+
+/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
+#ifndef O_DIRECT
+#define O_DIRECT O_DSYNC
+#endif
+
#define FTYPE_FILE 0
#define FTYPE_CD 1
#define FTYPE_FD 2
int fd_got_error;
int fd_media_changed;
#endif
-#if defined(O_DIRECT) && !defined(QEMU_IMG)
uint8_t* aligned_buf;
-#endif
} BDRVRawState;
+static int posix_aio_init(void);
+
static int fd_open(BlockDriverState *bs);
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
BDRVRawState *s = bs->opaque;
int fd, open_flags, ret;
+ posix_aio_init();
+
s->lseek_err_cnt = 0;
open_flags = O_BINARY;
}
if (flags & BDRV_O_CREAT)
open_flags |= O_CREAT | O_TRUNC;
-#ifdef O_DIRECT
- if (flags & BDRV_O_DIRECT)
+
+ /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+ * and O_DIRECT for no caching. */
+ if ((flags & BDRV_O_NOCACHE))
open_flags |= O_DIRECT;
-#endif
+ else if (!(flags & BDRV_O_CACHE_WB))
+ open_flags |= O_DSYNC;
s->type = FTYPE_FILE;
return ret;
}
s->fd = fd;
-#if defined(O_DIRECT) && !defined(QEMU_IMG)
s->aligned_buf = NULL;
- if (flags & BDRV_O_DIRECT) {
+ if ((flags & BDRV_O_NOCACHE)) {
s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
if (s->aligned_buf == NULL) {
ret = -errno;
return ret;
}
}
-#endif
return 0;
}
ret = fd_open(bs);
if (ret < 0)
- return ret;
+ return -errno;
if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
++(s->lseek_err_cnt);
s->fd, bs->filename, offset, buf, count,
bs->total_sectors, errno, strerror(errno));
}
- return -1;
+ return -EIO;
}
s->lseek_err_cnt = 0;
label__raw_write__success:
- return ret;
+ return (ret < 0) ? -errno : ret;
}
-#if defined(O_DIRECT) && !defined(QEMU_IMG)
/*
* offset and count are in bytes and possibly not aligned. For files opened
* with O_DIRECT, necessary alignments are ensured before calling
return raw_pwrite_aligned(bs, offset, buf, count) + sum;
}
-#else
-#define raw_pread raw_pread_aligned
-#define raw_pwrite raw_pwrite_aligned
-#endif
-
-
#ifdef CONFIG_AIO
/***********************************************************/
/* Unix AIO using POSIX AIO */
typedef struct RawAIOCB {
BlockDriverAIOCB common;
- struct aiocb aiocb;
+ struct qemu_paiocb aiocb;
struct RawAIOCB *next;
int ret;
} RawAIOCB;
-static int aio_sig_fd = -1;
-static int aio_sig_num = SIGUSR2;
-static RawAIOCB *first_aio; /* AIO issued */
-static int aio_initialized = 0;
+typedef struct PosixAioState /* bookkeeping reached through the static posix_aio_state pointer */
+{
+ int rfd, wfd; /* pipe ends: rfd is drained by posix_aio_read(), wfd is written by aio_signal_handler() */
+ RawAIOCB *first_aio; /* head of the singly linked list of issued requests (linked via RawAIOCB.next) */
+} PosixAioState;
-static void qemu_aio_poll(void *opaque)
+static void posix_aio_read(void *opaque)
{
+ PosixAioState *s = opaque;
RawAIOCB *acb, **pacb;
int ret;
- size_t offset;
- union {
- struct qemu_signalfd_siginfo siginfo;
- char buf[128];
- } sig;
-
- /* try to read from signalfd, don't freak out if we can't read anything */
- offset = 0;
- while (offset < 128) {
- ssize_t len;
-
- len = read(aio_sig_fd, sig.buf + offset, 128 - offset);
- if (len == -1 && errno == EINTR)
- continue;
- if (len == -1 && errno == EAGAIN) {
- /* there is no natural reason for this to happen,
- * so we'll spin hard until we get everything just
- * to be on the safe side. */
- if (offset > 0)
- continue;
- }
+ ssize_t len;
+
+ /* read all bytes from signal pipe */
+ for (;;) {
+ char bytes[16];
- offset += len;
+ len = read(s->rfd, bytes, sizeof(bytes));
+ if (len == -1 && errno == EINTR)
+ continue; /* try again */
+ if (len == sizeof(bytes))
+ continue; /* more to read */
+ break;
}
for(;;) {
- pacb = &first_aio;
+ pacb = &s->first_aio;
for(;;) {
acb = *pacb;
if (!acb)
goto the_end;
- ret = aio_error(&acb->aiocb);
+ ret = qemu_paio_error(&acb->aiocb);
if (ret == ECANCELED) {
/* remove the request */
*pacb = acb->next;
} else if (ret != EINPROGRESS) {
/* end of aio */
if (ret == 0) {
- ret = aio_return(&acb->aiocb);
+ ret = qemu_paio_return(&acb->aiocb);
if (ret == acb->aiocb.aio_nbytes)
ret = 0;
else
the_end: ;
}
-void qemu_aio_init(void)
+static int posix_aio_flush(void *opaque)
{
- sigset_t mask;
-
- aio_initialized = 1;
-
- /* Make sure to block AIO signal */
- sigemptyset(&mask);
- sigaddset(&mask, aio_sig_num);
- sigprocmask(SIG_BLOCK, &mask, NULL);
-
- aio_sig_fd = qemu_signalfd(&mask);
+ PosixAioState *s = opaque; /* opaque is the PosixAioState handed to qemu_aio_set_fd_handler() in posix_aio_init() */
+ return !!s->first_aio; /* 1 while the issued-request list is non-empty, 0 once it is empty; presumably polled by the AIO core to detect pending work -- confirm against qemu_aio_set_fd_handler() */
+}
- fcntl(aio_sig_fd, F_SETFL, O_NONBLOCK);
+static PosixAioState *posix_aio_state;
-#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
- qemu_set_fd_handler2(aio_sig_fd, NULL, qemu_aio_poll, NULL, NULL);
-#endif
+/*
+ * Signal handler installed for SIGUSR2 by posix_aio_init().
+ *
+ * Once the AIO state has been published in posix_aio_state, a single byte
+ * is written to the write end of the notification pipe so that the read end
+ * becomes readable and posix_aio_read() gets run.  qemu_service_io() is
+ * called in every case.
+ *
+ * errno is saved on entry and restored on exit: write() and
+ * qemu_service_io() may modify it, and the code this handler interrupted
+ * must not observe that change.
+ */
+static void aio_signal_handler(int signum)
+{
+    int saved_errno = errno;
+
+    if (posix_aio_state) {
+        char byte = 0;
+        ssize_t len;
-#if defined(__GLIBC__) && defined(__linux__)
- {
- /* XXX: aio thread exit seems to hang on RedHat 9 and this init
- seems to fix the problem. */
- struct aioinit ai;
- memset(&ai, 0, sizeof(ai));
- ai.aio_threads = 1;
- ai.aio_num = 1;
- ai.aio_idle_time = 365 * 100000;
- aio_init(&ai);
+        /*
+         * The pipe is non-blocking.  If it is full the write fails with
+         * EAGAIN, but then unread bytes are already queued and the reader
+         * will be woken anyway, so the result is deliberately not acted on.
+         */
+        len = write(posix_aio_state->wfd, &byte, sizeof(byte));
+        (void)len;
}
-#endif
-}
-/* Wait for all IO requests to complete. */
-void qemu_aio_flush(void)
-{
- qemu_aio_poll(NULL);
- while (first_aio) {
- qemu_aio_wait();
- }
+    qemu_service_io();
+
+    errno = saved_errno;
}
-void qemu_aio_wait(void)
+/*
+ * One-time setup of the POSIX AIO completion path.
+ *
+ * Installs aio_signal_handler() for SIGUSR2, creates a non-blocking pipe,
+ * registers its read end with qemu_aio_set_fd_handler() (posix_aio_read as
+ * the read callback, posix_aio_flush as the flush callback), calls
+ * qemu_paio_init() with aio_threads and aio_num set to 64, and finally
+ * publishes the state in posix_aio_state.
+ *
+ * Returns 0 on success or when the state already exists, and a negative
+ * errno value if the pipe cannot be created.
+ */
+static int posix_aio_init(void)
{
- int ret;
+    struct sigaction act;
+    PosixAioState *s;
+    int fds[2];
+    struct qemu_paioinit ai;
+
+    /* Already set up by an earlier call: nothing to do. */
+    if (posix_aio_state)
+        return 0;
-#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
- if (qemu_bh_poll())
- return;
-#endif
+    s = qemu_malloc(sizeof(PosixAioState));
- if (!first_aio)
- return;
+    sigfillset(&act.sa_mask);
+    act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
+    act.sa_handler = aio_signal_handler;
+    sigaction(SIGUSR2, &act, NULL);
- do {
- fd_set rdfds;
+    s->first_aio = NULL;
+    if (pipe(fds) == -1) {
+        /* Capture errno first: fprintf() is allowed to overwrite it. */
+        int err = errno;
+
+        fprintf(stderr, "failed to create pipe\n");
+        /* s has not been published in posix_aio_state; free it here or it leaks. */
+        qemu_free(s);
+        return -err;
+    }
- FD_ZERO(&rdfds);
- FD_SET(aio_sig_fd, &rdfds);
+    s->rfd = fds[0];
+    s->wfd = fds[1];
- ret = select(aio_sig_fd + 1, &rdfds, NULL, NULL, NULL);
- if (ret == -1 && errno == EINTR)
- continue;
- } while (ret == 0);
+    /* Both ends non-blocking: the handler must never block on a full pipe
+     * and the reader must never block on an empty one. */
+    fcntl(s->rfd, F_SETFL, O_NONBLOCK);
+    fcntl(s->wfd, F_SETFL, O_NONBLOCK);
- qemu_aio_poll(NULL);
+    qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s);
+
+    memset(&ai, 0, sizeof(ai));
+    ai.aio_threads = 64;
+    ai.aio_num = 64;
+    qemu_paio_init(&ai);
+
+    /* Publish last: aio_signal_handler() only touches the pipe once this is set. */
+    posix_aio_state = s;
+
+    return 0;
}
static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
if (!acb)
return NULL;
acb->aiocb.aio_fildes = s->fd;
- acb->aiocb.aio_sigevent.sigev_signo = aio_sig_num;
- acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
+ acb->aiocb.ev_signo = SIGUSR2;
acb->aiocb.aio_buf = buf;
if (nb_sectors < 0)
acb->aiocb.aio_nbytes = -nb_sectors;
else
acb->aiocb.aio_nbytes = nb_sectors * 512;
acb->aiocb.aio_offset = sector_num * 512;
- acb->next = first_aio;
- first_aio = acb;
+ acb->next = posix_aio_state->first_aio;
+ posix_aio_state->first_aio = acb;
return acb;
}
-#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
static void raw_aio_em_cb(void* opaque)
{
RawAIOCB *acb = opaque;
acb->common.cb(acb->common.opaque, acb->ret);
qemu_aio_release(acb);
}
-#endif
+
+/*
+ * Unlink acb from the list of issued requests headed by
+ * posix_aio_state->first_aio and release it with qemu_aio_release().
+ *
+ * If acb is not on the list, the list is left untouched and acb is not
+ * released.
+ */
+static void raw_aio_remove(RawAIOCB *acb)
+{
+    RawAIOCB **pacb;
+
+    /* remove the callback from the queue */
+    for (pacb = &posix_aio_state->first_aio; *pacb != NULL;
+         pacb = &(*pacb)->next) {
+        /*
+         * The cursor advances through the node it currently points at.
+         * Stepping via &acb->next instead would leave the list and walk
+         * acb's own successors, which can miss acb or never terminate
+         * when acb is not the head.
+         */
+        if (*pacb == acb) {
+            *pacb = acb->next;
+            qemu_aio_release(acb);
+            break;
+        }
+    }
+}
static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
int64_t sector_num, uint8_t *buf, int nb_sectors,
* If O_DIRECT is used and the buffer is not aligned fall back
* to synchronous IO.
*/
-#if defined(O_DIRECT) && !defined(QEMU_IMG) && !defined(QEMU_NBD)
BDRVRawState *s = bs->opaque;
if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
qemu_bh_schedule(bh);
return &acb->common;
}
-#endif
acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
if (!acb)
return NULL;
- if (aio_read(&acb->aiocb) < 0) {
- qemu_aio_release(acb);
+ if (qemu_paio_read(&acb->aiocb) < 0) {
+ raw_aio_remove(acb);
return NULL;
}
return &acb->common;
* If O_DIRECT is used and the buffer is not aligned fall back
* to synchronous IO.
*/
-#if defined(O_DIRECT) && !defined(QEMU_IMG) && !defined(QEMU_NBD)
BDRVRawState *s = bs->opaque;
if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
qemu_bh_schedule(bh);
return &acb->common;
}
-#endif
acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
if (!acb)
return NULL;
- if (aio_write(&acb->aiocb) < 0) {
- qemu_aio_release(acb);
+ if (qemu_paio_write(&acb->aiocb) < 0) {
+ raw_aio_remove(acb);
return NULL;
}
return &acb->common;
{
int ret;
RawAIOCB *acb = (RawAIOCB *)blockacb;
- RawAIOCB **pacb;
- ret = aio_cancel(acb->aiocb.aio_fildes, &acb->aiocb);
- if (ret == AIO_NOTCANCELED) {
+ ret = qemu_paio_cancel(acb->aiocb.aio_fildes, &acb->aiocb);
+ if (ret == QEMU_PAIO_NOTCANCELED) {
/* fail safe: if the aio could not be canceled, we wait for
it */
- while (aio_error(&acb->aiocb) == EINPROGRESS);
+ while (qemu_paio_error(&acb->aiocb) == EINPROGRESS);
}
- /* remove the callback from the queue */
- pacb = &first_aio;
- for(;;) {
- if (*pacb == NULL) {
- break;
- } else if (*pacb == acb) {
- *pacb = acb->next;
- qemu_aio_release(acb);
- break;
- }
- pacb = &acb->next;
- }
-}
-
-# else /* CONFIG_AIO */
-
-void qemu_aio_init(void)
-{
-}
-
-void qemu_aio_flush(void)
-{
+ raw_aio_remove(acb);
}
-
-void qemu_aio_wait(void)
+#else /* CONFIG_AIO */
+static int posix_aio_init(void)
{
-#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
- qemu_bh_poll();
-#endif
+ return 0; /* built without CONFIG_AIO: there is no AIO state to set up, so always report success */
}
-
#endif /* CONFIG_AIO */
+
static void raw_close(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
if (s->fd >= 0) {
close(s->fd);
s->fd = -1;
-#if defined(O_DIRECT) && !defined(QEMU_IMG)
if (s->aligned_buf != NULL)
qemu_free(s->aligned_buf);
-#endif
}
}
.bdrv_aio_cancel = raw_aio_cancel,
.aiocb_size = sizeof(RawAIOCB),
#endif
- .protocol_name = "file",
+
.bdrv_pread = raw_pread,
.bdrv_pwrite = raw_pwrite,
.bdrv_truncate = raw_truncate,
BDRVRawState *s = bs->opaque;
int fd, open_flags, ret;
+ posix_aio_init();
+
#ifdef CONFIG_COCOA
if (strstart(filename, "/dev/cdrom", NULL)) {
kern_return_t kernResult;
open_flags |= O_RDONLY;
bs->read_only = 1;
}
-#ifdef O_DIRECT
- if (flags & BDRV_O_DIRECT)
+ /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+ * and O_DIRECT for no caching. */
+ if ((flags & BDRV_O_NOCACHE))
open_flags |= O_DIRECT;
-#endif
+ else if (!(flags & BDRV_O_CACHE_WB))
+ open_flags |= O_DSYNC;
s->type = FTYPE_FILE;
#if defined(__linux__)
return 0;
}
-#if defined(__linux__) && !defined(QEMU_IMG) && !defined(QEMU_NBD)
-
+#if defined(__linux__)
/* Note: we do not have a reliable method to detect if the floppy is
present. The current method is to try to open the floppy at every
I/O and to keep it opened during a few hundreds of ms. */
s->fd_got_error = 0;
return 0;
}
-#else
-static int fd_open(BlockDriverState *bs)
-{
- return 0;
-}
-#endif
-
-#if defined(__linux__)
static int raw_is_inserted(BlockDriverState *bs)
{
}
#else
+static int fd_open(BlockDriverState *bs)
+{
+ return 0; /* non-__linux__ build: bs is unused and the call always succeeds */
+}
+
static int raw_is_inserted(BlockDriverState *bs)
{
return 1;
.bdrv_aio_cancel = raw_aio_cancel,
.aiocb_size = sizeof(RawAIOCB),
#endif
+
.bdrv_pread = raw_pread,
.bdrv_pwrite = raw_pwrite,
.bdrv_getlength = raw_getlength,