#include "qemu-timer.h"
#include "qemu-char.h"
#include "block_int.h"
-#include "compatfd.h"
#include <assert.h>
#ifdef CONFIG_AIO
-#include <aio.h>
+#include "posix-aio-compat.h"
#endif
#ifdef CONFIG_COCOA
//#define DEBUG_BLOCK
#if defined(DEBUG_BLOCK)
-#define DEBUG_BLOCK_PRINT(formatCstr, args...) do { if (loglevel != 0) \
- { fprintf(logfile, formatCstr, ##args); fflush(logfile); } } while (0)
+#define DEBUG_BLOCK_PRINT(formatCstr, args...) do { if (qemu_log_enabled()) \
+ { qemu_log(formatCstr, ##args); qemu_log_flush(); } } while (0)
#else
#define DEBUG_BLOCK_PRINT(formatCstr, args...)
#endif
+/* OS X does not have O_DSYNC */
+#ifndef O_DSYNC
+#define O_DSYNC O_SYNC
+#endif
+
+/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
+#ifndef O_DIRECT
+#define O_DIRECT O_DSYNC
+#endif
+
#define FTYPE_FILE 0
#define FTYPE_CD 1
#define FTYPE_FD 2
reopen it to see if the disk has been changed */
#define FD_OPEN_TIMEOUT 1000
-/* posix-aio doesn't allow multiple outstanding requests to a single file
- * descriptor. we implement a pool of dup()'d file descriptors to work
- * around this */
-#define RAW_FD_POOL_SIZE 64
-
typedef struct BDRVRawState {
int fd;
int type;
unsigned int lseek_err_cnt;
- int fd_pool[RAW_FD_POOL_SIZE];
#if defined(__linux__)
/* linux floppy specific */
int fd_open_flags;
int fd_got_error;
int fd_media_changed;
#endif
-#if defined(O_DIRECT)
uint8_t* aligned_buf;
-#endif
} BDRVRawState;
static int posix_aio_init(void);
{
BDRVRawState *s = bs->opaque;
int fd, open_flags, ret;
- int i;
posix_aio_init();
}
if (flags & BDRV_O_CREAT)
open_flags |= O_CREAT | O_TRUNC;
-#ifdef O_DIRECT
- if (flags & BDRV_O_DIRECT)
+
+ /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+ * and O_DIRECT for no caching. */
+ if ((flags & BDRV_O_NOCACHE))
open_flags |= O_DIRECT;
-#endif
+ else if (!(flags & BDRV_O_CACHE_WB))
+ open_flags |= O_DSYNC;
s->type = FTYPE_FILE;
return ret;
}
s->fd = fd;
- for (i = 0; i < RAW_FD_POOL_SIZE; i++)
- s->fd_pool[i] = -1;
-#if defined(O_DIRECT)
s->aligned_buf = NULL;
- if (flags & BDRV_O_DIRECT) {
+ if ((flags & BDRV_O_NOCACHE)) {
s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
if (s->aligned_buf == NULL) {
ret = -errno;
return ret;
}
}
-#endif
return 0;
}
ret = fd_open(bs);
if (ret < 0)
- return ret;
+ return -errno;
if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
++(s->lseek_err_cnt);
s->fd, bs->filename, offset, buf, count,
bs->total_sectors, errno, strerror(errno));
}
- return -1;
+ return -EIO;
}
s->lseek_err_cnt = 0;
label__raw_write__success:
- return ret;
+ return (ret < 0) ? -errno : ret;
}
-#if defined(O_DIRECT)
/*
* offset and count are in bytes and possibly not aligned. For files opened
* with O_DIRECT, necessary alignments are ensured before calling
return raw_pwrite_aligned(bs, offset, buf, count) + sum;
}
-#else
-#define raw_pread raw_pread_aligned
-#define raw_pwrite raw_pwrite_aligned
-#endif
-
-
#ifdef CONFIG_AIO
/***********************************************************/
/* Unix AIO using POSIX AIO */
typedef struct RawAIOCB {
BlockDriverAIOCB common;
- int fd;
- struct aiocb aiocb;
+ struct qemu_paiocb aiocb; /* request block submitted via qemu_paio_read()/qemu_paio_write() and polled with qemu_paio_error()/qemu_paio_return() */
struct RawAIOCB *next;
int ret;
} RawAIOCB;
typedef struct PosixAioState
{
- int fd;
+ int rfd, wfd; /* read and write ends of the pipe created in posix_aio_init(); the signal handler writes to wfd, posix_aio_read() drains rfd */
RawAIOCB *first_aio;
} PosixAioState;
-static int raw_fd_pool_get(BDRVRawState *s)
-{
- int i;
-
- for (i = 0; i < RAW_FD_POOL_SIZE; i++) {
- /* already in use */
- if (s->fd_pool[i] != -1)
- continue;
-
- /* try to dup file descriptor */
- s->fd_pool[i] = dup(s->fd);
- if (s->fd_pool[i] != -1)
- return s->fd_pool[i];
- }
-
- /* we couldn't dup the file descriptor so just use the main one */
- return s->fd;
-}
-
-static void raw_fd_pool_put(RawAIOCB *acb)
-{
- BDRVRawState *s = acb->common.bs->opaque;
- int i;
-
- for (i = 0; i < RAW_FD_POOL_SIZE; i++) {
- if (s->fd_pool[i] == acb->fd) {
- close(s->fd_pool[i]);
- s->fd_pool[i] = -1;
- }
- }
-}
-
static void posix_aio_read(void *opaque)
{
PosixAioState *s = opaque;
RawAIOCB *acb, **pacb;
int ret;
- size_t offset;
- union {
- struct qemu_signalfd_siginfo siginfo;
- char buf[128];
- } sig;
-
- /* try to read from signalfd, don't freak out if we can't read anything */
- offset = 0;
- while (offset < 128) {
- ssize_t len;
-
- len = read(s->fd, sig.buf + offset, 128 - offset);
- if (len == -1 && errno == EINTR)
- continue;
- if (len == -1 && errno == EAGAIN) {
- /* there is no natural reason for this to happen,
- * so we'll spin hard until we get everything just
- * to be on the safe side. */
- if (offset > 0)
- continue;
- }
+ ssize_t len;
+
+ /* read all bytes from signal pipe */
+ for (;;) {
+ char bytes[16];
- offset += len;
+ len = read(s->rfd, bytes, sizeof(bytes));
+ if (len == -1 && errno == EINTR)
+ continue; /* try again */
+ if (len == sizeof(bytes))
+ continue; /* more to read */
+ break;
}
for(;;) {
acb = *pacb;
if (!acb)
goto the_end;
- ret = aio_error(&acb->aiocb);
+ ret = qemu_paio_error(&acb->aiocb);
if (ret == ECANCELED) {
/* remove the request */
*pacb = acb->next;
- raw_fd_pool_put(acb);
qemu_aio_release(acb);
} else if (ret != EINPROGRESS) {
/* end of aio */
if (ret == 0) {
- ret = aio_return(&acb->aiocb);
+ ret = qemu_paio_return(&acb->aiocb);
if (ret == acb->aiocb.aio_nbytes)
ret = 0;
else
*pacb = acb->next;
/* call the callback */
acb->common.cb(acb->common.opaque, ret);
- raw_fd_pool_put(acb);
qemu_aio_release(acb);
break;
} else {
static PosixAioState *posix_aio_state;
+/*
+ * Handler installed for SIGUSR2 by posix_aio_init().
+ *
+ * Writes one byte to the write end of the notification pipe (when the
+ * AIO state exists) so that the fd handler registered on the read end
+ * runs, then calls qemu_service_io().
+ *
+ * signum: number of the delivered signal (unused).
+ */
+static void aio_signal_handler(int signum)
+{
+    /* A handler can run between a failing call and the code that reads
+     * errno; keep the interrupted code's value intact. */
+    int saved_errno = errno;
+
+    if (posix_aio_state) {
+        char byte = 0;
+        ssize_t len;
+
+        /* wfd is non-blocking: a failure with EAGAIN means the pipe is
+         * already full, so a wakeup is pending anyway and the result can
+         * be ignored deliberately. */
+        len = write(posix_aio_state->wfd, &byte, sizeof(byte));
+        (void)len;
+    }
+
+    qemu_service_io();
+
+    errno = saved_errno;
+}
+
static int posix_aio_init(void)
{
- sigset_t mask;
+ struct sigaction act;
PosixAioState *s;
+ int fds[2];
+ struct qemu_paioinit ai;
if (posix_aio_state)
return 0;
s = qemu_malloc(sizeof(PosixAioState));
- if (s == NULL)
- return -ENOMEM;
-
- /* Make sure to block AIO signal */
- sigemptyset(&mask);
- sigaddset(&mask, SIGUSR2);
- sigprocmask(SIG_BLOCK, &mask, NULL);
-
+
+ sigfillset(&act.sa_mask);
+ act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
+ act.sa_handler = aio_signal_handler;
+ sigaction(SIGUSR2, &act, NULL);
+
s->first_aio = NULL;
- s->fd = qemu_signalfd(&mask);
- if (s->fd == -1) {
- fprintf(stderr, "failed to create signalfd\n");
+ if (pipe(fds) == -1) {
+ fprintf(stderr, "failed to create pipe\n");
return -errno;
}
- fcntl(s->fd, F_SETFL, O_NONBLOCK);
+ s->rfd = fds[0];
+ s->wfd = fds[1];
- qemu_aio_set_fd_handler(s->fd, posix_aio_read, NULL, posix_aio_flush, s);
+ fcntl(s->rfd, F_SETFL, O_NONBLOCK);
+ fcntl(s->wfd, F_SETFL, O_NONBLOCK);
-#if defined(__linux__)
- {
- struct aioinit ai;
+ qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s);
+
+ memset(&ai, 0, sizeof(ai));
+ ai.aio_threads = 64;
+ ai.aio_num = 64;
+ qemu_paio_init(&ai);
- memset(&ai, 0, sizeof(ai));
-#if defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 4)
- ai.aio_threads = 64;
- ai.aio_num = 64;
-#else
- /* XXX: aio thread exit seems to hang on RedHat 9 and this init
- seems to fix the problem. */
- ai.aio_threads = 1;
- ai.aio_num = 1;
- ai.aio_idle_time = 365 * 100000;
-#endif
- aio_init(&ai);
- }
-#endif
posix_aio_state = s;
return 0;
acb = qemu_aio_get(bs, cb, opaque);
if (!acb)
return NULL;
- acb->fd = raw_fd_pool_get(s);
- acb->aiocb.aio_fildes = acb->fd;
- acb->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
- acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
+ acb->aiocb.aio_fildes = s->fd;
+ acb->aiocb.ev_signo = SIGUSR2;
acb->aiocb.aio_buf = buf;
if (nb_sectors < 0)
acb->aiocb.aio_nbytes = -nb_sectors;
qemu_aio_release(acb);
}
+/*
+ * Unlink acb from the list of pending requests and release it.
+ *
+ * The list starts at posix_aio_state->first_aio and is chained through
+ * each entry's next pointer. If acb is not on the list, nothing is
+ * unlinked and nothing is released.
+ */
+static void raw_aio_remove(RawAIOCB *acb)
+{
+    RawAIOCB **pacb;
+
+    /* remove the callback from the queue */
+    for (pacb = &posix_aio_state->first_aio; *pacb != NULL;
+         pacb = &(*pacb)->next) {
+        /* The cursor must advance through the element currently being
+         * visited; stepping to &acb->next instead would leave the list
+         * after the first miss and never find a non-head entry. */
+        if (*pacb == acb) {
+            *pacb = acb->next;
+            qemu_aio_release(acb);
+            break;
+        }
+    }
+}
+
static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
int64_t sector_num, uint8_t *buf, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
* If O_DIRECT is used and the buffer is not aligned fall back
* to synchronous IO.
*/
-#if defined(O_DIRECT)
BDRVRawState *s = bs->opaque;
if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
qemu_bh_schedule(bh);
return &acb->common;
}
-#endif
acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
if (!acb)
return NULL;
- if (aio_read(&acb->aiocb) < 0) {
- qemu_aio_release(acb);
+ if (qemu_paio_read(&acb->aiocb) < 0) {
+ raw_aio_remove(acb);
return NULL;
}
return &acb->common;
* If O_DIRECT is used and the buffer is not aligned fall back
* to synchronous IO.
*/
-#if defined(O_DIRECT)
BDRVRawState *s = bs->opaque;
if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
qemu_bh_schedule(bh);
return &acb->common;
}
-#endif
acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
if (!acb)
return NULL;
- if (aio_write(&acb->aiocb) < 0) {
- qemu_aio_release(acb);
+ if (qemu_paio_write(&acb->aiocb) < 0) {
+ raw_aio_remove(acb);
return NULL;
}
return &acb->common;
{
int ret;
RawAIOCB *acb = (RawAIOCB *)blockacb;
- RawAIOCB **pacb;
- ret = aio_cancel(acb->aiocb.aio_fildes, &acb->aiocb);
- if (ret == AIO_NOTCANCELED) {
+ ret = qemu_paio_cancel(acb->aiocb.aio_fildes, &acb->aiocb);
+ if (ret == QEMU_PAIO_NOTCANCELED) {
/* fail safe: if the aio could not be canceled, we wait for
it */
- while (aio_error(&acb->aiocb) == EINPROGRESS);
+ while (qemu_paio_error(&acb->aiocb) == EINPROGRESS);
}
- /* remove the callback from the queue */
- pacb = &posix_aio_state->first_aio;
- for(;;) {
- if (*pacb == NULL) {
- break;
- } else if (*pacb == acb) {
- *pacb = acb->next;
- raw_fd_pool_put(acb);
- qemu_aio_release(acb);
- break;
- }
- pacb = &acb->next;
- }
+ raw_aio_remove(acb);
}
-
#else /* CONFIG_AIO */
static int posix_aio_init(void)
{
}
#endif /* CONFIG_AIO */
-static void raw_close_fd_pool(BDRVRawState *s)
-{
- int i;
-
- for (i = 0; i < RAW_FD_POOL_SIZE; i++) {
- if (s->fd_pool[i] != -1) {
- close(s->fd_pool[i]);
- s->fd_pool[i] = -1;
- }
- }
-}
static void raw_close(BlockDriverState *bs)
{
if (s->fd >= 0) {
close(s->fd);
s->fd = -1;
-#if defined(O_DIRECT)
if (s->aligned_buf != NULL)
qemu_free(s->aligned_buf);
-#endif
}
- raw_close_fd_pool(s);
}
static int raw_truncate(BlockDriverState *bs, int64_t offset)
.bdrv_aio_cancel = raw_aio_cancel,
.aiocb_size = sizeof(RawAIOCB),
#endif
+
.bdrv_pread = raw_pread,
.bdrv_pwrite = raw_pwrite,
.bdrv_truncate = raw_truncate,
static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
- int fd, open_flags, ret, i;
+ int fd, open_flags, ret;
posix_aio_init();
open_flags |= O_RDONLY;
bs->read_only = 1;
}
-#ifdef O_DIRECT
- if (flags & BDRV_O_DIRECT)
+ /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+ * and O_DIRECT for no caching. */
+ if ((flags & BDRV_O_NOCACHE))
open_flags |= O_DIRECT;
-#endif
+ else if (!(flags & BDRV_O_CACHE_WB))
+ open_flags |= O_DSYNC;
s->type = FTYPE_FILE;
#if defined(__linux__)
return ret;
}
s->fd = fd;
- for (i = 0; i < RAW_FD_POOL_SIZE; i++)
- s->fd_pool[i] = -1;
#if defined(__linux__)
/* close fd so that we can reopen it as needed */
if (s->type == FTYPE_FD) {
(qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
close(s->fd);
s->fd = -1;
- raw_close_fd_pool(s);
#ifdef DEBUG_FLOPPY
printf("Floppy closed\n");
#endif
if (s->fd >= 0) {
close(s->fd);
s->fd = -1;
- raw_close_fd_pool(s);
}
fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK);
if (fd >= 0) {
.bdrv_aio_cancel = raw_aio_cancel,
.aiocb_size = sizeof(RawAIOCB),
#endif
+
.bdrv_pread = raw_pread,
.bdrv_pwrite = raw_pwrite,
.bdrv_getlength = raw_getlength,