#include "qemu-timer.h"
#include "qemu-char.h"
#include "block_int.h"
-#include "compatfd.h"
#include <assert.h>
#ifdef CONFIG_AIO
#include <aio.h>
#define DEBUG_BLOCK_PRINT(formatCstr, args...)
#endif
+/* OS X does not have O_DSYNC */
+#ifndef O_DSYNC
+#define O_DSYNC O_SYNC
+#endif
+
+/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
+#ifndef O_DIRECT
+#define O_DIRECT O_DSYNC
+#endif
+
#define FTYPE_FILE 0
#define FTYPE_CD 1
#define FTYPE_FD 2
reopen it to see if the disk has been changed */
#define FD_OPEN_TIMEOUT 1000
+/* posix-aio doesn't allow multiple outstanding requests to a single file
+ * descriptor.  We implement a pool of dup()'d file descriptors to work
+ * around this. */
+#define RAW_FD_POOL_SIZE 64
+
typedef struct BDRVRawState {
int fd;
int type;
unsigned int lseek_err_cnt;
+ int fd_pool[RAW_FD_POOL_SIZE];
#if defined(__linux__)
/* linux floppy specific */
int fd_open_flags;
int fd_got_error;
int fd_media_changed;
#endif
-#if defined(O_DIRECT)
uint8_t* aligned_buf;
-#endif
} BDRVRawState;
static int posix_aio_init(void);
{
BDRVRawState *s = bs->opaque;
int fd, open_flags, ret;
+ int i;
posix_aio_init();
}
if (flags & BDRV_O_CREAT)
open_flags |= O_CREAT | O_TRUNC;
-#ifdef O_DIRECT
- if (flags & BDRV_O_DIRECT)
+
+ /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+ * and O_DIRECT for no caching. */
+ if ((flags & BDRV_O_NOCACHE))
open_flags |= O_DIRECT;
-#endif
+ else if (!(flags & BDRV_O_CACHE_WB))
+ open_flags |= O_DSYNC;
s->type = FTYPE_FILE;
return ret;
}
s->fd = fd;
-#if defined(O_DIRECT)
+ for (i = 0; i < RAW_FD_POOL_SIZE; i++)
+ s->fd_pool[i] = -1;
s->aligned_buf = NULL;
- if (flags & BDRV_O_DIRECT) {
+ if ((flags & BDRV_O_NOCACHE)) {
s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
if (s->aligned_buf == NULL) {
ret = -errno;
return ret;
}
}
-#endif
return 0;
}
}
-#if defined(O_DIRECT)
/*
* offset and count are in bytes and possibly not aligned. For files opened
* with O_DIRECT, necessary alignments are ensured before calling
return raw_pwrite_aligned(bs, offset, buf, count) + sum;
}
-#else
-#define raw_pread raw_pread_aligned
-#define raw_pwrite raw_pwrite_aligned
-#endif
-
-
#ifdef CONFIG_AIO
/***********************************************************/
/* Unix AIO using POSIX AIO */
typedef struct RawAIOCB {
BlockDriverAIOCB common;
+ int fd;
struct aiocb aiocb;
struct RawAIOCB *next;
int ret;
typedef struct PosixAioState
{
- int fd;
+ int rfd, wfd;
RawAIOCB *first_aio;
} PosixAioState;
+/*
+ * Pick a file descriptor for a new AIO request.
+ *
+ * Walks the pool looking for free slots (value -1).  For each free slot a
+ * dup() of s->fd is attempted and stored in the slot; the first successful
+ * duplicate is returned.  If no slot is free, or every dup() attempt
+ * fails, the main descriptor s->fd is returned instead.
+ */
+static int raw_fd_pool_get(BDRVRawState *s)
+{
+    int slot = 0;
+
+    while (slot < RAW_FD_POOL_SIZE) {
+        if (s->fd_pool[slot] == -1) {
+            /* free slot: try to fill it with a duplicate of the main fd */
+            int dup_fd = dup(s->fd);
+
+            s->fd_pool[slot] = dup_fd;
+            if (dup_fd != -1) {
+                return dup_fd;
+            }
+        }
+        slot++;
+    }
+
+    /* nothing could be duplicated, fall back to the main descriptor */
+    return s->fd;
+}
+
+/*
+ * Release the descriptor used by a finished or cancelled request.
+ *
+ * Every pool slot whose value equals acb->fd is closed and reset to -1;
+ * slots holding any other value are left untouched.
+ */
+static void raw_fd_pool_put(RawAIOCB *acb)
+{
+    BDRVRawState *state = acb->common.bs->opaque;
+    int *slot = state->fd_pool;
+    int *pool_end = state->fd_pool + RAW_FD_POOL_SIZE;
+
+    for (; slot != pool_end; slot++) {
+        if (*slot != acb->fd) {
+            continue;
+        }
+        close(*slot);
+        *slot = -1;
+    }
+}
+
static void posix_aio_read(void *opaque)
{
PosixAioState *s = opaque;
RawAIOCB *acb, **pacb;
int ret;
- size_t offset;
- union {
- struct qemu_signalfd_siginfo siginfo;
- char buf[128];
- } sig;
-
- /* try to read from signalfd, don't freak out if we can't read anything */
- offset = 0;
- while (offset < 128) {
- ssize_t len;
-
- len = read(s->fd, sig.buf + offset, 128 - offset);
+ ssize_t len;
+
+ do {
+ char byte;
+
+ len = read(s->rfd, &byte, 1);
if (len == -1 && errno == EINTR)
continue;
- if (len == -1 && errno == EAGAIN) {
- /* there is no natural reason for this to happen,
- * so we'll spin hard until we get everything just
- * to be on the safe side. */
- if (offset > 0)
- continue;
- }
-
- offset += len;
- }
+ if (len == -1 && errno == EAGAIN)
+ break;
+ } while (len == -1);
for(;;) {
pacb = &s->first_aio;
if (ret == ECANCELED) {
/* remove the request */
*pacb = acb->next;
+ raw_fd_pool_put(acb);
qemu_aio_release(acb);
} else if (ret != EINPROGRESS) {
/* end of aio */
*pacb = acb->next;
/* call the callback */
acb->common.cb(acb->common.opaque, ret);
+ raw_fd_pool_put(acb);
qemu_aio_release(acb);
break;
} else {
static PosixAioState *posix_aio_state;
+/*
+ * Signal handler for AIO completion notifications.
+ *
+ * If the global AIO state exists, one byte is written to the write end of
+ * its notification pipe; qemu_service_io() is then called in all cases.
+ *
+ * errno is saved on entry and restored on exit so that the code this
+ * handler interrupted never observes an errno value clobbered by the
+ * write() below or by anything qemu_service_io() does.
+ *
+ * signum: number of the delivered signal (unused).
+ */
+static void aio_signal_handler(int signum)
+{
+    int saved_errno = errno;
+
+    if (posix_aio_state) {
+        char byte = 0;
+        ssize_t ret;
+
+        /* Best effort: a failure cannot be reported from a signal handler,
+         * so the result is deliberately discarded. */
+        ret = write(posix_aio_state->wfd, &byte, sizeof(byte));
+        (void)ret;
+    }
+
+    qemu_service_io();
+
+    errno = saved_errno;
+}
+
static int posix_aio_init(void)
{
- sigset_t mask;
+ struct sigaction act;
PosixAioState *s;
- struct aioinit ai;
+ int fds[2];
if (posix_aio_state)
return 0;
if (s == NULL)
return -ENOMEM;
- /* Make sure to block AIO signal */
- sigemptyset(&mask);
- sigaddset(&mask, SIGUSR2);
- sigprocmask(SIG_BLOCK, &mask, NULL);
-
+ sigfillset(&act.sa_mask);
+ act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
+ act.sa_handler = aio_signal_handler;
+ sigaction(SIGUSR2, &act, NULL);
+
s->first_aio = NULL;
- s->fd = qemu_signalfd(&mask);
+ if (pipe(fds) == -1) {
+ fprintf(stderr, "failed to create pipe\n");
+ return -errno;
+ }
+
+ s->rfd = fds[0];
+ s->wfd = fds[1];
- fcntl(s->fd, F_SETFL, O_NONBLOCK);
+ fcntl(s->wfd, F_SETFL, O_NONBLOCK);
- qemu_aio_set_fd_handler(s->fd, posix_aio_read, NULL, posix_aio_flush, s);
+ qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s);
- memset(&ai, 0, sizeof(ai));
-#if !defined(__linux__) || (defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 4))
- ai.aio_threads = 5;
- ai.aio_num = 1;
+#if defined(__linux__)
+ {
+ struct aioinit ai;
+
+ memset(&ai, 0, sizeof(ai));
+#if defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 4)
+ ai.aio_threads = 64;
+ ai.aio_num = 64;
#else
- /* XXX: aio thread exit seems to hang on RedHat 9 and this init
- seems to fix the problem. */
- ai.aio_threads = 1;
- ai.aio_num = 1;
- ai.aio_idle_time = 365 * 100000;
+ /* XXX: aio thread exit seems to hang on RedHat 9 and this init
+ seems to fix the problem. */
+ ai.aio_threads = 1;
+ ai.aio_num = 1;
+ ai.aio_idle_time = 365 * 100000;
+#endif
+ aio_init(&ai);
}
#endif
- aio_init(&ai);
posix_aio_state = s;
return 0;
acb = qemu_aio_get(bs, cb, opaque);
if (!acb)
return NULL;
- acb->aiocb.aio_fildes = s->fd;
+ acb->fd = raw_fd_pool_get(s);
+ acb->aiocb.aio_fildes = acb->fd;
acb->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
acb->aiocb.aio_buf = buf;
* If O_DIRECT is used and the buffer is not aligned fall back
* to synchronous IO.
*/
-#if defined(O_DIRECT)
BDRVRawState *s = bs->opaque;
if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
qemu_bh_schedule(bh);
return &acb->common;
}
-#endif
acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
if (!acb)
* If O_DIRECT is used and the buffer is not aligned fall back
* to synchronous IO.
*/
-#if defined(O_DIRECT)
BDRVRawState *s = bs->opaque;
if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
qemu_bh_schedule(bh);
return &acb->common;
}
-#endif
acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
if (!acb)
break;
} else if (*pacb == acb) {
*pacb = acb->next;
+ raw_fd_pool_put(acb);
qemu_aio_release(acb);
break;
}
#else /* CONFIG_AIO */
static int posix_aio_init(void)
{
+ return 0;
}
#endif /* CONFIG_AIO */
+/*
+ * Close every descriptor currently held in the pool and mark each of
+ * those slots as free (-1).  Slots that are already free are skipped.
+ */
+static void raw_close_fd_pool(BDRVRawState *s)
+{
+    int *slot;
+
+    for (slot = s->fd_pool; slot < s->fd_pool + RAW_FD_POOL_SIZE; slot++) {
+        if (*slot == -1) {
+            continue;
+        }
+        close(*slot);
+        *slot = -1;
+    }
+}
+
static void raw_close(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
if (s->fd >= 0) {
close(s->fd);
s->fd = -1;
-#if defined(O_DIRECT)
if (s->aligned_buf != NULL)
qemu_free(s->aligned_buf);
-#endif
}
+ raw_close_fd_pool(s);
}
static int raw_truncate(BlockDriverState *bs, int64_t offset)
static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
- int fd, open_flags, ret;
+ int fd, open_flags, ret, i;
posix_aio_init();
open_flags |= O_RDONLY;
bs->read_only = 1;
}
-#ifdef O_DIRECT
- if (flags & BDRV_O_DIRECT)
+ /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+ * and O_DIRECT for no caching. */
+ if ((flags & BDRV_O_NOCACHE))
open_flags |= O_DIRECT;
-#endif
+ else if (!(flags & BDRV_O_CACHE_WB))
+ open_flags |= O_DSYNC;
s->type = FTYPE_FILE;
#if defined(__linux__)
return ret;
}
s->fd = fd;
+ for (i = 0; i < RAW_FD_POOL_SIZE; i++)
+ s->fd_pool[i] = -1;
#if defined(__linux__)
/* close fd so that we can reopen it as needed */
if (s->type == FTYPE_FD) {
}
#if defined(__linux__)
-
/* Note: we do not have a reliable method to detect if the floppy is
present. The current method is to try to open the floppy at every
I/O and to keep it opened during a few hundreds of ms. */
(qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
close(s->fd);
s->fd = -1;
+ raw_close_fd_pool(s);
#ifdef DEBUG_FLOPPY
printf("Floppy closed\n");
#endif
if (s->fd >= 0) {
close(s->fd);
s->fd = -1;
+ raw_close_fd_pool(s);
}
fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK);
if (fd >= 0) {