#include "qemu-common.h"
#include "qemu-timer.h"
#include "qemu-char.h"
+#include "qemu-log.h"
#include "block_int.h"
#include "module.h"
-#ifdef CONFIG_AIO
-#include "posix-aio-compat.h"
-#endif
+#include "block/raw-posix-aio.h"
#ifdef CONFIG_COCOA
#include <paths.h>
#ifdef __sun__
#define _POSIX_PTHREAD_SEMANTICS 1
-#include <signal.h>
#include <sys/dkio.h>
#endif
#ifdef __linux__
+#include <sys/types.h>
+#include <sys/stat.h>
#include <sys/ioctl.h>
+#include <sys/param.h>
#include <linux/cdrom.h>
#include <linux/fd.h>
#endif
-#ifdef __FreeBSD__
-#include <signal.h>
+#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/disk.h>
#include <sys/cdio.h>
#endif
#include <sys/dkio.h>
#endif
+#ifdef __NetBSD__
+#include <sys/ioctl.h>
+#include <sys/disklabel.h>
+#include <sys/dkio.h>
+#include <sys/disk.h>
+#endif
+
#ifdef __DragonFly__
#include <sys/ioctl.h>
#include <sys/diskslice.h>
#endif
+#ifdef CONFIG_XFS
+#include <xfs/xfs.h>
+#endif
+
//#define DEBUG_FLOPPY
//#define DEBUG_BLOCK
/* OS X does not have O_DSYNC */
#ifndef O_DSYNC
+#ifdef O_SYNC
#define O_DSYNC O_SYNC
+#elif defined(O_FSYNC)
+#define O_DSYNC O_FSYNC
+#endif
#endif
/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
#define FTYPE_CD 1
#define FTYPE_FD 2
-#define ALIGNED_BUFFER_SIZE (32 * 512)
-
-/* if the FD is not accessed during that time (in ms), we try to
+/* if the FD is not accessed during that time (in ns), we try to
reopen it to see if the disk has been changed */
-#define FD_OPEN_TIMEOUT 1000
+#define FD_OPEN_TIMEOUT (1000000000)
+
+#define MAX_BLOCKSIZE 4096
typedef struct BDRVRawState {
int fd;
int type;
- unsigned int lseek_err_cnt;
int open_flags;
#if defined(__linux__)
/* linux floppy specific */
int fd_got_error;
int fd_media_changed;
#endif
- uint8_t* aligned_buf;
+#ifdef CONFIG_LINUX_AIO
+ int use_aio;
+ void *aio_ctx;
+#endif
+ uint8_t *aligned_buf;
+ unsigned aligned_buf_size;
+#ifdef CONFIG_XFS
+ bool is_xfs : 1;
+#endif
} BDRVRawState;
-static int posix_aio_init(void);
-
static int fd_open(BlockDriverState *bs);
+static int64_t raw_getlength(BlockDriverState *bs);
-#if defined(__FreeBSD__)
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
static int cdrom_reopen(BlockDriverState *bs);
#endif
+#if defined(__NetBSD__)
+/*
+ * NetBSD only: if *filename names a block device, point *filename at the
+ * same path with an 'r' prefixed to its last component (e.g. "/dev/wd0a"
+ * becomes "/dev/rwd0a") and report the substitution on stderr.
+ *
+ * Anything that is not a block device is left untouched.
+ *
+ * The rewritten name lives in a static buffer, so it is overwritten by the
+ * next call and the function is not reentrant.
+ *
+ * Returns 0 on success, or a negative errno value if lstat() fails or the
+ * rewritten path does not fit in the buffer.
+ */
+static int raw_normalize_devicepath(const char **filename)
+{
+    static char namebuf[PATH_MAX];
+    const char *fname = *filename;
+    const char *dp;
+    struct stat sb;
+    int len;
+
+    if (lstat(fname, &sb) < 0) {
+        /* fprintf()/strerror() may modify errno, so capture it first. */
+        int err = errno;
+
+        fprintf(stderr, "%s: stat failed: %s\n", fname, strerror(err));
+        return -err;
+    }
+
+    if (!S_ISBLK(sb.st_mode)) {
+        return 0;
+    }
+
+    dp = strrchr(fname, '/');
+    if (dp == NULL) {
+        len = snprintf(namebuf, sizeof(namebuf), "r%s", fname);
+    } else {
+        len = snprintf(namebuf, sizeof(namebuf), "%.*s/r%s",
+                       (int)(dp - fname), fname, dp + 1);
+    }
+    if (len < 0 || (size_t)len >= sizeof(namebuf)) {
+        /* Refuse to open a silently truncated device path. */
+        return -ENAMETOOLONG;
+    }
+
+    fprintf(stderr, "%s is a block device", fname);
+    *filename = namebuf;
+    fprintf(stderr, ", using %s\n", *filename);
+
+    return 0;
+}
+#else
+/* Non-NetBSD hosts: the path is left as-is and success (0) is returned. */
+static int raw_normalize_devicepath(const char **filename)
+{
+ return 0;
+}
+#endif
+
static int raw_open_common(BlockDriverState *bs, const char *filename,
int bdrv_flags, int open_flags)
{
BDRVRawState *s = bs->opaque;
int fd, ret;
- posix_aio_init();
-
- s->lseek_err_cnt = 0;
+ ret = raw_normalize_devicepath(&filename);
+ if (ret != 0) {
+ return ret;
+ }
s->open_flags = open_flags | O_BINARY;
s->open_flags &= ~O_ACCMODE;
- if ((bdrv_flags & BDRV_O_ACCESS) == BDRV_O_RDWR) {
+ if (bdrv_flags & BDRV_O_RDWR) {
s->open_flags |= O_RDWR;
} else {
s->open_flags |= O_RDONLY;
- bs->read_only = 1;
}
/* Use O_DSYNC for write-through caching, no flags for write-back caching,
* and O_DIRECT for no caching. */
if ((bdrv_flags & BDRV_O_NOCACHE))
s->open_flags |= O_DIRECT;
- else if (!(bdrv_flags & BDRV_O_CACHE_WB))
+ if (!(bdrv_flags & BDRV_O_CACHE_WB))
s->open_flags |= O_DSYNC;
s->fd = -1;
- fd = open(filename, s->open_flags, 0644);
+ fd = qemu_open(filename, s->open_flags, 0644);
if (fd < 0) {
ret = -errno;
if (ret == -EROFS)
}
s->fd = fd;
s->aligned_buf = NULL;
+
if ((bdrv_flags & BDRV_O_NOCACHE)) {
- s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE);
+ /*
+ * Allocate a buffer for read/modify/write cycles. Choose the size
+ * pessimistically as we don't know the block size yet.
+ */
+ s->aligned_buf_size = 32 * MAX_BLOCKSIZE;
+ s->aligned_buf = qemu_memalign(MAX_BLOCKSIZE, s->aligned_buf_size);
if (s->aligned_buf == NULL) {
- ret = -errno;
- close(fd);
- return ret;
+ goto out_close;
+ }
+ }
+
+#ifdef CONFIG_LINUX_AIO
+ if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
+ (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
+
+ /* We're falling back to POSIX AIO in some cases */
+ paio_init();
+
+ s->aio_ctx = laio_init();
+ if (!s->aio_ctx) {
+ goto out_free_buf;
}
+ s->use_aio = 1;
+ } else
+#endif
+ {
+ if (paio_init() < 0) {
+ goto out_free_buf;
+ }
+#ifdef CONFIG_LINUX_AIO
+ s->use_aio = 0;
+#endif
+ }
+
+#ifdef CONFIG_XFS
+ if (platform_test_xfs_fd(s->fd)) {
+ s->is_xfs = 1;
}
+#endif
+
return 0;
+
+out_free_buf:
+ qemu_vfree(s->aligned_buf);
+out_close:
+ close(fd);
+ return -errno;
}
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
- int open_flags = 0;
s->type = FTYPE_FILE;
- if (flags & BDRV_O_CREAT)
- open_flags = O_CREAT | O_TRUNC;
-
- return raw_open_common(bs, filename, flags, open_flags);
+ return raw_open_common(bs, filename, flags, 0);
}
/* XXX: use host sector size if necessary with:
}
#endif
#ifdef CONFIG_COCOA
- u_int32_t blockSize = 512;
+ uint32_t blockSize = 512;
if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
bufsize = blockSize;
}
if (ret < 0)
return ret;
- if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
- ++(s->lseek_err_cnt);
- if(s->lseek_err_cnt <= 10) {
- DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
- "] lseek failed : %d = %s\n",
- s->fd, bs->filename, offset, buf, count,
- bs->total_sectors, errno, strerror(errno));
+ ret = pread(s->fd, buf, count, offset);
+ if (ret == count)
+ return ret;
+
+ /* Allow reads beyond the end (needed for pwrite) */
+ if ((ret == 0) && bs->growable) {
+ int64_t size = raw_getlength(bs);
+ if (offset >= size) {
+ memset(buf, 0, count);
+ return count;
}
- return -1;
}
- s->lseek_err_cnt=0;
-
- ret = read(s->fd, buf, count);
- if (ret == count)
- goto label__raw_read__success;
DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
"] read failed %d : %d = %s\n",
bs->total_sectors, ret, errno, strerror(errno));
/* Try harder for CDrom. */
- if (bs->type == BDRV_TYPE_CDROM) {
- lseek(s->fd, offset, SEEK_SET);
- ret = read(s->fd, buf, count);
+ if (s->type != FTYPE_FILE) {
+ ret = pread(s->fd, buf, count, offset);
if (ret == count)
- goto label__raw_read__success;
- lseek(s->fd, offset, SEEK_SET);
- ret = read(s->fd, buf, count);
+ return ret;
+ ret = pread(s->fd, buf, count, offset);
if (ret == count)
- goto label__raw_read__success;
+ return ret;
DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
"] retry read failed %d : %d = %s\n",
bs->total_sectors, ret, errno, strerror(errno));
}
-label__raw_read__success:
-
return (ret < 0) ? -errno : ret;
}
/*
- * offset and count are in bytes, but must be multiples of 512 for files
- * opened with O_DIRECT. buf must be aligned to 512 bytes then.
+ * offset and count are in bytes, but must be multiples of the sector size
+ * for files opened with O_DIRECT. buf must be aligned to sector size bytes
+ * then.
*
* This function may be called without alignment if the caller ensures
* that O_DIRECT is not in effect.
if (ret < 0)
return -errno;
- if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
- ++(s->lseek_err_cnt);
- if(s->lseek_err_cnt) {
- DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%"
- PRId64 "] lseek failed : %d = %s\n",
- s->fd, bs->filename, offset, buf, count,
- bs->total_sectors, errno, strerror(errno));
- }
- return -EIO;
- }
- s->lseek_err_cnt = 0;
-
- ret = write(s->fd, buf, count);
+ ret = pwrite(s->fd, buf, count, offset);
if (ret == count)
- goto label__raw_write__success;
+ return ret;
DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
"] write failed %d : %d = %s\n",
s->fd, bs->filename, offset, buf, count,
bs->total_sectors, ret, errno, strerror(errno));
-label__raw_write__success:
-
return (ret < 0) ? -errno : ret;
}
uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
+ unsigned sector_mask = bs->buffer_alignment - 1;
int size, ret, shift, sum;
sum = 0;
if (s->aligned_buf != NULL) {
- if (offset & 0x1ff) {
- /* align offset on a 512 bytes boundary */
+ if (offset & sector_mask) {
+ /* align offset on a sector size bytes boundary */
- shift = offset & 0x1ff;
- size = (shift + count + 0x1ff) & ~0x1ff;
- if (size > ALIGNED_BUFFER_SIZE)
- size = ALIGNED_BUFFER_SIZE;
+ shift = offset & sector_mask;
+ size = (shift + count + sector_mask) & ~sector_mask;
+ if (size > s->aligned_buf_size)
+ size = s->aligned_buf_size;
ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
if (ret < 0)
return ret;
- size = 512 - shift;
+ size = bs->buffer_alignment - shift;
if (size > count)
size = count;
memcpy(buf, s->aligned_buf + shift, size);
if (count == 0)
return sum;
}
- if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
+ if (count & sector_mask || (uintptr_t) buf & sector_mask) {
/* read on aligned buffer */
while (count) {
- size = (count + 0x1ff) & ~0x1ff;
- if (size > ALIGNED_BUFFER_SIZE)
- size = ALIGNED_BUFFER_SIZE;
+ size = (count + sector_mask) & ~sector_mask;
+ if (size > s->aligned_buf_size)
+ size = s->aligned_buf_size;
ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
- if (ret < 0)
+ if (ret < 0) {
return ret;
+ } else if (ret == 0) {
+ fprintf(stderr, "raw_pread: read beyond end of file\n");
+ abort();
+ }
size = ret;
if (size > count)
{
int ret;
- ret = raw_pread(bs, sector_num * 512, buf, nb_sectors * 512);
- if (ret == (nb_sectors * 512))
+ ret = raw_pread(bs, sector_num * BDRV_SECTOR_SIZE, buf,
+ nb_sectors * BDRV_SECTOR_SIZE);
+ if (ret == (nb_sectors * BDRV_SECTOR_SIZE))
ret = 0;
return ret;
}
const uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
+ unsigned sector_mask = bs->buffer_alignment - 1;
int size, ret, shift, sum;
sum = 0;
if (s->aligned_buf != NULL) {
- if (offset & 0x1ff) {
- /* align offset on a 512 bytes boundary */
- shift = offset & 0x1ff;
- ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
+ if (offset & sector_mask) {
+ /* align offset on a sector size bytes boundary */
+ shift = offset & sector_mask;
+ ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf,
+ bs->buffer_alignment);
if (ret < 0)
return ret;
- size = 512 - shift;
+ size = bs->buffer_alignment - shift;
if (size > count)
size = count;
memcpy(s->aligned_buf + shift, buf, size);
- ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
+ ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf,
+ bs->buffer_alignment);
if (ret < 0)
return ret;
if (count == 0)
return sum;
}
- if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
+ if (count & sector_mask || (uintptr_t) buf & sector_mask) {
- while ((size = (count & ~0x1ff)) != 0) {
+ while ((size = (count & ~sector_mask)) != 0) {
- if (size > ALIGNED_BUFFER_SIZE)
- size = ALIGNED_BUFFER_SIZE;
+ if (size > s->aligned_buf_size)
+ size = s->aligned_buf_size;
memcpy(s->aligned_buf, buf, size);
count -= ret;
sum += ret;
}
- /* here, count < 512 because (count & ~0x1ff) == 0 */
+ /* here, count < sector_size because (count & ~sector_mask) == 0 */
if (count) {
- ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
+ ret = raw_pread_aligned(bs, offset, s->aligned_buf,
+ bs->buffer_alignment);
if (ret < 0)
return ret;
memcpy(s->aligned_buf, buf, count);
- ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
+ ret = raw_pwrite_aligned(bs, offset, s->aligned_buf,
+ bs->buffer_alignment);
if (ret < 0)
return ret;
if (count < ret)
const uint8_t *buf, int nb_sectors)
{
int ret;
- ret = raw_pwrite(bs, sector_num * 512, buf, nb_sectors * 512);
- if (ret == (nb_sectors * 512))
+ ret = raw_pwrite(bs, sector_num * BDRV_SECTOR_SIZE, buf,
+ nb_sectors * BDRV_SECTOR_SIZE);
+ if (ret == (nb_sectors * BDRV_SECTOR_SIZE))
ret = 0;
return ret;
}
-#ifdef CONFIG_AIO
-/***********************************************************/
-/* Unix AIO using POSIX AIO */
-
-typedef struct RawAIOCB {
- BlockDriverAIOCB common;
- struct qemu_paiocb aiocb;
- struct RawAIOCB *next;
- int ret;
-} RawAIOCB;
-
-typedef struct PosixAioState
-{
- int rfd, wfd;
- RawAIOCB *first_aio;
-} PosixAioState;
-
-static void posix_aio_read(void *opaque)
-{
- PosixAioState *s = opaque;
- RawAIOCB *acb, **pacb;
- int ret;
- ssize_t len;
-
- /* read all bytes from signal pipe */
- for (;;) {
- char bytes[16];
-
- len = read(s->rfd, bytes, sizeof(bytes));
- if (len == -1 && errno == EINTR)
- continue; /* try again */
- if (len == sizeof(bytes))
- continue; /* more to read */
- break;
- }
-
- for(;;) {
- pacb = &s->first_aio;
- for(;;) {
- acb = *pacb;
- if (!acb)
- goto the_end;
- ret = qemu_paio_error(&acb->aiocb);
- if (ret == ECANCELED) {
- /* remove the request */
- *pacb = acb->next;
- qemu_aio_release(acb);
- } else if (ret != EINPROGRESS) {
- /* end of aio */
- if (ret == 0) {
- ret = qemu_paio_return(&acb->aiocb);
- if (ret == acb->aiocb.aio_nbytes)
- ret = 0;
- else
- ret = -EINVAL;
- } else {
- ret = -ret;
- }
- /* remove the request */
- *pacb = acb->next;
- /* call the callback */
- acb->common.cb(acb->common.opaque, ret);
- qemu_aio_release(acb);
- break;
- } else {
- pacb = &acb->next;
- }
- }
- }
- the_end: ;
-}
-
-static int posix_aio_flush(void *opaque)
-{
- PosixAioState *s = opaque;
- return !!s->first_aio;
-}
-
-static PosixAioState *posix_aio_state;
-
-static void aio_signal_handler(int signum)
-{
- if (posix_aio_state) {
- char byte = 0;
-
- write(posix_aio_state->wfd, &byte, sizeof(byte));
- }
-
- qemu_service_io();
-}
-
-static int posix_aio_init(void)
+/*
+ * Check if all memory in this vector is sector aligned.
+ */
+static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
{
- struct sigaction act;
- PosixAioState *s;
- int fds[2];
- struct qemu_paioinit ai;
-
- if (posix_aio_state)
- return 0;
+ int i;
- s = qemu_malloc(sizeof(PosixAioState));
-
- sigfillset(&act.sa_mask);
- act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
- act.sa_handler = aio_signal_handler;
- sigaction(SIGUSR2, &act, NULL);
-
- s->first_aio = NULL;
- if (pipe(fds) == -1) {
- fprintf(stderr, "failed to create pipe\n");
- return -errno;
- }
-
- s->rfd = fds[0];
- s->wfd = fds[1];
-
- fcntl(s->rfd, F_SETFL, O_NONBLOCK);
- fcntl(s->wfd, F_SETFL, O_NONBLOCK);
-
- qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s);
-
- memset(&ai, 0, sizeof(ai));
- ai.aio_threads = 64;
- ai.aio_num = 64;
- qemu_paio_init(&ai);
-
- posix_aio_state = s;
-
- return 0;
-}
-
-static void raw_aio_remove(RawAIOCB *acb)
-{
- RawAIOCB **pacb;
-
- /* remove the callback from the queue */
- pacb = &posix_aio_state->first_aio;
- for(;;) {
- if (*pacb == NULL) {
- fprintf(stderr, "raw_aio_remove: aio request not found!\n");
- break;
- } else if (*pacb == acb) {
- *pacb = acb->next;
- qemu_aio_release(acb);
- break;
+ for (i = 0; i < qiov->niov; i++) {
+ if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
+ return 0;
}
- pacb = &(*pacb)->next;
- }
-}
-
-static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
-{
- int ret;
- RawAIOCB *acb = (RawAIOCB *)blockacb;
-
- ret = qemu_paio_cancel(acb->aiocb.aio_fildes, &acb->aiocb);
- if (ret == QEMU_PAIO_NOTCANCELED) {
- /* fail safe: if the aio could not be canceled, we wait for
- it */
- while (qemu_paio_error(&acb->aiocb) == EINPROGRESS);
}
- raw_aio_remove(acb);
+ return 1;
}
-static AIOPool raw_aio_pool = {
- .aiocb_size = sizeof(RawAIOCB),
- .cancel = raw_aio_cancel,
-};
-
-static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
+static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque, int type)
{
BDRVRawState *s = bs->opaque;
- RawAIOCB *acb;
if (fd_open(bs) < 0)
return NULL;
- acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
- if (!acb)
- return NULL;
- acb->aiocb.aio_fildes = s->fd;
- acb->aiocb.ev_signo = SIGUSR2;
- acb->aiocb.aio_iov = qiov->iov;
- acb->aiocb.aio_niov = qiov->niov;
- acb->aiocb.aio_nbytes = nb_sectors * 512;
- acb->aiocb.aio_offset = sector_num * 512;
- acb->aiocb.aio_flags = 0;
-
/*
* If O_DIRECT is used the buffer needs to be aligned on a sector
- * boundary. Tell the low level code to ensure that in case it's
- * not done yet.
+ * boundary. Check if this is the case or tell the low-level
+ * driver that it needs to copy the buffer.
*/
- if (s->aligned_buf)
- acb->aiocb.aio_flags |= QEMU_AIO_SECTOR_ALIGNED;
+ if (s->aligned_buf) {
+ if (!qiov_is_aligned(bs, qiov)) {
+ type |= QEMU_AIO_MISALIGNED;
+#ifdef CONFIG_LINUX_AIO
+ } else if (s->use_aio) {
+ return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
+ nb_sectors, cb, opaque, type);
+#endif
+ }
+ }
- acb->next = posix_aio_state->first_aio;
- posix_aio_state->first_aio = acb;
- return acb;
+ return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
+ cb, opaque, type);
}
static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
- RawAIOCB *acb;
-
- acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
- if (!acb)
- return NULL;
- if (qemu_paio_read(&acb->aiocb) < 0) {
- raw_aio_remove(acb);
- return NULL;
- }
- return &acb->common;
+ return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
+ cb, opaque, QEMU_AIO_READ);
}
static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
- RawAIOCB *acb;
-
- acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
- if (!acb)
- return NULL;
- if (qemu_paio_write(&acb->aiocb) < 0) {
- raw_aio_remove(acb);
- return NULL;
- }
- return &acb->common;
+ return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
+ cb, opaque, QEMU_AIO_WRITE);
}
-#else /* CONFIG_AIO */
-static int posix_aio_init(void)
+
+static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
{
- return 0;
-}
-#endif /* CONFIG_AIO */
+ BDRVRawState *s = bs->opaque;
+
+ if (fd_open(bs) < 0)
+ return NULL;
+ return paio_submit(bs, s->fd, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
+}
static void raw_close(BlockDriverState *bs)
{
close(s->fd);
s->fd = -1;
if (s->aligned_buf != NULL)
- qemu_free(s->aligned_buf);
+ qemu_vfree(s->aligned_buf);
}
}
} else
return st.st_size;
}
-#else /* !__OpenBSD__ */
-static int64_t raw_getlength(BlockDriverState *bs)
+#elif defined(__NetBSD__)
+/*
+ * NetBSD: return the size of the open image.
+ *
+ * Regular files report st_size from fstat(). Character and block devices
+ * are sized with DIOCGWEDGEINFO first; if that ioctl fails, the disklabel
+ * (DIOCGDINFO) entry for the partition selected by DISKPART(st_rdev) is
+ * used instead.
+ *
+ * Returns -1 if fstat() or the disklabel ioctl fails.
+ */
+static int64_t raw_getlength(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+ int fd = s->fd;
+ struct stat st;
+
+ if (fstat(fd, &st))
+ return -1;
+ if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
+ struct dkwedge_info dkw;
+
+ if (ioctl(fd, DIOCGWEDGEINFO, &dkw) != -1) {
+ /* NOTE(review): assumes dkw_size counts 512-byte units - confirm */
+ return dkw.dkw_size * 512;
+ } else {
+ struct disklabel dl;
+
+ if (ioctl(fd, DIOCGDINFO, &dl))
+ return -1;
+ /* Widen before multiplying so the product is computed in 64 bits. */
+ return (uint64_t)dl.d_secsize *
+ dl.d_partitions[DISKPART(st.st_rdev)].p_size;
+ }
+ } else
+ return st.st_size;
+}
+#elif defined(__sun__)
+/*
+ * Solaris: return the size of the open image.
+ *
+ * fd_open() is called first and its negative result is passed straight
+ * back on failure. The size is then taken from DKIOCGMEDIAINFO as
+ * dki_lbsize * dki_capacity; if that ioctl fails, the result of
+ * lseek(SEEK_END) is returned instead (which is -1 when lseek fails).
+ */
+static int64_t raw_getlength(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+ struct dk_minfo minfo;
+ int ret;
+
+ ret = fd_open(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /*
+ * Use the DKIOCGMEDIAINFO ioctl to read the size.
+ */
+ ret = ioctl(s->fd, DKIOCGMEDIAINFO, &minfo);
+ if (ret != -1) {
+ return minfo.dki_lbsize * minfo.dki_capacity;
+ }
+
+ /*
+ * There are reports that lseek on some devices fails, but
+ * irc discussion said that contingency on contingency was overkill.
+ */
+ return lseek(s->fd, 0, SEEK_END);
+}
+#elif defined(CONFIG_BSD)
+static int64_t raw_getlength(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
int fd = s->fd;
int64_t size;
-#ifdef HOST_BSD
struct stat sb;
-#ifdef __FreeBSD__
+#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
int reopened = 0;
-#endif
-#endif
-#ifdef __sun__
- struct dk_minfo minfo;
- int rv;
#endif
int ret;
if (ret < 0)
return ret;
-#ifdef HOST_BSD
-#ifdef __FreeBSD__
+#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
again:
#endif
if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
#else
size = lseek(fd, 0LL, SEEK_END);
#endif
-#ifdef __FreeBSD__
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
switch(s->type) {
case FTYPE_CD:
/* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */
}
}
#endif
- } else
-#endif
-#ifdef __sun__
- /*
- * use the DKIOCGMEDIAINFO ioctl to read the size.
- */
- rv = ioctl ( fd, DKIOCGMEDIAINFO, &minfo );
- if ( rv != -1 ) {
- size = minfo.dki_lbsize * minfo.dki_capacity;
- } else /* there are reports that lseek on some devices
- fails, but irc discussion said that contingency
- on contingency was overkill */
-#endif
- {
+ } else {
size = lseek(fd, 0, SEEK_END);
}
return size;
}
+#else
+/*
+ * Fallback for hosts not handled by the earlier #if/#elif branches:
+ * return the size of the open image as reported by lseek(SEEK_END).
+ *
+ * fd_open() is called first; a negative result from it is returned
+ * unchanged. If lseek() itself fails, its -1 result is returned.
+ */
+static int64_t raw_getlength(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+ int ret;
+
+ ret = fd_open(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return lseek(s->fd, 0, SEEK_END);
+}
#endif
static int raw_create(const char *filename, QEMUOptionParameter *options)
{
int fd;
+ int result = 0;
int64_t total_size = 0;
/* Read out options */
while (options && options->name) {
if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- total_size = options->value.n / 512;
+ total_size = options->value.n / BDRV_SECTOR_SIZE;
}
options++;
}
fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
0644);
- if (fd < 0)
- return -EIO;
- ftruncate(fd, total_size * 512);
- close(fd);
- return 0;
+ if (fd < 0) {
+ result = -errno;
+ } else {
+ if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
+ result = -errno;
+ }
+ if (close(fd) != 0) {
+ result = -errno;
+ }
+ }
+ return result;
}
-static void raw_flush(BlockDriverState *bs)
+static int raw_flush(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
- fsync(s->fd);
+ return qemu_fdatasync(s->fd);
+}
+
+#ifdef CONFIG_XFS
+/*
+ * Ask XFS to unreserve the byte range covered by nb_sectors sectors
+ * starting at sector_num, via the XFS_IOC_UNRESVSP64 xfsctl on s->fd.
+ * Sector numbers are converted to bytes with a shift by 9.
+ *
+ * Returns 0 on success or a negative errno value if the xfsctl fails.
+ */
+static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
+{
+    struct xfs_flock64 fl;
+
+    memset(&fl, 0, sizeof(fl));
+    fl.l_whence = SEEK_SET;
+    fl.l_start = sector_num << 9;
+    fl.l_len = (int64_t)nb_sectors << 9;
+
+    if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
+        /* The debug print below may modify errno, so capture it first. */
+        int err = errno;
+
+        DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(err));
+        return -err;
+    }
+
+    return 0;
}
+#endif
+
+/*
+ * Discard nb_sectors sectors starting at sector_num.
+ *
+ * When built with CONFIG_XFS and the image was flagged is_xfs, the
+ * request is forwarded to xfs_discard() and its result is returned.
+ * In every other case nothing is done and 0 is returned.
+ */
+static int raw_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+#ifdef CONFIG_XFS
+ BDRVRawState *s = bs->opaque;
+
+ if (s->is_xfs) {
+ return xfs_discard(s, sector_num, nb_sectors);
+ }
+#endif
+ return 0;
+}
static QEMUOptionParameter raw_create_options[] = {
{
{ NULL }
};
-static BlockDriver bdrv_raw = {
- .format_name = "raw",
+static BlockDriver bdrv_file = {
+ .format_name = "file",
+ .protocol_name = "file",
.instance_size = sizeof(BDRVRawState),
.bdrv_probe = NULL, /* no probe for protocols */
- .bdrv_open = raw_open,
+ .bdrv_file_open = raw_open,
.bdrv_read = raw_read,
.bdrv_write = raw_write,
.bdrv_close = raw_close,
.bdrv_create = raw_create,
.bdrv_flush = raw_flush,
+ .bdrv_discard = raw_discard,
-#ifdef CONFIG_AIO
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
-#endif
+ .bdrv_aio_flush = raw_aio_flush,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
#endif
s->type = FTYPE_FILE;
-#if defined(__linux__) && defined(CONFIG_AIO)
- if (strstart(filename, "/dev/sg", NULL)) {
- bs->sg = 1;
+#if defined(__linux__)
+ {
+ char resolved_path[ MAXPATHLEN ], *temp;
+
+ temp = realpath(filename, resolved_path);
+ if (temp && strstart(temp, "/dev/sg", NULL)) {
+ bs->sg = 1;
+ }
}
#endif
return 0;
last_media_present = (s->fd >= 0);
if (s->fd >= 0 &&
- (qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
+ (get_clock() - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
close(s->fd);
s->fd = -1;
#ifdef DEBUG_FLOPPY
}
if (s->fd < 0) {
if (s->fd_got_error &&
- (qemu_get_clock(rt_clock) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
+ (get_clock() - s->fd_error_time) < FD_OPEN_TIMEOUT) {
#ifdef DEBUG_FLOPPY
printf("No floppy (open delayed)\n");
#endif
}
s->fd = open(bs->filename, s->open_flags & ~O_NONBLOCK);
if (s->fd < 0) {
- s->fd_error_time = qemu_get_clock(rt_clock);
+ s->fd_error_time = get_clock();
s->fd_got_error = 1;
if (last_media_present)
s->fd_media_changed = 1;
}
if (!last_media_present)
s->fd_media_changed = 1;
- s->fd_open_time = qemu_get_clock(rt_clock);
+ s->fd_open_time = get_clock();
s->fd_got_error = 0;
return 0;
}
return ioctl(s->fd, req, buf);
}
-#ifdef CONFIG_AIO
static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
unsigned long int req, void *buf,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
- RawAIOCB *acb;
if (fd_open(bs) < 0)
return NULL;
-
- acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
- if (!acb)
- return NULL;
- acb->aiocb.aio_fildes = s->fd;
- acb->aiocb.ev_signo = SIGUSR2;
- acb->aiocb.aio_offset = 0;
- acb->aiocb.aio_flags = 0;
-
- acb->next = posix_aio_state->first_aio;
- posix_aio_state->first_aio = acb;
-
- acb->aiocb.aio_ioctl_buf = buf;
- acb->aiocb.aio_ioctl_cmd = req;
- if (qemu_paio_ioctl(&acb->aiocb) < 0) {
- raw_aio_remove(acb);
- return NULL;
- }
-
- return &acb->common;
+ return paio_ioctl(bs, s->fd, req, buf, cb, opaque);
}
-#endif
-#elif defined(__FreeBSD__)
+#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
static int fd_open(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
/* Read out options */
while (options && options->name) {
if (!strcmp(options->name, "size")) {
- total_size = options->value.n / 512;
+ total_size = options->value.n / BDRV_SECTOR_SIZE;
}
options++;
}
fd = open(filename, O_WRONLY | O_BINARY);
if (fd < 0)
- return -EIO;
+ return -errno;
if (fstat(fd, &stat_buf) < 0)
- ret = -EIO;
+ ret = -errno;
else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode))
- ret = -EIO;
- else if (lseek(fd, 0, SEEK_END) < total_size * 512)
+ ret = -ENODEV;
+ else if (lseek(fd, 0, SEEK_END) < total_size * BDRV_SECTOR_SIZE)
ret = -ENOSPC;
close(fd);
return ret;
}
+/*
+ * Always returns 0; installed as .bdrv_has_zero_init for the host
+ * device drivers in this file.
+ * NOTE(review): presumably this tells the block layer that a host device
+ * is not guaranteed to read back as zeroes after creation - confirm
+ * against the bdrv_has_zero_init callers.
+ */
+static int hdev_has_zero_init(BlockDriverState *bs)
+{
+ return 0;
+}
+
static BlockDriver bdrv_host_device = {
- .format_name = "host_device",
- .instance_size = sizeof(BDRVRawState),
- .bdrv_probe_device = hdev_probe_device,
- .bdrv_open = hdev_open,
- .bdrv_close = raw_close,
+ .format_name = "host_device",
+ .protocol_name = "host_device",
+ .instance_size = sizeof(BDRVRawState),
+ .bdrv_probe_device = hdev_probe_device,
+ .bdrv_file_open = hdev_open,
+ .bdrv_close = raw_close,
.bdrv_create = hdev_create,
- .bdrv_flush = raw_flush,
+ .create_options = raw_create_options,
+ .bdrv_has_zero_init = hdev_has_zero_init,
+ .bdrv_flush = raw_flush,
-#ifdef CONFIG_AIO
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
-#endif
+ .bdrv_aio_flush = raw_aio_flush,
.bdrv_read = raw_read,
.bdrv_write = raw_write,
/* generic scsi device */
#ifdef __linux__
.bdrv_ioctl = hdev_ioctl,
-#ifdef CONFIG_AIO
.bdrv_aio_ioctl = hdev_aio_ioctl,
#endif
-#endif
};
#ifdef __linux__
BDRVRawState *s = bs->opaque;
int ret;
- posix_aio_init();
-
s->type = FTYPE_FD;
/* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
static int floppy_probe_device(const char *filename)
{
+ int fd, ret;
+ int prio = 0;
+ struct floppy_struct fdparam;
+ struct stat st;
+
if (strstart(filename, "/dev/fd", NULL))
- return 100;
- return 0;
+ prio = 50;
+
+ fd = open(filename, O_RDONLY | O_NONBLOCK);
+ if (fd < 0) {
+ goto out;
+ }
+ ret = fstat(fd, &st);
+ if (ret == -1 || !S_ISBLK(st.st_mode)) {
+ goto outc;
+ }
+
+ /* Attempt to detect via a floppy specific ioctl */
+ ret = ioctl(fd, FDGETPRM, &fdparam);
+ if (ret >= 0)
+ prio = 100;
+
+outc:
+ close(fd);
+out:
+ return prio;
}
static BlockDriver bdrv_host_floppy = {
.format_name = "host_floppy",
+ .protocol_name = "host_floppy",
.instance_size = sizeof(BDRVRawState),
.bdrv_probe_device = floppy_probe_device,
- .bdrv_open = floppy_open,
+ .bdrv_file_open = floppy_open,
.bdrv_close = raw_close,
.bdrv_create = hdev_create,
+ .create_options = raw_create_options,
+ .bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_flush = raw_flush,
-#ifdef CONFIG_AIO
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
-#endif
+ .bdrv_aio_flush = raw_aio_flush,
.bdrv_read = raw_read,
.bdrv_write = raw_write,
static int cdrom_probe_device(const char *filename)
{
- if (strstart(filename, "/dev/cd", NULL))
- return 100;
- return 0;
+ int fd, ret;
+ int prio = 0;
+ struct stat st;
+
+ fd = open(filename, O_RDONLY | O_NONBLOCK);
+ if (fd < 0) {
+ goto out;
+ }
+ ret = fstat(fd, &st);
+ if (ret == -1 || !S_ISBLK(st.st_mode)) {
+ goto outc;
+ }
+
+ /* Attempt to detect via a CDROM specific ioctl */
+ ret = ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
+ if (ret >= 0)
+ prio = 100;
+
+outc:
+ close(fd);
+out:
+ return prio;
}
static int cdrom_is_inserted(BlockDriverState *bs)
static BlockDriver bdrv_host_cdrom = {
.format_name = "host_cdrom",
+ .protocol_name = "host_cdrom",
.instance_size = sizeof(BDRVRawState),
.bdrv_probe_device = cdrom_probe_device,
- .bdrv_open = cdrom_open,
+ .bdrv_file_open = cdrom_open,
.bdrv_close = raw_close,
.bdrv_create = hdev_create,
+ .create_options = raw_create_options,
+ .bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_flush = raw_flush,
-#ifdef CONFIG_AIO
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
-#endif
+ .bdrv_aio_flush = raw_aio_flush,
.bdrv_read = raw_read,
.bdrv_write = raw_write,
/* generic scsi device */
.bdrv_ioctl = hdev_ioctl,
-#ifdef CONFIG_AIO
.bdrv_aio_ioctl = hdev_aio_ioctl,
-#endif
};
#endif /* __linux__ */
-#ifdef __FreeBSD__
+#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
static BlockDriver bdrv_host_cdrom = {
.format_name = "host_cdrom",
+ .protocol_name = "host_cdrom",
.instance_size = sizeof(BDRVRawState),
.bdrv_probe_device = cdrom_probe_device,
- .bdrv_open = cdrom_open,
+ .bdrv_file_open = cdrom_open,
.bdrv_close = raw_close,
.bdrv_create = hdev_create,
+ .create_options = raw_create_options,
+ .bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_flush = raw_flush,
-#ifdef CONFIG_AIO
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
-#endif
+ .bdrv_aio_flush = raw_aio_flush,
.bdrv_read = raw_read,
.bdrv_write = raw_write,
};
#endif /* __FreeBSD__ */
-static void bdrv_raw_init(void)
+static void bdrv_file_init(void)
{
/*
* Register all the drivers. Note that order is important, the driver
* registered last will get probed first.
*/
- bdrv_register(&bdrv_raw);
+ bdrv_register(&bdrv_file);
bdrv_register(&bdrv_host_device);
#ifdef __linux__
bdrv_register(&bdrv_host_floppy);
bdrv_register(&bdrv_host_cdrom);
#endif
-#ifdef __FreeBSD__
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
bdrv_register(&bdrv_host_cdrom);
#endif
}
-block_init(bdrv_raw_init);
+block_init(bdrv_file_init);