*/
#include "config-host.h"
#include "qemu-common.h"
+#include "trace.h"
#include "monitor.h"
#include "block_int.h"
#include "module.h"
return NULL;
}
-static BlockDriver *find_image_format(const char *filename)
+static int find_image_format(const char *filename, BlockDriver **pdrv)
{
int ret, score, score_max;
BlockDriver *drv1, *drv;
BlockDriverState *bs;
ret = bdrv_file_open(&bs, filename, 0);
- if (ret < 0)
- return NULL;
+ if (ret < 0) {
+ *pdrv = NULL;
+ return ret;
+ }
/* Return the raw BlockDriver * to scsi-generic devices or empty drives */
if (bs->sg || !bdrv_is_inserted(bs)) {
bdrv_delete(bs);
- return bdrv_find_format("raw");
+ drv = bdrv_find_format("raw");
+ if (!drv) {
+ ret = -ENOENT;
+ }
+ *pdrv = drv;
+ return ret;
}
ret = bdrv_pread(bs, 0, buf, sizeof(buf));
bdrv_delete(bs);
if (ret < 0) {
- return NULL;
+ *pdrv = NULL;
+ return ret;
}
score_max = 0;
}
}
}
- return drv;
+ if (!drv) {
+ ret = -ENOENT;
+ }
+ *pdrv = drv;
+ return ret;
}
/**
/* Find the right image format driver */
if (!drv) {
- drv = find_image_format(filename);
+ ret = find_image_format(filename, &drv);
}
if (!drv) {
- ret = -ENOENT;
goto unlink_and_fail;
}
/*
* Run consistency checks on an image
*
- * Returns the number of errors or -errno when an internal error occurs
+ * Returns 0 if the check could be completed (it doesn't mean that the image is
+ * free of errors) or -errno when an internal error occured. The results of the
+ * check are stored in res.
*/
-int bdrv_check(BlockDriverState *bs)
+int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
{
if (bs->drv->bdrv_check == NULL) {
return -ENOTSUP;
}
- return bs->drv->bdrv_check(bs);
+ memset(res, 0, sizeof(*res));
+ return bs->drv->bdrv_check(bs, res);
}
+#define COMMIT_BUF_SECTORS 2048
+
/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
- int64_t i, total_sectors;
- int n, j, ro, open_flags;
+ BlockDriver *backing_drv;
+ int64_t sector, total_sectors;
+ int n, ro, open_flags;
int ret = 0, rw_ret = 0;
- unsigned char sector[BDRV_SECTOR_SIZE];
+ uint8_t *buf;
char filename[1024];
BlockDriverState *bs_rw, *bs_ro;
if (bs->backing_hd->keep_read_only) {
return -EACCES;
}
-
+
+ backing_drv = bs->backing_hd->drv;
ro = bs->backing_hd->read_only;
strncpy(filename, bs->backing_hd->filename, sizeof(filename));
open_flags = bs->backing_hd->open_flags;
bdrv_delete(bs->backing_hd);
bs->backing_hd = NULL;
bs_rw = bdrv_new("");
- rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR, drv);
+ rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
+ backing_drv);
if (rw_ret < 0) {
bdrv_delete(bs_rw);
/* try to re-open read-only */
bs_ro = bdrv_new("");
- ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, drv);
+ ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
+ backing_drv);
if (ret < 0) {
bdrv_delete(bs_ro);
/* drive not functional anymore */
}
total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
- for (i = 0; i < total_sectors;) {
- if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
- for(j = 0; j < n; j++) {
- if (bdrv_read(bs, i, sector, 1) != 0) {
- ret = -EIO;
- goto ro_cleanup;
- }
+ buf = qemu_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
- if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
- ret = -EIO;
- goto ro_cleanup;
- }
- i++;
- }
- } else {
- i += n;
+ for (sector = 0; sector < total_sectors; sector += n) {
+ if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
+
+ if (bdrv_read(bs, sector, buf, n) != 0) {
+ ret = -EIO;
+ goto ro_cleanup;
+ }
+
+ if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
+ ret = -EIO;
+ goto ro_cleanup;
+ }
}
}
bdrv_flush(bs->backing_hd);
ro_cleanup:
+ qemu_free(buf);
if (ro) {
/* re-open as RO */
bdrv_delete(bs->backing_hd);
bs->backing_hd = NULL;
bs_ro = bdrv_new("");
- ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, drv);
+ ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
+ backing_drv);
if (ret < 0) {
bdrv_delete(bs_ro);
/* drive not functional anymore */
bit = start % (sizeof(unsigned long) * 8);
val = bs->dirty_bitmap[idx];
if (dirty) {
- if (!(val & (1 << bit))) {
+ if (!(val & (1UL << bit))) {
bs->dirty_count++;
- val |= 1 << bit;
+ val |= 1UL << bit;
}
} else {
- if (val & (1 << bit)) {
+ if (val & (1UL << bit)) {
bs->dirty_count--;
- val &= ~(1 << bit);
+ val &= ~(1UL << bit);
}
}
bs->dirty_bitmap[idx] = val;
return is_read ? bs->on_read_error : bs->on_write_error;
}
+void bdrv_set_removable(BlockDriverState *bs, int removable)
+{
+ bs->removable = removable;
+ if (removable && bs == bs_snapshots) {
+ bs_snapshots = NULL;
+ }
+}
+
int bdrv_is_removable(BlockDriverState *bs)
{
return bs->removable;
return bs->device_name;
}
-void bdrv_flush(BlockDriverState *bs)
+int bdrv_flush(BlockDriverState *bs)
{
if (bs->open_flags & BDRV_O_NO_FLUSH) {
- return;
+ return 0;
+ }
+
+ if (bs->drv && bs->drv->bdrv_flush) {
+ return bs->drv->bdrv_flush(bs);
}
- if (bs->drv && bs->drv->bdrv_flush)
- bs->drv->bdrv_flush(bs);
+ /*
+ * Some block drivers always operate in either writethrough or unsafe mode
+ * and don't support bdrv_flush therefore. Usually qemu doesn't know how
+ * the server works (because the behaviour is hardcoded or depends on
+ * server-side configuration), so we can't ensure that everything is safe
+ * on disk. Returning an error doesn't work because that would break guests
+ * even if the server operates in writethrough mode.
+ *
+ * Let's hope the user knows what he's doing.
+ */
+ return 0;
}
void bdrv_flush_all(void)
{
assert(bs->drv);
- if (bs->drv->no_zero_init) {
- return 0;
- } else if (bs->file) {
- return bdrv_has_zero_init(bs->file);
+ if (bs->drv->bdrv_has_zero_init) {
+ return bs->drv->bdrv_has_zero_init(bs);
}
return 1;
return 1;
}
+int bdrv_is_snapshot(BlockDriverState *bs)
+{
+ return !!(bs->open_flags & BDRV_O_SNAPSHOT);
+}
+
BlockDriverState *bdrv_snapshots(void)
{
BlockDriverState *bs;
- if (bs_snapshots)
+ if (bs_snapshots) {
return bs_snapshots;
+ }
bs = NULL;
while ((bs = bdrv_next(bs))) {
if (bdrv_can_snapshot(bs)) {
- goto ok;
+ bs_snapshots = bs;
+ return bs;
}
}
return NULL;
- ok:
- bs_snapshots = bs;
- return bs;
}
int bdrv_snapshot_create(BlockDriverState *bs,
return -ENOTSUP;
}
+int bdrv_snapshot_load_tmp(BlockDriverState *bs,
+ const char *snapshot_name)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (!bs->read_only) {
+ return -EINVAL;
+ }
+ if (drv->bdrv_snapshot_load_tmp) {
+ return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
+ }
+ return -ENOTSUP;
+}
+
#define NB_SUFFIXES 4
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
BlockDriver *drv = bs->drv;
BlockDriverAIOCB *ret;
+ trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
+
if (!drv)
return NULL;
if (bdrv_check_request(bs, sector_num, nb_sectors))
return ret;
}
+typedef struct BlockCompleteData {
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+ BlockDriverState *bs;
+ int64_t sector_num;
+ int nb_sectors;
+} BlockCompleteData;
+
+static void block_complete_cb(void *opaque, int ret)
+{
+ BlockCompleteData *b = opaque;
+
+ if (b->bs->dirty_bitmap) {
+ set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
+ }
+ b->cb(b->opaque, ret);
+ qemu_free(b);
+}
+
+static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ BlockCompleteData *blkdata = qemu_mallocz(sizeof(BlockCompleteData));
+
+ blkdata->bs = bs;
+ blkdata->cb = cb;
+ blkdata->opaque = opaque;
+ blkdata->sector_num = sector_num;
+ blkdata->nb_sectors = nb_sectors;
+
+ return blkdata;
+}
+
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
BlockDriver *drv = bs->drv;
BlockDriverAIOCB *ret;
+ BlockCompleteData *blk_cb_data;
+
+ trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
if (!drv)
return NULL;
return NULL;
if (bs->dirty_bitmap) {
- set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
+ blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
+ opaque);
+ cb = &block_complete_cb;
+ opaque = blk_cb_data;
}
ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
{
MultiwriteCB *mcb = opaque;
+ trace_multiwrite_cb(mcb, ret);
+
if (ret < 0 && !mcb->error) {
mcb->error = ret;
- multiwrite_user_cb(mcb);
}
mcb->num_requests--;
if (mcb->num_requests == 0) {
- if (mcb->error == 0) {
- multiwrite_user_cb(mcb);
- }
+ multiwrite_user_cb(mcb);
qemu_free(mcb);
}
}
// Check for mergable requests
num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
+ trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
+
+ /*
+ * Run the aio requests. As soon as one request can't be submitted
+ * successfully, fail all requests that are not yet submitted (we must
+ * return failure for all requests anyway)
+ *
+ * num_requests cannot be set to the right value immediately: If
+ * bdrv_aio_writev fails for some request, num_requests would be too high
+ * and therefore multiwrite_cb() would never recognize the multiwrite
+ * request as completed. We also cannot use the loop variable i to set it
+ * when the first request fails because the callback may already have been
+ * called for previously submitted requests. Thus, num_requests must be
+ * incremented for each request that is submitted.
+ *
+ * The problem that callbacks may be called early also means that we need
+ * to take care that num_requests doesn't become 0 before all requests are
+ * submitted - multiwrite_cb() would consider the multiwrite request
+ * completed. A dummy request that is "completed" by a manual call to
+ * multiwrite_cb() takes care of this.
+ */
+ mcb->num_requests = 1;
+
// Run the aio requests
for (i = 0; i < num_reqs; i++) {
+ mcb->num_requests++;
acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
reqs[i].nb_sectors, multiwrite_cb, mcb);
// We can only fail the whole thing if no request has been
// submitted yet. Otherwise we'll wait for the submitted AIOs to
// complete and report the error in the callback.
- if (mcb->num_requests == 0) {
- reqs[i].error = -EIO;
+ if (i == 0) {
+ trace_bdrv_aio_multiwrite_earlyfail(mcb);
goto fail;
} else {
- mcb->num_requests++;
+ trace_bdrv_aio_multiwrite_latefail(mcb, i);
multiwrite_cb(mcb, -EIO);
break;
}
- } else {
- mcb->num_requests++;
}
}
+ /* Complete the dummy request */
+ multiwrite_cb(mcb, 0);
+
return 0;
fail:
+ for (i = 0; i < mcb->num_callbacks; i++) {
+ reqs[i].error = -EIO;
+ }
qemu_free(mcb);
return -1;
}
if (!drv)
return 0;
if (!drv->bdrv_is_inserted)
- return 1;
+ return !bs->tray_open;
ret = drv->bdrv_is_inserted(bs);
return ret;
}
ret = drv->bdrv_eject(bs, eject_flag);
}
if (ret == -ENOTSUP) {
- if (eject_flag)
- bdrv_close(bs);
ret = 0;
}
+ if (ret >= 0) {
+ bs->tray_open = eject_flag;
+ }
return ret;
}
if (bs->dirty_bitmap &&
(sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
- return bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
- (1 << (chunk % (sizeof(unsigned long) * 8)));
+ return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
+ (1UL << (chunk % (sizeof(unsigned long) * 8))));
} else {
return 0;
}