bs->slice_start = 0;
bs->slice_end = 0;
- bs->slice_time = 0;
- memset(&bs->io_base, 0, sizeof(bs->io_base));
}
static void bdrv_block_timer(void *opaque)
{
qemu_co_queue_init(&bs->throttled_reqs);
bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
- bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
- bs->slice_start = qemu_get_clock_ns(vm_clock);
- bs->slice_end = bs->slice_start + bs->slice_time;
- memset(&bs->io_base, 0, sizeof(bs->io_base));
bs->io_limits_enabled = true;
}
int ret = 0;
/* Return the raw BlockDriver * to scsi-generic devices or empty drives */
- if (bs->sg || !bdrv_is_inserted(bs)) {
+ if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
drv = bdrv_find_format("raw");
if (!drv) {
ret = -ENOENT;
return 0;
}
+/**
+ * Set open flags for a given discard mode
+ *
+ * Return 0 on success, -1 if the discard mode was invalid.
+ */
+int bdrv_parse_discard_flags(const char *mode, int *flags)
+{
+ *flags &= ~BDRV_O_UNMAP;
+
+ if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
+ /* do nothing */
+ } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
+ *flags |= BDRV_O_UNMAP;
+ } else {
+ return -1;
+ }
+
+ return 0;
+}
+
/**
* Set open flags for a given cache mode
*
/*
* Common part for opening disk images and files
+ *
+ * Removes all processed options from *options.
*/
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
- const char *filename,
+ const char *filename, QDict *options,
int flags, BlockDriver *drv)
{
int ret, open_flags;
assert(drv != NULL);
assert(bs->file == NULL);
+ assert(options != NULL && bs->options != options);
trace_bdrv_open_common(bs, filename, flags, drv->format_name);
+ if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
+ return -ENOTSUP;
+ }
+
+ /* bdrv_open() with directly using a protocol as drv. This layer is already
+ * opened, so assign it to bs (while file becomes a closed BlockDriverState)
+ * and return immediately. */
+ if (file != NULL && drv->bdrv_file_open) {
+ bdrv_swap(file, bs);
+ return 0;
+ }
+
bs->open_flags = flags;
bs->buffer_alignment = 512;
bdrv_enable_copy_on_read(bs);
}
- pstrcpy(bs->filename, sizeof(bs->filename), filename);
-
- if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
- return -ENOTSUP;
+ if (filename != NULL) {
+ pstrcpy(bs->filename, sizeof(bs->filename), filename);
+ } else {
+ bs->filename[0] = '\0';
}
bs->drv = drv;
/* Open the image, either directly or using a protocol */
if (drv->bdrv_file_open) {
- if (file != NULL) {
- bdrv_swap(file, bs);
- ret = 0;
- } else {
- ret = drv->bdrv_file_open(bs, filename, open_flags);
- }
+ assert(file == NULL);
+ assert(drv->bdrv_parse_filename || filename != NULL);
+ ret = drv->bdrv_file_open(bs, filename, options, open_flags);
} else {
assert(file != NULL);
bs->file = file;
- ret = drv->bdrv_open(bs, open_flags);
+ ret = drv->bdrv_open(bs, options, open_flags);
}
if (ret < 0) {
#ifndef _WIN32
if (bs->is_temporary) {
+ assert(filename != NULL);
unlink(filename);
}
#endif
/*
* Opens a file using a protocol (file, host_device, nbd, ...)
+ *
+ * options is a QDict of options to pass to the block drivers, or NULL for an
+ * empty set of options. The reference to the QDict belongs to the block layer
+ * after the call (even on failure), so if the caller intends to reuse the
+ * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
*/
-int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
+int bdrv_file_open(BlockDriverState **pbs, const char *filename,
+ QDict *options, int flags)
{
BlockDriverState *bs;
BlockDriver *drv;
+ const char *drvname;
int ret;
- drv = bdrv_find_protocol(filename);
- if (!drv) {
- return -ENOENT;
+ /* NULL means an empty set of options */
+ if (options == NULL) {
+ options = qdict_new();
}
bs = bdrv_new("");
- ret = bdrv_open_common(bs, NULL, filename, flags, drv);
+ bs->options = options;
+ options = qdict_clone_shallow(options);
+
+ /* Find the right block driver */
+ drvname = qdict_get_try_str(options, "driver");
+ if (drvname) {
+ drv = bdrv_find_whitelisted_format(drvname);
+ qdict_del(options, "driver");
+ } else if (filename) {
+ drv = bdrv_find_protocol(filename);
+ } else {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR,
+ "Must specify either driver or file");
+ drv = NULL;
+ }
+
+ if (!drv) {
+ ret = -ENOENT;
+ goto fail;
+ }
+
+ /* Parse the filename and open it */
+ if (drv->bdrv_parse_filename && filename) {
+ Error *local_err = NULL;
+ drv->bdrv_parse_filename(filename, options, &local_err);
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+ } else if (!drv->bdrv_parse_filename && !filename) {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR,
+ "The '%s' block driver requires a file name",
+ drv->format_name);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ ret = bdrv_open_common(bs, NULL, filename, options, flags, drv);
if (ret < 0) {
- bdrv_delete(bs);
- return ret;
+ goto fail;
}
+
+ /* Check if any unknown options were used */
+ if (qdict_size(options) != 0) {
+ const QDictEntry *entry = qdict_first(options);
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block protocol '%s' doesn't "
+ "support the option '%s'",
+ drv->format_name, entry->key);
+ ret = -EINVAL;
+ goto fail;
+ }
+ QDECREF(options);
+
bs->growable = 1;
*pbs = bs;
return 0;
+
+fail:
+ QDECREF(options);
+ if (!bs->drv) {
+ QDECREF(bs->options);
+ }
+ bdrv_delete(bs);
+ return ret;
}
int bdrv_open_backing_file(BlockDriverState *bs)
/* backing files always opened read-only */
back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT);
- ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
+ ret = bdrv_open(bs->backing_hd, backing_filename, NULL,
+ back_flags, back_drv);
if (ret < 0) {
bdrv_delete(bs->backing_hd);
bs->backing_hd = NULL;
return 0;
}
+static void extract_subqdict(QDict *src, QDict **dst, const char *start)
+{
+ const QDictEntry *entry, *next;
+ const char *p;
+
+ *dst = qdict_new();
+ entry = qdict_first(src);
+
+ while (entry != NULL) {
+ next = qdict_next(src, entry);
+ if (strstart(entry->key, start, &p)) {
+ qobject_incref(entry->value);
+ qdict_put_obj(*dst, p, entry->value);
+ qdict_del(src, entry->key);
+ }
+ entry = next;
+ }
+}
+
/*
* Opens a disk image (raw, qcow2, vmdk, ...)
+ *
+ * options is a QDict of options to pass to the block drivers, or NULL for an
+ * empty set of options. The reference to the QDict belongs to the block layer
+ * after the call (even on failure), so if the caller intends to reuse the
+ * dictionary, it needs to use QINCREF() before calling bdrv_open.
*/
-int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
- BlockDriver *drv)
+int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
+ int flags, BlockDriver *drv)
{
int ret;
/* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
char tmp_filename[PATH_MAX + 1];
BlockDriverState *file = NULL;
+ QDict *file_options = NULL;
+ /* NULL means an empty set of options */
+ if (options == NULL) {
+ options = qdict_new();
+ }
+
+ bs->options = options;
+ options = qdict_clone_shallow(options);
+
+ /* For snapshot=on, create a temporary qcow2 overlay */
if (flags & BDRV_O_SNAPSHOT) {
BlockDriverState *bs1;
int64_t total_size;
- int is_protocol = 0;
BlockDriver *bdrv_qcow2;
- QEMUOptionParameter *options;
+ QEMUOptionParameter *create_options;
char backing_filename[PATH_MAX];
+ if (qdict_size(options) != 0) {
+ error_report("Can't use snapshot=on with driver-specific options");
+ ret = -EINVAL;
+ goto fail;
+ }
+ assert(filename != NULL);
+
/* if snapshot, we create a temporary backing file and open it
instead of opening 'filename' directly */
/* if there is a backing file, use it */
bs1 = bdrv_new("");
- ret = bdrv_open(bs1, filename, 0, drv);
+ ret = bdrv_open(bs1, filename, NULL, 0, drv);
if (ret < 0) {
bdrv_delete(bs1);
- return ret;
+ goto fail;
}
total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
- if (bs1->drv && bs1->drv->protocol_name)
- is_protocol = 1;
-
bdrv_delete(bs1);
ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
if (ret < 0) {
- return ret;
+ goto fail;
}
/* Real path is meaningless for protocols */
- if (is_protocol)
+ if (path_has_protocol(filename)) {
snprintf(backing_filename, sizeof(backing_filename),
"%s", filename);
- else if (!realpath(filename, backing_filename))
- return -errno;
+ } else if (!realpath(filename, backing_filename)) {
+ ret = -errno;
+ goto fail;
+ }
bdrv_qcow2 = bdrv_find_format("qcow2");
- options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
+ create_options = parse_option_parameters("", bdrv_qcow2->create_options,
+ NULL);
- set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
- set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
+ set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
+ set_option_parameter(create_options, BLOCK_OPT_BACKING_FILE,
+ backing_filename);
if (drv) {
- set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
+ set_option_parameter(create_options, BLOCK_OPT_BACKING_FMT,
drv->format_name);
}
- ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
- free_option_parameters(options);
+ ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options);
+ free_option_parameters(create_options);
if (ret < 0) {
- return ret;
+ goto fail;
}
filename = tmp_filename;
flags |= BDRV_O_ALLOW_RDWR;
}
- ret = bdrv_file_open(&file, filename, bdrv_open_flags(bs, flags));
+ extract_subqdict(options, &file_options, "file.");
+
+ ret = bdrv_file_open(&file, filename, file_options,
+ bdrv_open_flags(bs, flags));
if (ret < 0) {
- return ret;
+ goto fail;
}
/* Find the right image format driver */
}
/* Open the image */
- ret = bdrv_open_common(bs, file, filename, flags, drv);
+ ret = bdrv_open_common(bs, file, filename, options, flags, drv);
if (ret < 0) {
goto unlink_and_fail;
}
if ((flags & BDRV_O_NO_BACKING) == 0) {
ret = bdrv_open_backing_file(bs);
if (ret < 0) {
- bdrv_close(bs);
- return ret;
+ goto close_and_fail;
}
}
+ /* Check if any unknown options were used */
+ if (qdict_size(options) != 0) {
+ const QDictEntry *entry = qdict_first(options);
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by "
+ "device '%s' doesn't support the option '%s'",
+ drv->format_name, bs->device_name, entry->key);
+
+ ret = -EINVAL;
+ goto close_and_fail;
+ }
+ QDECREF(options);
+
if (!bdrv_key_required(bs)) {
bdrv_dev_change_media_cb(bs, true);
}
if (bs->is_temporary) {
unlink(filename);
}
+fail:
+ QDECREF(bs->options);
+ QDECREF(options);
+ bs->options = NULL;
+ return ret;
+
+close_and_fail:
+ bdrv_close(bs);
+ QDECREF(options);
return ret;
}
bs->valid_key = 0;
bs->sg = 0;
bs->growable = 0;
+ QDECREF(bs->options);
+ bs->options = NULL;
if (bs->file != NULL) {
bdrv_delete(bs->file);
bs_dest->enable_write_cache = bs_src->enable_write_cache;
/* i/o timing parameters */
- bs_dest->slice_time = bs_src->slice_time;
bs_dest->slice_start = bs_src->slice_start;
bs_dest->slice_end = bs_src->slice_end;
+ bs_dest->slice_submitted = bs_src->slice_submitted;
bs_dest->io_limits = bs_src->io_limits;
- bs_dest->io_base = bs_src->io_base;
bs_dest->throttled_reqs = bs_src->throttled_reqs;
bs_dest->block_timer = bs_src->block_timer;
bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
bs_dest->iostatus = bs_src->iostatus;
/* dirty bitmap */
- bs_dest->dirty_count = bs_src->dirty_count;
bs_dest->dirty_bitmap = bs_src->dirty_bitmap;
/* job */
BlockDriverState *bs;
QTAILQ_FOREACH(bs, &bdrv_states, list) {
- int ret = bdrv_commit(bs);
- if (ret < 0) {
- return ret;
+ if (bs->drv && bs->backing_hd) {
+ int ret = bdrv_commit(bs);
+ if (ret < 0) {
+ return ret;
+ }
}
}
return 0;
/**
* Round a region to cluster boundaries
*/
-static void round_to_clusters(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- int64_t *cluster_sector_num,
- int *cluster_nb_sectors)
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors)
{
BlockDriverInfo bdi;
* CoR read and write operations are atomic and guest writes cannot
* interleave between them.
*/
- round_to_clusters(bs, sector_num, nb_sectors,
- &cluster_sector_num, &cluster_nb_sectors);
+ bdrv_round_to_clusters(bs, sector_num, nb_sectors,
+ &cluster_sector_num, &cluster_nb_sectors);
do {
retry = false;
}
/*
- * Process a synchronous request using coroutines
+ * Process a vectored synchronous request using coroutines
*/
-static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
- int nb_sectors, bool is_write)
+static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *qiov, bool is_write)
{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = (void *)buf,
- .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
- };
Coroutine *co;
RwCo rwco = {
.bs = bs,
.sector_num = sector_num,
- .nb_sectors = nb_sectors,
- .qiov = &qiov,
+ .nb_sectors = qiov->size >> BDRV_SECTOR_BITS,
+ .qiov = qiov,
.is_write = is_write,
.ret = NOT_DONE,
};
-
- qemu_iovec_init_external(&qiov, &iov, 1);
+ assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0);
/**
* In sync call context, when the vcpu is blocked, this throttling timer
return rwco.ret;
}
+/*
+ * Process a synchronous request using coroutines
+ */
+static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
+ int nb_sectors, bool is_write)
+{
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *)buf,
+ .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+ };
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_rwv_co(bs, sector_num, &qiov, is_write);
+}
+
/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors)
return ret;
}
-#define BITS_PER_LONG (sizeof(unsigned long) * 8)
-
-static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int dirty)
-{
- int64_t start, end;
- unsigned long val, idx, bit;
-
- start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
- end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
-
- for (; start <= end; start++) {
- idx = start / BITS_PER_LONG;
- bit = start % BITS_PER_LONG;
- val = bs->dirty_bitmap[idx];
- if (dirty) {
- if (!(val & (1UL << bit))) {
- bs->dirty_count++;
- val |= 1UL << bit;
- }
- } else {
- if (val & (1UL << bit)) {
- bs->dirty_count--;
- val &= ~(1UL << bit);
- }
- }
- bs->dirty_bitmap[idx] = val;
- }
-}
-
/* Return < 0 if error. Important errors are:
-EIO generic I/O error (may happen for all errors)
-ENOMEDIUM No media inserted.
return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
}
+int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
+{
+ return bdrv_rwv_co(bs, sector_num, qiov, true);
+}
+
int bdrv_pread(BlockDriverState *bs, int64_t offset,
void *buf, int count1)
{
return count1;
}
-int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
- const void *buf, int count1)
+int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
{
uint8_t tmp_buf[BDRV_SECTOR_SIZE];
int len, nb_sectors, count;
int64_t sector_num;
int ret;
- count = count1;
+ count = qiov->size;
+
/* first write to align to sector start */
len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
if (len > count)
if (len > 0) {
if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
return ret;
- memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
+ qemu_iovec_to_buf(qiov, 0, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)),
+ len);
if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
return ret;
count -= len;
if (count == 0)
- return count1;
+ return qiov->size;
sector_num++;
- buf += len;
}
/* write the sectors "in place" */
nb_sectors = count >> BDRV_SECTOR_BITS;
if (nb_sectors > 0) {
- if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
+ QEMUIOVector qiov_inplace;
+
+ qemu_iovec_init(&qiov_inplace, qiov->niov);
+ qemu_iovec_concat(&qiov_inplace, qiov, len,
+ nb_sectors << BDRV_SECTOR_BITS);
+ ret = bdrv_writev(bs, sector_num, &qiov_inplace);
+ qemu_iovec_destroy(&qiov_inplace);
+ if (ret < 0) {
return ret;
+ }
+
sector_num += nb_sectors;
len = nb_sectors << BDRV_SECTOR_BITS;
- buf += len;
count -= len;
}
if (count > 0) {
if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
return ret;
- memcpy(tmp_buf, buf, count);
+ qemu_iovec_to_buf(qiov, qiov->size - count, tmp_buf, count);
if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
return ret;
}
- return count1;
+ return qiov->size;
+}
+
+int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
+ const void *buf, int count1)
+{
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *) buf,
+ .iov_len = count1,
+ };
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_pwritev(bs, offset, &qiov);
}
/*
/* Cover entire cluster so no additional backing file I/O is required when
* allocating cluster in the image file.
*/
- round_to_clusters(bs, sector_num, nb_sectors,
- &cluster_sector_num, &cluster_nb_sectors);
+ bdrv_round_to_clusters(bs, sector_num, nb_sectors,
+ &cluster_sector_num, &cluster_nb_sectors);
trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
cluster_sector_num, cluster_nb_sectors);
typedef struct BdrvCoIsAllocatedData {
BlockDriverState *bs;
+ BlockDriverState *base;
int64_t sector_num;
int nb_sectors;
int *pnum;
*
* [sector_num+x, nr_sectors] allocated.
*/
- if (n > pnum_inter) {
+ if (n > pnum_inter &&
+ (intermediate == top ||
+ sector_num + pnum_inter < intermediate->total_sectors)) {
n = pnum_inter;
}
return 0;
}
+/* Coroutine wrapper for bdrv_is_allocated_above() */
+static void coroutine_fn bdrv_is_allocated_above_co_entry(void *opaque)
+{
+ BdrvCoIsAllocatedData *data = opaque;
+ BlockDriverState *top = data->bs;
+ BlockDriverState *base = data->base;
+
+ data->ret = bdrv_co_is_allocated_above(top, base, data->sector_num,
+ data->nb_sectors, data->pnum);
+ data->done = true;
+}
+
+/*
+ * Synchronous wrapper around bdrv_co_is_allocated_above().
+ *
+ * See bdrv_co_is_allocated_above() for details.
+ */
+int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
+ int64_t sector_num, int nb_sectors, int *pnum)
+{
+ Coroutine *co;
+ BdrvCoIsAllocatedData data = {
+ .bs = top,
+ .base = base,
+ .sector_num = sector_num,
+ .nb_sectors = nb_sectors,
+ .pnum = pnum,
+ .done = false,
+ };
+
+ co = qemu_coroutine_create(bdrv_is_allocated_above_co_entry);
+ qemu_coroutine_enter(co, &data);
+ while (!data.done) {
+ qemu_aio_wait();
+ }
+ return data.ret;
+}
+
BlockInfo *bdrv_query_info(BlockDriverState *bs)
{
BlockInfo *info = g_malloc0(sizeof(*info));
if (bs->dirty_bitmap) {
info->has_dirty = true;
info->dirty = g_malloc0(sizeof(*info->dirty));
- info->dirty->count = bdrv_get_dirty_count(bs) *
- BDRV_SECTORS_PER_DIRTY_CHUNK * BDRV_SECTOR_SIZE;
+ info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
+ info->dirty->granularity =
+ ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap));
}
if (bs->drv) {
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
int64_t pos, int size)
+{
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *) buf,
+ .iov_len = size,
+ };
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_writev_vmstate(bs, &qiov, pos);
+}
+
+int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
BlockDriver *drv = bs->drv;
- if (!drv)
+
+ if (!drv) {
return -ENOMEDIUM;
- if (drv->bdrv_save_vmstate)
- return drv->bdrv_save_vmstate(bs, buf, pos, size);
- if (bs->file)
- return bdrv_save_vmstate(bs->file, buf, pos, size);
+ } else if (drv->bdrv_save_vmstate) {
+ return drv->bdrv_save_vmstate(bs, qiov, pos);
+ } else if (bs->file) {
+ return bdrv_writev_vmstate(bs->file, qiov, pos);
+ }
+
return -ENOTSUP;
}
if (bs->file) {
drv->bdrv_close(bs);
ret = bdrv_snapshot_goto(bs->file, snapshot_id);
- open_ret = drv->bdrv_open(bs, bs->open_flags);
+ open_ret = drv->bdrv_open(bs, NULL, bs->open_flags);
if (open_ret < 0) {
bdrv_delete(bs->file);
bs->drv = NULL;
bool is_write, double elapsed_time, uint64_t *wait)
{
uint64_t bps_limit = 0;
+ uint64_t extension;
double bytes_limit, bytes_base, bytes_res;
double slice_time, wait_time;
slice_time = bs->slice_end - bs->slice_start;
slice_time /= (NANOSECONDS_PER_SECOND);
bytes_limit = bps_limit * slice_time;
- bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
+ bytes_base = bs->slice_submitted.bytes[is_write];
if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
- bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
+ bytes_base += bs->slice_submitted.bytes[!is_write];
}
/* bytes_base: the bytes of data which have been read/written; and
* info can be kept until the timer fire, so it is increased and tuned
* based on the result of experiment.
*/
- bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
- bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
+ extension = wait_time * NANOSECONDS_PER_SECOND;
+ extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) *
+ BLOCK_IO_SLICE_TIME;
+ bs->slice_end += extension;
if (wait) {
- *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
+ *wait = wait_time * NANOSECONDS_PER_SECOND;
}
return true;
slice_time = bs->slice_end - bs->slice_start;
slice_time /= (NANOSECONDS_PER_SECOND);
ios_limit = iops_limit * slice_time;
- ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
+ ios_base = bs->slice_submitted.ios[is_write];
if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
- ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
+ ios_base += bs->slice_submitted.ios[!is_write];
}
if (ios_base + 1 <= ios_limit) {
return false;
}
- /* Calc approx time to dispatch */
+ /* Calc approx time to dispatch, in seconds */
wait_time = (ios_base + 1) / iops_limit;
if (wait_time > elapsed_time) {
wait_time = wait_time - elapsed_time;
wait_time = 0;
}
- bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
- bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
+ /* Exceeded current slice, extend it by another slice time */
+ bs->slice_end += BLOCK_IO_SLICE_TIME;
if (wait) {
- *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
+ *wait = wait_time * NANOSECONDS_PER_SECOND;
}
return true;
int bps_ret, iops_ret;
now = qemu_get_clock_ns(vm_clock);
- if ((bs->slice_start < now)
- && (bs->slice_end > now)) {
- bs->slice_end = now + bs->slice_time;
- } else {
- bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
+ if (now > bs->slice_end) {
bs->slice_start = now;
- bs->slice_end = now + bs->slice_time;
-
- bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
- bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
-
- bs->io_base.ios[is_write] = bs->nr_ops[is_write];
- bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
+ bs->slice_end = now + BLOCK_IO_SLICE_TIME;
+ memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
}
elapsed_time = now - bs->slice_start;
*wait = 0;
}
+ bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors *
+ BDRV_SECTOR_SIZE;
+ bs->slice_submitted.ios[is_write]++;
+
return false;
}
return -EIO;
} else if (bs->read_only) {
return -EROFS;
- } else if (bs->drv->bdrv_co_discard) {
+ }
+
+ if (bs->dirty_bitmap) {
+ bdrv_reset_dirty(bs, sector_num, nb_sectors);
+ }
+
+ /* Do nothing if disabled. */
+ if (!(bs->open_flags & BDRV_O_UNMAP)) {
+ return 0;
+ }
+
+ if (bs->drv->bdrv_co_discard) {
return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
} else if (bs->drv->bdrv_aio_discard) {
BlockDriverAIOCB *acb;
return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
}
-void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
+/*
+ * Check if all memory in this vector is sector aligned.
+ */
+bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
+{
+ int i;
+
+ for (i = 0; i < qiov->niov; i++) {
+ if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity)
{
int64_t bitmap_size;
- bs->dirty_count = 0;
- if (enable) {
- if (!bs->dirty_bitmap) {
- bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
- BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1;
- bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
+ assert((granularity & (granularity - 1)) == 0);
- bs->dirty_bitmap = g_new0(unsigned long, bitmap_size);
- }
+ if (granularity) {
+ granularity >>= BDRV_SECTOR_BITS;
+ assert(!bs->dirty_bitmap);
+ bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
+ bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
} else {
if (bs->dirty_bitmap) {
- g_free(bs->dirty_bitmap);
+ hbitmap_free(bs->dirty_bitmap);
bs->dirty_bitmap = NULL;
}
}
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
- int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
-
- if (bs->dirty_bitmap &&
- (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
- return !!(bs->dirty_bitmap[chunk / BITS_PER_LONG] &
- (1UL << (chunk % BITS_PER_LONG)));
+ if (bs->dirty_bitmap) {
+ return hbitmap_get(bs->dirty_bitmap, sector);
} else {
return 0;
}
}
-int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector)
+void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi)
{
- int64_t chunk;
- int bit, elem;
-
- /* Avoid an infinite loop. */
- assert(bs->dirty_count > 0);
-
- sector = (sector | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
- chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
-
- QEMU_BUILD_BUG_ON(sizeof(bs->dirty_bitmap[0]) * 8 != BITS_PER_LONG);
- elem = chunk / BITS_PER_LONG;
- bit = chunk % BITS_PER_LONG;
- for (;;) {
- if (sector >= bs->total_sectors) {
- sector = 0;
- bit = elem = 0;
- }
- if (bit == 0 && bs->dirty_bitmap[elem] == 0) {
- sector += BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
- elem++;
- } else {
- if (bs->dirty_bitmap[elem] & (1UL << bit)) {
- return sector;
- }
- sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
- if (++bit == BITS_PER_LONG) {
- bit = 0;
- elem++;
- }
- }
- }
+ hbitmap_iter_init(hbi, bs->dirty_bitmap, 0);
}
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
int nr_sectors)
{
- set_dirty_bitmap(bs, cur_sector, nr_sectors, 1);
+ hbitmap_set(bs->dirty_bitmap, cur_sector, nr_sectors);
}
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
int nr_sectors)
{
- set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
+ hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors);
}
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
- return bs->dirty_count;
+ if (bs->dirty_bitmap) {
+ return hbitmap_count(bs->dirty_bitmap);
+ } else {
+ return 0;
+ }
}
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
void bdrv_img_create(const char *filename, const char *fmt,
const char *base_filename, const char *base_fmt,
- char *options, uint64_t img_size, int flags, Error **errp)
+ char *options, uint64_t img_size, int flags,
+ Error **errp, bool quiet)
{
QEMUOptionParameter *param = NULL, *create_options = NULL;
QEMUOptionParameter *backing_fmt, *backing_file, *size;
bs = bdrv_new("");
- ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
+ ret = bdrv_open(bs, backing_file->value.s, NULL, back_flags,
+ backing_drv);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not open '%s'",
backing_file->value.s);
}
}
- printf("Formatting '%s', fmt=%s ", filename, fmt);
- print_option_parameters(param);
- puts("");
-
+ if (!quiet) {
+ printf("Formatting '%s', fmt=%s ", filename, fmt);
+ print_option_parameters(param);
+ puts("");
+ }
ret = bdrv_create(drv, filename, param);
if (ret < 0) {
if (ret == -ENOTSUP) {
bdrv_delete(bs);
}
}
+
+AioContext *bdrv_get_aio_context(BlockDriverState *bs)
+{
+ /* Currently BlockDriverState always uses the main loop AioContext */
+ return qemu_get_aio_context();
+}