#define BLOCK_FLAG_ENCRYPT 1
#define BLOCK_FLAG_COMPAT6 4
+#define BLOCK_IO_LIMIT_READ 0
+#define BLOCK_IO_LIMIT_WRITE 1
+#define BLOCK_IO_LIMIT_TOTAL 2
+
+#define BLOCK_IO_SLICE_TIME 100000000
+#define NANOSECONDS_PER_SECOND 1000000000.0
+
#define BLOCK_OPT_SIZE "size"
#define BLOCK_OPT_ENCRYPT "encryption"
#define BLOCK_OPT_COMPAT6 "compat6"
#define BLOCK_OPT_TABLE_SIZE "table_size"
#define BLOCK_OPT_PREALLOC "preallocation"
#define BLOCK_OPT_SUBFMT "subformat"
+#define BLOCK_OPT_COMPAT_LEVEL "compat"
+
+typedef struct BdrvTrackedRequest BdrvTrackedRequest;
+
+typedef struct BlockIOLimit {
+ int64_t bps[3];
+ int64_t iops[3];
+} BlockIOLimit;
+
+typedef struct BlockIOBaseValue {
+ uint64_t bytes[2];
+ uint64_t ios[2];
+} BlockIOBaseValue;
+
+typedef struct BlockJob BlockJob;
+
+/**
+ * BlockJobType:
+ *
+ * A class type for block job objects.
+ */
+typedef struct BlockJobType {
+ /** Derived BlockJob struct size */
+ size_t instance_size;
+
+ /** String describing the operation, part of query-block-jobs QMP API */
+ const char *job_type;
+
+ /** Optional callback for job types that support setting a speed limit */
+ int (*set_speed)(BlockJob *job, int64_t value);
+} BlockJobType;
+
+/**
+ * BlockJob:
+ *
+ * Long-running operation on a BlockDriverState.
+ */
+struct BlockJob {
+ /** The job type, including the job vtable. */
+ const BlockJobType *job_type;
+
+ /** The block device on which the job is operating. */
+ BlockDriverState *bs;
-typedef struct AIOPool {
- void (*cancel)(BlockDriverAIOCB *acb);
- int aiocb_size;
- BlockDriverAIOCB *free_aiocb;
-} AIOPool;
+ /**
+ * Set to true if the job should cancel itself. The flag must
+ * always be tested just before toggling the busy flag from false
+ * to true. After a job has detected that the cancelled flag is
+ * true, it should not anymore issue any I/O operation to the
+ * block device.
+ */
+ bool cancelled;
+
+ /**
+ * Set to false by the job while it is in a quiescent state, where
+ * no I/O is pending and cancellation can be processed without
+ * issuing new I/O. The busy flag must be set to false when the
+ * job goes to sleep on any condition that is not detected by
+ * #qemu_aio_wait, such as a timer.
+ */
+ bool busy;
+
+ /** Offset that is published by the query-block-jobs QMP API */
+ int64_t offset;
+
+ /** Length that is published by the query-block-jobs QMP API */
+ int64_t len;
+
+ /** Speed that was set with @block_job_set_speed. */
+ int64_t speed;
+
+ /** The completion function that will be called when the job completes. */
+ BlockDriverCompletionFunc *cb;
+
+ /** The opaque value that is passed to the completion function. */
+ void *opaque;
+};
struct BlockDriver {
const char *format_name;
const uint8_t *buf, int nb_sectors);
void (*bdrv_close)(BlockDriverState *bs);
int (*bdrv_create)(const char *filename, QEMUOptionParameter *options);
- int (*bdrv_is_allocated)(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *pnum);
int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
int (*bdrv_make_empty)(BlockDriverState *bs);
/* aio */
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
- int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
+ /*
+ * Efficiently zero a region of the disk image. Typically an image format
+ * would use a compact metadata representation to implement this. This
+ * function pointer may be NULL and .bdrv_co_writev() will be called
+ * instead.
+ */
+ int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors);
int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors);
+ int coroutine_fn (*bdrv_co_is_allocated)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum);
+
+ /*
+ * Invalidate any cached meta-data.
+ */
+ void (*bdrv_invalidate_cache)(BlockDriverState *bs);
- int (*bdrv_aio_multiwrite)(BlockDriverState *bs, BlockRequest *reqs,
- int num_reqs);
- int (*bdrv_merge_requests)(BlockDriverState *bs, BlockRequest* a,
- BlockRequest *b);
+ /*
+ * Flushes all data that was already written to the OS all the way down to
+ * the disk (for example raw-posix calls fsync()).
+ */
+ int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
+ /*
+ * Flushes all internal caches to the OS. The data may still sit in a
+ * writeback cache of the host OS, but it will survive a crash of the qemu
+ * process.
+ */
+ int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
const char *protocol_name;
int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
/* removable device specific */
int (*bdrv_is_inserted)(BlockDriverState *bs);
int (*bdrv_media_changed)(BlockDriverState *bs);
- void (*bdrv_eject)(BlockDriverState *bs, int eject_flag);
+ void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
/* to control generic scsi devices */
QLIST_ENTRY(BlockDriver) list;
};
+/*
+ * Note: the function bdrv_append() copies and swaps contents of
+ * BlockDriverStates, so if you add new fields to this struct, please
+ * inspect bdrv_append() to determine if the new fields need to be
+ * copied as well.
+ */
struct BlockDriverState {
int64_t total_sectors; /* if we are reading a disk image, give its
size in sectors */
int encrypted; /* if true, the media is encrypted */
int valid_key; /* if true, a valid encryption key has been set */
int sg; /* if true, the device is a /dev/sg* */
+ int copy_on_read; /* if true, copy read backing sectors into image
+ note this is a reference count */
BlockDriver *drv; /* NULL means no media */
void *opaque;
BlockDriverState *backing_hd;
BlockDriverState *file;
- /* async read/write emulation */
+ /* number of in-flight copy-on-read requests */
+ unsigned int copy_on_read_in_flight;
- void *sync_aiocb;
+ /* the time for latest disk I/O */
+ int64_t slice_time;
+ int64_t slice_start;
+ int64_t slice_end;
+ BlockIOLimit io_limits;
+ BlockIOBaseValue io_base;
+ CoQueue throttled_reqs;
+ QEMUTimer *block_timer;
+ bool io_limits_enabled;
/* I/O stats (display with "info blockstats"). */
uint64_t nr_bytes[BDRV_MAX_IOTYPE];
int64_t dirty_count;
int in_use; /* users other than guest access, eg. block migration */
QTAILQ_ENTRY(BlockDriverState) list;
- void *private;
-};
-struct BlockDriverAIOCB {
- AIOPool *pool;
- BlockDriverState *bs;
- BlockDriverCompletionFunc *cb;
- void *opaque;
- BlockDriverAIOCB *next;
+ QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
+
+ /* long-running background operation */
+ BlockJob *job;
};
void get_tmp_filename(char *filename, int size);
-void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
- BlockDriverCompletionFunc *cb, void *opaque);
-void qemu_aio_release(void *p);
+void bdrv_set_io_limits(BlockDriverState *bs,
+ BlockIOLimit *io_limits);
#ifdef _WIN32
int is_windows_drive(const char *filename);
#endif
+/**
+ * block_job_create:
+ * @job_type: The class object for the newly-created job.
+ * @bs: The block
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ *
+ * Create a new long-running block device job and return it. The job
+ * will call @cb asynchronously when the job completes. Note that
+ * @bs may have been closed at the time the @cb it is called. If
+ * this is the case, the job may be reported as either cancelled or
+ * completed.
+ *
+ * This function is not part of the public job interface; it should be
+ * called from a wrapper that is specific to the job type.
+ */
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
+
+/**
+ * block_job_complete:
+ * @job: The job being completed.
+ * @ret: The status code.
+ *
+ * Call the completion function that was registered at creation time, and
+ * free @job.
+ */
+void block_job_complete(BlockJob *job, int ret);
+
+/**
+ * block_job_set_speed:
+ * @job: The job to set the speed for.
+ * @speed: The new value
+ *
+ * Set a rate-limiting parameter for the job; the actual meaning may
+ * vary depending on the job type.
+ */
+int block_job_set_speed(BlockJob *job, int64_t value);
+
+/**
+ * block_job_cancel:
+ * @job: The job to be canceled.
+ *
+ * Asynchronously cancel the specified job.
+ */
+void block_job_cancel(BlockJob *job);
+
+/**
+ * block_job_is_cancelled:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is scheduled for cancellation.
+ */
+bool block_job_is_cancelled(BlockJob *job);
+
+/**
+ * block_job_cancel:
+ * @job: The job to be canceled.
+ *
+ * Asynchronously cancel the job and wait for it to reach a quiescent
+ * state. Note that the completion callback will still be called
+ * asynchronously, hence it is *not* valid to call #bdrv_delete
+ * immediately after #block_job_cancel_sync. Users of block jobs
+ * will usually protect the BlockDriverState objects with a reference
+ * count, should this be a concern.
+ */
+void block_job_cancel_sync(BlockJob *job);
+
+/**
+ * stream_start:
+ * @bs: Block device to operate on.
+ * @base: Block device that will become the new base, or %NULL to
+ * flatten the whole backing file chain onto @bs.
+ * @base_id: The file name that will be written to @bs as the new
+ * backing file if the job completes. Ignored if @base is %NULL.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ *
+ * Start a streaming operation on @bs. Clusters that are unallocated
+ * in @bs, but allocated in any image between @base and @bs (both
+ * exclusive) will be written to @bs. At the end of a successful
+ * streaming job, the backing file of @bs will be changed to
+ * @base_id in the written image and to @base in the live BlockDriverState.
+ */
+int stream_start(BlockDriverState *bs, BlockDriverState *base,
+ const char *base_id, BlockDriverCompletionFunc *cb,
+ void *opaque);
+
#endif /* BLOCK_INT_H */