/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2011 IBM Corp.
 * Copyright (c) 2012 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "block/block.h"
29 #include "block/blockjob_int.h"
30 #include "block/block_int.h"
31 #include "sysemu/block-backend.h"
32 #include "qapi/qmp/qerror.h"
33 #include "qapi/qmp/qjson.h"
34 #include "qemu/coroutine.h"
36 #include "qmp-commands.h"
37 #include "qemu/timer.h"
38 #include "qapi-event.h"
/* Right now, this mutex is only needed to synchronize accesses to job->busy
 * and job->sleep_timer, such as concurrent calls to block_job_do_yield and
 * block_job_enter. */
/* Global lock shared by all block jobs; taken and released through
 * block_job_lock() and block_job_unlock(). */
43 static QemuMutex block_job_mutex;
/* Acquire the global block_job_mutex. */
45 static void block_job_lock(void)
47 qemu_mutex_lock(&block_job_mutex);
/* Release the global block_job_mutex. */
50 static void block_job_unlock(void)
52 qemu_mutex_unlock(&block_job_mutex);
/* Initializes block_job_mutex. The __constructor__ attribute asks the
 * toolchain to run this function at load time, before main(). */
55 static void __attribute__((__constructor__)) block_job_init(void)
57 qemu_mutex_init(&block_job_mutex);
/* Forward declarations of static helpers that are used before their
 * definitions further down in this file. */
60 static void block_job_event_cancelled(BlockJob *job);
61 static void block_job_event_completed(BlockJob *job, const char *msg);
62 static void block_job_enter_cond(BlockJob *job, bool(*fn)(BlockJob *job));
/* NOTE(review): the struct opener, its closing brace and some members are
 * not visible in this copy. Code below accesses txn->aborting, txn->refcnt
 * and txn->jobs, so those are presumably the members -- confirm against the
 * full definition. */
64 /* Transactional group of block jobs */
67 /* Is this txn being cancelled? */
/* Jobs belonging to this transaction, linked through BlockJob.txn_list. */
71 QLIST_HEAD(, BlockJob) jobs;
/* Global list of all block jobs, linked through BlockJob.job_list. */
77 static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs);
/*
 * The block job API is composed of two categories of functions.
 *
 * The first includes functions used by the monitor.  The monitor is
 * peculiar in that it accesses the block job list with block_job_get, and
 * therefore needs consistency across block_job_get and the actual operation
 * (e.g. block_job_set_speed).  The consistency is achieved with
 * aio_context_acquire/release.  These functions are declared in blockjob.h.
 *
 * The second includes functions used by the block job drivers and sometimes
 * by the core block layer.  These do not care about locking, because the
 * whole coroutine runs under the AioContext lock, and are declared in
 * blockjob_int.h.
 */
/*
 * Iterator over the global block_jobs list: yields either the first job in
 * the list or the job that follows @job.
 * NOTE(review): the condition choosing between the two returns is missing
 * from this copy; presumably a NULL @job selects the first entry (the loop
 * in block_job_pause_all() starts with job = NULL) -- confirm.
 */
94 BlockJob *block_job_next(BlockJob *job)
97 return QLIST_FIRST(&block_jobs);
99 return QLIST_NEXT(job, job_list);
/*
 * Look up a block job by @id by walking the global block_jobs list.
 * Jobs without an ID (job->id == NULL) are skipped before the comparison.
 * NOTE(review): the return statements are not visible in this copy;
 * presumably the matching job is returned, or NULL when nothing matches
 * (block_job_create() tests the result for truth) -- confirm.
 */
102 BlockJob *block_job_get(const char *id)
106 QLIST_FOREACH(job, &block_jobs, job_list) {
107 if (job->id && !strcmp(id, job->id)) {
/*
 * Allocate a zero-initialized transaction and initialize its (empty) job
 * list.
 * NOTE(review): the lines that set the initial reference count and return
 * the transaction are not visible in this copy.
 */
115 BlockJobTxn *block_job_txn_new(void)
117 BlockJobTxn *txn = g_new0(BlockJobTxn, 1);
118 QLIST_INIT(&txn->jobs);
/* Take a reference on @txn.
 * NOTE(review): the body is not visible in this copy; presumably it
 * increments txn->refcnt, mirroring block_job_txn_unref() -- confirm. */
123 static void block_job_txn_ref(BlockJobTxn *txn)
/* Drop a reference on @txn; a NULL @txn is tolerated.
 * NOTE(review): the branch taken when the count reaches zero is not visible
 * in this copy (presumably it frees the transaction) -- confirm. */
128 void block_job_txn_unref(BlockJobTxn *txn)
130 if (txn && --txn->refcnt == 0) {
/*
 * Add @job to the transaction @txn: the job is linked at the head of
 * txn->jobs and a reference is taken on the transaction (released again in
 * block_job_completed_single()).
 * NOTE(review): the lines between the signature and the list insertion are
 * missing from this copy; block_job_completed_single() reads job->txn, so
 * that field is presumably assigned here -- confirm.
 */
135 void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job)
144 QLIST_INSERT_HEAD(&txn->jobs, job, txn_list);
145 block_job_txn_ref(txn);
/* Add a pause request to @job.
 * NOTE(review): the body is not visible in this copy; block_job_resume() and
 * block_job_should_pause() operate on job->pause_count, so this presumably
 * increments that counter -- confirm. */
148 static void block_job_pause(BlockJob *job)
/*
 * Undo one pause request on @job. The visible code asserts that the job has
 * a positive pause_count, tests pause_count again, and calls
 * block_job_enter() to wake the job.
 * NOTE(review): the pause_count decrement and the body of the if (presumably
 * an early return while pause requests remain) are not visible in this copy.
 */
153 static void block_job_resume(BlockJob *job)
155 assert(job->pause_count > 0);
157 if (job->pause_count) {
160 block_job_enter(job);
/* Take a reference on @job.
 * NOTE(review): the body is not visible in this copy; presumably it
 * increments job->refcnt, the counter block_job_unref() decrements. */
163 void block_job_ref(BlockJob *job)
/* Forward declarations of the AioContext notifier callbacks, which
 * block_job_unref() references before they are defined.
 * NOTE(review): the second line of the first prototype is missing from this
 * copy. */
168 static void block_job_attached_aio_context(AioContext *new_context,
170 static void block_job_detach_aio_context(void *opaque);
/*
 * Drop a reference on @job. When the count reaches zero the job is torn
 * down: it is unlinked from the global job list, all nodes attached with
 * block_job_add_bdrv() are released, the AioContext notifiers registered in
 * block_job_create() are removed and the op-blocker error is freed. The
 * sleep timer must not be pending at that point (asserted).
 * NOTE(review): several teardown lines (e.g. the ones using the local @bs
 * and releasing the job object itself) are not visible in this copy.
 */
172 void block_job_unref(BlockJob *job)
174 if (--job->refcnt == 0) {
175 BlockDriverState *bs = blk_bs(job->blk);
176 QLIST_REMOVE(job, job_list);
178 block_job_remove_all_bdrv(job);
179 blk_remove_aio_context_notifier(job->blk,
180 block_job_attached_aio_context,
181 block_job_detach_aio_context, job);
183 error_free(job->blocker);
185 assert(!timer_pending(&job->sleep_timer));
/*
 * AioContext notifier registered in block_job_create(). Forwards
 * @new_context to the driver's optional attached_aio_context hook and then
 * resumes the job, which balances the block_job_pause() done in
 * block_job_detach_aio_context().
 */
190 static void block_job_attached_aio_context(AioContext *new_context,
193 BlockJob *job = opaque;
195 if (job->driver->attached_aio_context) {
196 job->driver->attached_aio_context(job, new_context);
199 block_job_resume(job);
/*
 * Push @job forward: enter it, then invoke the driver's optional drain hook.
 * NOTE(review): the lines between the enter call and the hook test are
 * missing from this copy.
 */
202 static void block_job_drain(BlockJob *job)
204 /* If job is !job->busy this kicks it into the next pause point. */
205 block_job_enter(job);
208 if (job->driver->drain) {
209 job->driver->drain(job);
/*
 * AioContext notifier registered in block_job_create(). Pauses the job and
 * keeps draining it until it is either paused or completed, then drops a
 * job reference.
 * NOTE(review): the line following the "in case the job terminates" comment
 * is missing from this copy; presumably it takes the reference that is
 * dropped at the end -- confirm.
 */
213 static void block_job_detach_aio_context(void *opaque)
215 BlockJob *job = opaque;
217 /* In case the job terminates during aio_poll()... */
220 block_job_pause(job);
222 while (!job->paused && !job->completed) {
223 block_job_drain(job);
226 block_job_unref(job);
/*
 * BdrvChildRole callback (see child_job): returns a newly allocated,
 * human-readable description of the job that owns child @c, in the form
 * "<type> job '<id>'". The caller owns the returned string.
 * NOTE(review): the last printf argument is cut off in this copy;
 * presumably job->id -- confirm.
 */
229 static char *child_job_get_parent_desc(BdrvChild *c)
231 BlockJob *job = c->opaque;
232 return g_strdup_printf("%s job '%s'",
233 BlockJobType_str(job->driver->job_type),
/* Child role passed to bdrv_root_attach_child() for every node a block job
 * attaches to (see block_job_add_bdrv()). */
237 static const BdrvChildRole child_job = {
238 .get_parent_desc = child_job_get_parent_desc,
239 .stay_at_node = true,
/* BlockDevOps .drained_begin callback: adds a pause request to the job
 * passed as @opaque. */
242 static void block_job_drained_begin(void *opaque)
244 BlockJob *job = opaque;
245 block_job_pause(job);
/* BlockDevOps .drained_end callback: undoes the pause request made in
 * block_job_drained_begin() for the job passed as @opaque. */
248 static void block_job_drained_end(void *opaque)
250 BlockJob *job = opaque;
251 block_job_resume(job);
/* Device callbacks installed on the job's BlockBackend by
 * block_job_create() through blk_set_dev_ops(). */
254 static const BlockDevOps block_job_dev_ops = {
255 .drained_begin = block_job_drained_begin,
256 .drained_end = block_job_drained_end,
/*
 * Release every node recorded in job->nodes: lift the job's op blockers
 * from the node, drop the child reference, and finally free the list.
 * NOTE(review): a line after g_slist_free() is missing from this copy;
 * confirm that job->nodes is reset there so the pointer cannot dangle.
 */
259 void block_job_remove_all_bdrv(BlockJob *job)
262 for (l = job->nodes; l; l = l->next) {
263 BdrvChild *c = l->data;
264 bdrv_op_unblock_all(c->bs, job->blocker);
265 bdrv_root_unref_child(c);
267 g_slist_free(job->nodes);
/*
 * Attach @bs to @job as a child named @name, requesting permissions @perm
 * and sharing @shared_perm. The new child is recorded in job->nodes and all
 * operations on @bs are blocked with the job's blocker.
 * NOTE(review): the remaining attach arguments, the handling of a failed
 * attach and the return statements are not visible in this copy.
 */
271 int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
272 uint64_t perm, uint64_t shared_perm, Error **errp)
276 c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm,
282 job->nodes = g_slist_prepend(job->nodes, c);
284 bdrv_op_block_all(bs, job->blocker);
/* A job counts as internal when it has no ID (job->id == NULL). */
289 bool block_job_is_internal(BlockJob *job)
291 return (job->id == NULL);
/* Report whether @job has been started.
 * NOTE(review): the body is not visible in this copy; block_job_start()
 * creates job->co, so this presumably tests that field -- confirm. */
294 static bool block_job_started(BlockJob *job)
/*
 * All jobs must allow a pause point before entering their job proper.  This
 * ensures that jobs can be paused prior to being started, then resumed later.
 */
/* Coroutine entry point of a job (created in block_job_start()): checks the
 * job and its driver, honours any pending pause request, then runs the
 * driver's start callback. */
303 static void coroutine_fn block_job_co_entry(void *opaque)
305 BlockJob *job = opaque;
307 assert(job && job->driver && job->driver->start);
308 block_job_pause_point(job);
309 job->driver->start(job);
/* Callback of job->sleep_timer (installed in block_job_create()): re-enters
 * the job passed as @opaque. */
312 static void block_job_sleep_timer_cb(void *opaque)
314 BlockJob *job = opaque;
316 block_job_enter(job);
/*
 * Start @job. The job must not have been started yet, must be marked
 * paused, and its driver must provide a start callback (all asserted).
 * Creates the job coroutine and enters it through bdrv_coroutine_enter()
 * on the job's node.
 * NOTE(review): the lines between coroutine creation and entry are missing
 * from this copy.
 */
319 void block_job_start(BlockJob *job)
321 assert(job && !block_job_started(job) && job->paused &&
322 job->driver && job->driver->start);
323 job->co = qemu_coroutine_create(block_job_co_entry, job);
327 bdrv_coroutine_enter(blk_bs(job->blk), job->co);
/*
 * Finalize one completed job: run the driver's optional commit/abort and
 * clean hooks, invoke the completion callback with job->ret, emit the
 * cancelled or completed event (only for jobs that were actually started),
 * unlink the job from its transaction, release the transaction reference
 * and drop a job reference.
 * NOTE(review): the conditions selecting commit vs. abort, guarding the
 * callback and the transaction removal, and guarding the strerror() call
 * are not visible in this copy.
 */
330 static void block_job_completed_single(BlockJob *job)
332 assert(job->completed);
335 if (job->driver->commit) {
336 job->driver->commit(job);
339 if (job->driver->abort) {
340 job->driver->abort(job);
343 if (job->driver->clean) {
344 job->driver->clean(job);
348 job->cb(job->opaque, job->ret);
351 /* Emit events only if we actually started */
352 if (block_job_started(job)) {
353 if (block_job_is_cancelled(job)) {
354 block_job_event_cancelled(job);
356 const char *msg = NULL;
/* job->ret is negated before strerror(), i.e. it is treated as -errno */
358 msg = strerror(-job->ret);
360 block_job_event_completed(job, msg);
365 QLIST_REMOVE(job, txn_list);
366 block_job_txn_unref(job->txn);
368 block_job_unref(job);
/*
 * Flag @job as cancelled without waiting for it. A non-OK I/O status is
 * reset first (while user_paused is still set, as
 * block_job_iostatus_reset() asserts), then a user pause is cleared.
 * NOTE(review): a line inside the user_paused branch is missing from this
 * copy; presumably the matching pause_count adjustment -- confirm.
 */
371 static void block_job_cancel_async(BlockJob *job)
373 if (job->iostatus != BLOCK_DEVICE_IO_STATUS_OK) {
374 block_job_iostatus_reset(job);
376 if (job->user_paused) {
377 /* Do not call block_job_enter here, the caller will handle it. */
378 job->user_paused = false;
381 job->cancelled = true;
/*
 * Run @finish on @job and wait synchronously until the job has completed:
 * first by draining the job until it completes or defers to the main loop,
 * then by polling the main AioContext. The result is -ECANCELED when the
 * job was cancelled with job->ret == 0, and job->ret otherwise. An error
 * reported by @finish is propagated to @errp.
 * NOTE(review): the lines taking the job reference, guarding the @finish
 * call (block_job_completed_txn_abort() passes finish == NULL), testing
 * local_err and returning are not visible in this copy.
 */
384 static int block_job_finish_sync(BlockJob *job,
385 void (*finish)(BlockJob *, Error **errp),
388 Error *local_err = NULL;
391 assert(blk_bs(job->blk)->job == job);
396 finish(job, &local_err);
399 error_propagate(errp, local_err);
400 block_job_unref(job);
403 /* block_job_drain calls block_job_enter, and it should be enough to
404 * induce progress until the job completes or moves to the main thread.
406 while (!job->deferred_to_main_loop && !job->completed) {
407 block_job_drain(job);
409 while (!job->completed) {
410 aio_poll(qemu_get_aio_context(), true);
412 ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret;
413 block_job_unref(job);
/*
 * Handle failure or cancellation of @job inside a transaction (called from
 * block_job_completed()). The first failing job marks the transaction as
 * aborting and holds an extra transaction reference while it works. It
 * acquires the AioContext of every job in the transaction, cancels all jobs
 * other than itself, then finishes each not-yet-completed job synchronously
 * and finalizes every job, releasing that job's context afterwards.
 * NOTE(review): the check that returns early when the transaction is
 * already aborting, and some local declarations, are not visible in this
 * copy.
 */
417 static void block_job_completed_txn_abort(BlockJob *job)
420 BlockJobTxn *txn = job->txn;
425 * We are cancelled by another job, which will handle everything.
429 txn->aborting = true;
430 block_job_txn_ref(txn);
432 /* We are the first failed job. Cancel other jobs. */
433 QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
434 ctx = blk_get_aio_context(other_job->blk);
435 aio_context_acquire(ctx);
438 /* Other jobs are effectively cancelled by us, set the status for
439 * them; this job, however, may or may not be cancelled, depending
440 * on the caller, so leave it. */
441 QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
442 if (other_job != job) {
443 block_job_cancel_async(other_job);
/* block_job_completed_single() unlinks each job from txn->jobs, so this
 * loop ends once every job has been finalized. */
446 while (!QLIST_EMPTY(&txn->jobs)) {
447 other_job = QLIST_FIRST(&txn->jobs);
448 ctx = blk_get_aio_context(other_job->blk);
449 if (!other_job->completed) {
450 assert(other_job->cancelled);
451 block_job_finish_sync(other_job, NULL, NULL);
453 block_job_completed_single(other_job);
454 aio_context_release(ctx);
457 block_job_txn_unref(txn);
/*
 * Handle successful completion of @job inside a transaction (called from
 * block_job_completed()). The first loop looks for jobs that have not
 * completed yet; once this is the last job to complete, every job in the
 * transaction is finalized under its own AioContext. All jobs are expected
 * to have ret == 0 at that point (asserted).
 * NOTE(review): the body of the "not completed" test (presumably an early
 * return) is not visible in this copy.
 */
460 static void block_job_completed_txn_success(BlockJob *job)
463 BlockJobTxn *txn = job->txn;
464 BlockJob *other_job, *next;
466 * Successful completion, see if there are other running jobs in this
469 QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
470 if (!other_job->completed) {
474 /* We are the last completed job, commit the transaction. */
/* _SAFE variant: block_job_completed_single() unlinks other_job */
475 QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
476 ctx = blk_get_aio_context(other_job->blk);
477 aio_context_acquire(ctx);
478 assert(other_job->ret == 0);
479 block_job_completed_single(other_job);
480 aio_context_release(ctx);
/* Predicate passed to block_job_enter_cond() by block_job_set_speed():
 * true when @job's sleep timer is pending. */
484 /* Assumes the block_job_mutex is held */
485 static bool block_job_timer_pending(BlockJob *job)
487 return timer_pending(&job->sleep_timer);
490 void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
492 Error *local_err = NULL;
493 int64_t old_speed = job->speed;
495 if (!job->driver->set_speed) {
496 error_setg(errp, QERR_UNSUPPORTED);
499 job->driver->set_speed(job, speed, &local_err);
501 error_propagate(errp, local_err);
506 if (speed <= old_speed) {
510 /* kick only if a timer is pending */
511 block_job_enter_cond(job, block_job_timer_pending);
/*
 * Ask @job to complete. The request is refused with an error in @errp when
 * the job has pending pause requests, is cancelled, has not been started,
 * or when its driver has no complete hook; otherwise it is delegated to
 * the driver, which reports failures through @errp.
 * NOTE(review): the line following the "internal jobs" comment and the
 * remainder of the error path are not visible in this copy.
 */
514 void block_job_complete(BlockJob *job, Error **errp)
516 /* Should not be reachable via external interface for internal jobs */
518 if (job->pause_count || job->cancelled ||
519 !block_job_started(job) || !job->driver->complete) {
520 error_setg(errp, "The active block job '%s' cannot be completed",
525 job->driver->complete(job, errp);
/* Pause @job on behalf of the user: marks the pause as user-initiated and
 * adds a pause request. */
528 void block_job_user_pause(BlockJob *job)
530 job->user_paused = true;
531 block_job_pause(job);
/* Whether @job is currently flagged as paused at the user's request. */
534 bool block_job_user_paused(BlockJob *job)
536 return job->user_paused;
/*
 * Resume a job that was paused by the user. Does nothing unless @job is
 * non-NULL, flagged user_paused and has a positive pause_count. The I/O
 * status is reset before the flag is cleared, because
 * block_job_iostatus_reset() asserts that user_paused is still set.
 */
539 void block_job_user_resume(BlockJob *job)
541 if (job && job->user_paused && job->pause_count > 0) {
542 block_job_iostatus_reset(job);
543 job->user_paused = false;
544 block_job_resume(job);
/*
 * Cancel @job without waiting for it. A started job is flagged as
 * cancelled and re-entered; the other visible path completes the job
 * directly with -ECANCELED.
 * NOTE(review): the line separating the two paths (presumably an else for
 * jobs that never started) is not visible in this copy.
 */
548 void block_job_cancel(BlockJob *job)
550 if (block_job_started(job)) {
551 block_job_cancel_async(job);
552 block_job_enter(job);
554 block_job_completed(job, -ECANCELED);
/* @errp is accepted only for signature compatibility; this wrapper never
 * sets it. */
558 /* A wrapper around block_job_cancel() taking an Error ** parameter so it may be
559 * used with block_job_finish_sync() without the need for (rather nasty)
560 * function pointer casts there. */
561 static void block_job_cancel_err(BlockJob *job, Error **errp)
563 block_job_cancel(job);
/* Cancel @job and wait until it has completed; returns the result of
 * block_job_finish_sync(). No error object is collected (errp is NULL). */
566 int block_job_cancel_sync(BlockJob *job)
568 return block_job_finish_sync(job, &block_job_cancel_err, NULL);
/*
 * Synchronously cancel every job in the global list, holding each job's
 * AioContext around the cancellation. The loop always takes the list head,
 * so it relies on finished jobs being unlinked from block_jobs (done in
 * block_job_unref() when the last reference goes away).
 */
571 void block_job_cancel_sync_all(void)
574 AioContext *aio_context;
576 while ((job = QLIST_FIRST(&block_jobs))) {
577 aio_context = blk_get_aio_context(job->blk);
578 aio_context_acquire(aio_context);
579 block_job_cancel_sync(job);
580 aio_context_release(aio_context);
/* Ask @job to complete and wait until it has; errors from
 * block_job_complete() are reported through @errp. Returns the result of
 * block_job_finish_sync(). */
584 int block_job_complete_sync(BlockJob *job, Error **errp)
586 return block_job_finish_sync(job, &block_job_complete, errp);
/*
 * Build a newly allocated BlockJobInfo describing the current state of
 * @job. Internal jobs cannot be queried; an error is set in @errp instead.
 * The type and device strings are duplicated into the result.
 * NOTE(review): the return statements are not visible in this copy;
 * presumably NULL on error and @info otherwise -- confirm.
 */
589 BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
593 if (block_job_is_internal(job)) {
594 error_setg(errp, "Cannot query QEMU internal jobs");
597 info = g_new0(BlockJobInfo, 1);
598 info->type = g_strdup(BlockJobType_str(job->driver->job_type));
/* the "device" field carries the job ID */
599 info->device = g_strdup(job->id);
600 info->len = job->len;
601 info->busy = atomic_read(&job->busy);
602 info->paused = job->pause_count > 0;
603 info->offset = job->offset;
604 info->speed = job->speed;
605 info->io_status = job->iostatus;
606 info->ready = job->ready;
/* Record an I/O error on @job. Only the first error sticks: the status is
 * changed only while it is still OK. @error is a positive errno value;
 * ENOSPC maps to NOSPACE, everything else to FAILED. */
610 static void block_job_iostatus_set_err(BlockJob *job, int error)
612 if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
613 job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
614 BLOCK_DEVICE_IO_STATUS_FAILED;
/* Send the "block job cancelled" event for @job through
 * qapi_event_send_block_job_cancelled().
 * NOTE(review): the body of the internal-job check (presumably an early
 * return that suppresses the event) and the remaining event arguments are
 * not visible in this copy. */
618 static void block_job_event_cancelled(BlockJob *job)
620 if (block_job_is_internal(job)) {
624 qapi_event_send_block_job_cancelled(job->driver->job_type,
/* Send the "block job completed" event for @job through
 * qapi_event_send_block_job_completed(); @msg is the error text, or NULL.
 * NOTE(review): the body of the internal-job check (presumably an early
 * return that suppresses the event) and the remaining event arguments are
 * not visible in this copy. */
632 static void block_job_event_completed(BlockJob *job, const char *msg)
634 if (block_job_is_internal(job)) {
638 qapi_event_send_block_job_completed(job->driver->job_type,
/*
 * API for block job drivers and the block layer.  These functions are
 * declared in blockjob_int.h.
 */
/*
 * Create a block job of type @driver operating on @bs.
 *
 * @job_id:  job ID, or NULL; for a non-internal job a NULL ID falls back to
 *           the device name of @bs. The ID must be well-formed and unused.
 * @driver:  job driver; driver->instance_size bytes are allocated, zeroed
 * @bs:      node the job runs on; also attached as the job's "main node"
 * @perm, @shared_perm: permissions for the job's BlockBackend
 * @speed:   initial rate limit, applied through block_job_set_speed()
 * @flags:   BLOCK_JOB_INTERNAL marks an internal job, which must not be
 *           given an ID
 * @cb, @opaque: completion callback and its argument
 * @errp:    receives the error on failure
 *
 * The new job starts with pause_count == 1, is inserted into the global job
 * list, and gets drain callbacks and AioContext notifiers installed on its
 * BlockBackend. If setting the speed fails the job is unreferenced again
 * and the error is propagated.
 * NOTE(review): several conditions, field assignments and all return
 * statements are missing from this copy; presumably NULL is returned on
 * failure and the new job otherwise -- confirm.
 */
653 void *block_job_create(const char *job_id, const BlockJobDriver *driver,
654 BlockDriverState *bs, uint64_t perm,
655 uint64_t shared_perm, int64_t speed, int flags,
656 BlockCompletionFunc *cb, void *opaque, Error **errp)
663 error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
667 if (job_id == NULL && !(flags & BLOCK_JOB_INTERNAL)) {
668 job_id = bdrv_get_device_name(bs);
670 error_setg(errp, "An explicit job ID is required for this node");
676 if (flags & BLOCK_JOB_INTERNAL) {
677 error_setg(errp, "Cannot specify job ID for internal block job");
681 if (!id_wellformed(job_id)) {
682 error_setg(errp, "Invalid job ID '%s'", job_id);
686 if (block_job_get(job_id)) {
687 error_setg(errp, "Job ID '%s' already in use", job_id);
692 blk = blk_new(perm, shared_perm);
693 ret = blk_insert_bs(blk, bs, errp);
699 job = g_malloc0(driver->instance_size);
700 job->driver = driver;
701 job->id = g_strdup(job_id);
704 job->opaque = opaque;
/* created paused: block_job_start() asserts job->paused */
707 job->pause_count = 1;
709 aio_timer_init(qemu_get_aio_context(), &job->sleep_timer,
710 QEMU_CLOCK_REALTIME, SCALE_NS,
711 block_job_sleep_timer_cb, job);
713 error_setg(&job->blocker, "block device is in use by block job: %s",
714 BlockJobType_str(driver->job_type));
715 block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
718 blk_set_dev_ops(blk, &block_job_dev_ops, job);
719 bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
721 QLIST_INSERT_HEAD(&block_jobs, job, job_list);
723 blk_add_aio_context_notifier(blk, block_job_attached_aio_context,
724 block_job_detach_aio_context, job);
726 /* Only set speed when necessary to avoid NotSupported error */
728 Error *local_err = NULL;
730 block_job_set_speed(job, speed, &local_err);
732 block_job_unref(job);
733 error_propagate(errp, local_err);
/*
 * Add a pause request to every block job, holding each job's AioContext
 * while doing so.
 * NOTE(review): one line between the acquire and the pause is missing from
 * this copy; block_job_resume_all() drops a job reference, so a matching
 * block_job_ref() presumably belongs here -- confirm.
 */
740 void block_job_pause_all(void)
742 BlockJob *job = NULL;
743 while ((job = block_job_next(job))) {
744 AioContext *aio_context = blk_get_aio_context(job->blk);
746 aio_context_acquire(aio_context);
748 block_job_pause(job);
749 aio_context_release(aio_context);
/* Give up on @job by dropping a reference to it. Judging by the name this
 * is meant for jobs that fail before being started -- TODO confirm with
 * callers. */
753 void block_job_early_fail(BlockJob *job)
755 block_job_unref(job);
/*
 * Mark @job as completed with result @ret. Must be called only once per
 * job (asserted). The job is then either finalized on its own, or, as part
 * of a transaction, aborts it (when @ret is negative or the job was
 * cancelled) or takes the success path.
 * NOTE(review): the lines storing @ret and testing whether the job belongs
 * to a transaction are not visible in this copy.
 */
758 void block_job_completed(BlockJob *job, int ret)
760 assert(blk_bs(job->blk)->job == job);
761 assert(!job->completed);
762 job->completed = true;
765 block_job_completed_single(job);
766 } else if (ret < 0 || block_job_is_cancelled(job)) {
767 block_job_completed_txn_abort(job);
769 block_job_completed_txn_success(job);
/* True while at least one pause request is outstanding on @job. */
773 static bool block_job_should_pause(BlockJob *job)
775 return job->pause_count > 0;
778 /* Yield, and schedule a timer to reenter the coroutine after @ns nanoseconds.
779 * Reentering the job coroutine with block_job_enter() before the timer has
780 * expired is allowed and cancels the timer.
782 * If @ns is (uint64_t) -1, no timer is scheduled and block_job_enter() must be
783 * called explicitly. */
/* NOTE(review): @ns is handed to timer_mod() unchanged, so it is an absolute
 * expiry time (block_job_sleep_ns() adds the current clock value). The
 * locking calls, the test on @ns and the busy-flag updates are not visible
 * in this copy. */
784 static void block_job_do_yield(BlockJob *job, uint64_t ns)
788 timer_mod(&job->sleep_timer, ns);
792 qemu_coroutine_yield();
794 /* Set by block_job_enter before re-entering the coroutine. */
/*
 * Pause point, called from the job coroutine. When a pause is requested and
 * the job is not cancelled, the driver's optional pause hook runs, the
 * coroutine yields without a timer until it is re-entered, and the driver's
 * optional resume hook runs afterwards. The pause condition is re-checked
 * after the pause hook, immediately before yielding.
 * NOTE(review): the early returns and the updates of job->paused are not
 * visible in this copy.
 */
798 void coroutine_fn block_job_pause_point(BlockJob *job)
800 assert(job && block_job_started(job));
802 if (!block_job_should_pause(job)) {
805 if (block_job_is_cancelled(job)) {
809 if (job->driver->pause) {
810 job->driver->pause(job);
813 if (block_job_should_pause(job) && !block_job_is_cancelled(job)) {
815 block_job_do_yield(job, -1);
819 if (job->driver->resume) {
820 job->driver->resume(job);
/*
 * Undo one pause request on every block job and drop one job reference,
 * holding each job's AioContext while doing so. The _SAFE iterator is used
 * because dropping the reference may unlink the job from block_jobs.
 */
824 void block_job_resume_all(void)
826 BlockJob *job, *next;
828 QLIST_FOREACH_SAFE(job, &block_jobs, job_list, next) {
829 AioContext *aio_context = blk_get_aio_context(job->blk);
831 aio_context_acquire(aio_context);
832 block_job_resume(job);
833 block_job_unref(job);
834 aio_context_release(aio_context);
/*
 * Conditionally enter a block_job pending a call to fn() while
 * under the block_job_lock critical section.
 */
/*
 * Wake up @job's coroutine if it may be entered. @fn is an optional extra
 * predicate; the visible code tests it with "fn && !fn(job)", so a NULL @fn
 * imposes no extra condition. Before waking the job, the sleep timer is
 * deleted.
 * NOTE(review): the bodies of the three checks (presumably early returns),
 * the lock/unlock calls and any busy-flag handling are not visible in this
 * copy.
 */
842 static void block_job_enter_cond(BlockJob *job, bool(*fn)(BlockJob *job))
844 if (!block_job_started(job)) {
847 if (job->deferred_to_main_loop) {
857 if (fn && !fn(job)) {
862 assert(!job->deferred_to_main_loop);
863 timer_del(&job->sleep_timer);
866 aio_co_wake(job->co);
/* Try to wake up @job with no extra condition (fn == NULL); see
 * block_job_enter_cond(). */
869 void block_job_enter(BlockJob *job)
871 block_job_enter_cond(job, NULL);
/* Whether @job has been flagged as cancelled. */
874 bool block_job_is_cancelled(BlockJob *job)
876 return job->cancelled;
/*
 * Put the job to sleep for @ns nanoseconds of QEMU_CLOCK_REALTIME. The
 * sleep is skipped when a pause is pending; the function ends with a pause
 * point.
 * NOTE(review): the body of the cancellation check (presumably an early
 * return) is not visible in this copy.
 */
879 void block_job_sleep_ns(BlockJob *job, int64_t ns)
883 /* Check cancellation *before* setting busy = false, too! */
884 if (block_job_is_cancelled(job)) {
888 if (!block_job_should_pause(job)) {
889 block_job_do_yield(job, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ns);
892 block_job_pause_point(job);
/*
 * Yield the job coroutine with no timer armed, so it stays suspended until
 * block_job_enter() is called. The yield is skipped when a pause is
 * pending; the function ends with a pause point.
 * NOTE(review): the body of the cancellation check (presumably an early
 * return) is not visible in this copy.
 */
895 void block_job_yield(BlockJob *job)
899 /* Check cancellation *before* setting busy = false, too! */
900 if (block_job_is_cancelled(job)) {
904 if (!block_job_should_pause(job)) {
905 block_job_do_yield(job, -1);
908 block_job_pause_point(job);
/*
 * Reset @job's I/O status to OK. When the status is not OK the job must be
 * user-paused with a positive pause_count (asserted).
 * NOTE(review): the body of the first check is not visible in this copy;
 * presumably an early return when the status is already OK.
 */
911 void block_job_iostatus_reset(BlockJob *job)
913 if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
916 assert(job->user_paused && job->pause_count > 0);
917 job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
/* Send the "block job ready" event for @job through
 * qapi_event_send_block_job_ready().
 * NOTE(review): the lines before the internal-job check, its body
 * (presumably an early return) and most event arguments are not visible in
 * this copy. */
920 void block_job_event_ready(BlockJob *job)
924 if (block_job_is_internal(job)) {
928 qapi_event_send_block_job_ready(job->driver->job_type,
932 job->speed, &error_abort);
/*
 * Decide how @job reacts to an I/O error, according to policy @on_err.
 *
 * @is_read: nonzero for a failed read, zero for a failed write; only used
 *           to label the emitted event
 * @error:   positive errno value (it is compared against ENOSPC)
 *
 * The ENOSPC and AUTO policies stop the job on ENOSPC and report any other
 * error; STOP, REPORT and IGNORE map to the action of the same name. For
 * non-internal jobs an error event is emitted. When the action is STOP the
 * job is user-paused and the error is recorded in its I/O status.
 * NOTE(review): the switch header, break/default lines and the return
 * statement are not visible in this copy; presumably @action is returned.
 */
935 BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
936 int is_read, int error)
938 BlockErrorAction action;
941 case BLOCKDEV_ON_ERROR_ENOSPC:
942 case BLOCKDEV_ON_ERROR_AUTO:
943 action = (error == ENOSPC) ?
944 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
946 case BLOCKDEV_ON_ERROR_STOP:
947 action = BLOCK_ERROR_ACTION_STOP;
949 case BLOCKDEV_ON_ERROR_REPORT:
950 action = BLOCK_ERROR_ACTION_REPORT;
952 case BLOCKDEV_ON_ERROR_IGNORE:
953 action = BLOCK_ERROR_ACTION_IGNORE;
958 if (!block_job_is_internal(job)) {
959 qapi_event_send_block_job_error(job->id,
960 is_read ? IO_OPERATION_TYPE_READ :
961 IO_OPERATION_TYPE_WRITE,
962 action, &error_abort);
964 if (action == BLOCK_ERROR_ACTION_STOP) {
965 /* make the pause user visible, which will be resumed from QMP. */
966 block_job_user_pause(job);
967 block_job_iostatus_set_err(job, error);
/* Arguments handed from block_job_defer_to_main_loop() to its bottom half.
 * NOTE(review): the typedef opener and further members are not visible in
 * this copy; the code below also uses data->job and data->opaque. */
/* AioContext of the job's BlockBackend at the time of deferral */
974 AioContext *aio_context;
/* function the bottom half calls */
975 BlockJobDeferToMainLoopFn *fn;
977 } BlockJobDeferToMainLoopData;
/*
 * Bottom half scheduled by block_job_defer_to_main_loop(). Calls
 * data->fn(data->job, data->opaque) while holding the AioContext recorded at
 * deferral time and, if the job has moved since, its current AioContext as
 * well.
 * NOTE(review): no release of @data is visible in this copy, although it is
 * allocated with g_malloc() in block_job_defer_to_main_loop() -- confirm it
 * is freed here.
 */
979 static void block_job_defer_to_main_loop_bh(void *opaque)
981 BlockJobDeferToMainLoopData *data = opaque;
982 AioContext *aio_context;
984 /* Prevent race with block_job_defer_to_main_loop() */
985 aio_context_acquire(data->aio_context);
987 /* Fetch BDS AioContext again, in case it has changed */
988 aio_context = blk_get_aio_context(data->job->blk);
989 if (aio_context != data->aio_context) {
990 aio_context_acquire(aio_context);
993 data->fn(data->job, data->opaque);
995 if (aio_context != data->aio_context) {
996 aio_context_release(aio_context);
999 aio_context_release(data->aio_context);
/*
 * Arrange for @fn to be called on @job from a one-shot bottom half
 * scheduled in the AioContext returned by qemu_get_aio_context(). The job
 * is flagged deferred_to_main_loop, which stops block_job_enter_cond() from
 * waking it and lets block_job_finish_sync() switch to polling.
 * NOTE(review): the third parameter line and the assignments of data->job
 * and data->fn are not visible in this copy.
 */
1004 void block_job_defer_to_main_loop(BlockJob *job,
1005 BlockJobDeferToMainLoopFn *fn,
1008 BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data));
1010 data->aio_context = blk_get_aio_context(job->blk);
1012 data->opaque = opaque;
1013 job->deferred_to_main_loop = true;
1015 aio_bh_schedule_oneshot(qemu_get_aio_context(),
1016 block_job_defer_to_main_loop_bh, data);