Git repository: qemu.git — blame view of block.c
Commit: "block: Keep DriveInfo alive until BlockDriverState dies"
Path: [qemu.git] / block.c
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
737e150e
PB
27#include "block/block_int.h"
28#include "block/blockjob.h"
1de7afc9 29#include "qemu/module.h"
7b1b5d19 30#include "qapi/qmp/qjson.h"
9c17d615 31#include "sysemu/sysemu.h"
3ae59580 32#include "sysemu/blockdev.h" /* FIXME layering violation */
1de7afc9 33#include "qemu/notify.h"
737e150e 34#include "block/coroutine.h"
c13163fb 35#include "block/qapi.h"
b2023818 36#include "qmp-commands.h"
1de7afc9 37#include "qemu/timer.h"
a5ee7bd4 38#include "qapi-event.h"
fc01f7e7 39
71e72a19 40#ifdef CONFIG_BSD
7674e7bf
FB
41#include <sys/types.h>
42#include <sys/stat.h>
43#include <sys/ioctl.h>
72cf2d4f 44#include <sys/queue.h>
c5e97233 45#ifndef __DragonFly__
7674e7bf
FB
46#include <sys/disk.h>
47#endif
c5e97233 48#endif
7674e7bf 49
49dc768d
AL
50#ifdef _WIN32
51#include <windows.h>
52#endif
53
/* A dirty bitmap attached to a BlockDriverState; bitmaps live on the
 * per-BDS bs->dirty_bitmaps list (see bdrv_new(), which initializes it). */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;                    /* the underlying hierarchical bitmap */
    QLIST_ENTRY(BdrvDirtyBitmap) list;  /* entry in bs->dirty_bitmaps */
};
58
1c9805a3
SH
59#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
60
2a87151f
SH
61#define COROUTINE_POOL_RESERVATION 64 /* number of coroutines to reserve */
62
7d4b4ba5 63static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
64static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
65 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 66 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
67static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
68 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 69 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
70static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
71 int64_t sector_num, int nb_sectors,
72 QEMUIOVector *iov);
73static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
74 int64_t sector_num, int nb_sectors,
75 QEMUIOVector *iov);
775aa8b6
KW
76static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
77 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
470c0504 78 BdrvRequestFlags flags);
775aa8b6
KW
79static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
80 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
f08f2dda 81 BdrvRequestFlags flags);
b2a61371
SH
82static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
83 int64_t sector_num,
84 QEMUIOVector *qiov,
85 int nb_sectors,
d20d9b7c 86 BdrvRequestFlags flags,
b2a61371
SH
87 BlockDriverCompletionFunc *cb,
88 void *opaque,
8c5873d6 89 bool is_write);
b2a61371 90static void coroutine_fn bdrv_co_do_rw(void *opaque);
621f0589 91static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
aa7bfbff 92 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
ec530c81 93
1b7bdbc1
SH
94static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
95 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 96
dc364f4c
BC
97static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
98 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
99
8a22f02a
SH
100static QLIST_HEAD(, BlockDriver) bdrv_drivers =
101 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 102
eb852011
MA
103/* If non-zero, use only whitelisted block drivers */
104static int use_bdrv_whitelist;
105
#ifdef _WIN32
/* Return non-zero if @filename begins with a drive letter and colon ("c:"). */
static int is_windows_drive_prefix(const char *filename)
{
    char c = filename[0];

    if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) {
        return 0;
    }
    return filename[1] == ':';
}

/* Return non-zero if @filename names a bare Windows drive ("d:") or a
 * device namespace path ("\\.\..." or "//./..."). */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
#endif
125
0563e191 126/* throttling disk I/O limits */
cc0681c4
BC
127void bdrv_set_io_limits(BlockDriverState *bs,
128 ThrottleConfig *cfg)
98f90dba 129{
cc0681c4 130 int i;
98f90dba 131
cc0681c4 132 throttle_config(&bs->throttle_state, cfg);
98f90dba 133
cc0681c4
BC
134 for (i = 0; i < 2; i++) {
135 qemu_co_enter_next(&bs->throttled_reqs[i]);
98f90dba 136 }
cc0681c4
BC
137}
138
139/* this function drain all the throttled IOs */
140static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
141{
142 bool drained = false;
143 bool enabled = bs->io_limits_enabled;
144 int i;
145
146 bs->io_limits_enabled = false;
147
148 for (i = 0; i < 2; i++) {
149 while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
150 drained = true;
151 }
152 }
153
154 bs->io_limits_enabled = enabled;
98f90dba 155
cc0681c4 156 return drained;
98f90dba
ZYW
157}
158
cc0681c4 159void bdrv_io_limits_disable(BlockDriverState *bs)
0563e191 160{
cc0681c4 161 bs->io_limits_enabled = false;
0563e191 162
cc0681c4
BC
163 bdrv_start_throttled_reqs(bs);
164
165 throttle_destroy(&bs->throttle_state);
0563e191
ZYW
166}
167
cc0681c4 168static void bdrv_throttle_read_timer_cb(void *opaque)
0563e191 169{
cc0681c4
BC
170 BlockDriverState *bs = opaque;
171 qemu_co_enter_next(&bs->throttled_reqs[0]);
0563e191
ZYW
172}
173
cc0681c4 174static void bdrv_throttle_write_timer_cb(void *opaque)
0563e191 175{
cc0681c4
BC
176 BlockDriverState *bs = opaque;
177 qemu_co_enter_next(&bs->throttled_reqs[1]);
0563e191
ZYW
178}
179
cc0681c4
BC
180/* should be called before bdrv_set_io_limits if a limit is set */
181void bdrv_io_limits_enable(BlockDriverState *bs)
182{
183 assert(!bs->io_limits_enabled);
184 throttle_init(&bs->throttle_state,
13af91eb 185 bdrv_get_aio_context(bs),
cc0681c4
BC
186 QEMU_CLOCK_VIRTUAL,
187 bdrv_throttle_read_timer_cb,
188 bdrv_throttle_write_timer_cb,
189 bs);
190 bs->io_limits_enabled = true;
191}
192
/* This function makes an IO wait if needed
 *
 * @bytes: the size of the IO in bytes (used for throttle accounting;
 *         note: an older version of this comment said "nb_sectors",
 *         which no longer matches the parameter)
 * @is_write: is the IO a write (reads and writes use separate queues
 *            and are accounted separately)
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     unsigned int bytes,
                                     bool is_write)
{
    /* must this IO wait for the throttle timer? */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);

    /* if it must wait, or any request of this type is already throttled,
     * queue this IO too (keeps it ordered behind earlier queued requests) */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
    }

    /* the IO will be executed, do the accounting */
    throttle_account(&bs->throttle_state, is_write, bytes);

    /* if the next request must wait -> do nothing (the timer will wake it) */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
    }

    /* else queue next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
}
223
339064d5
KW
224size_t bdrv_opt_mem_align(BlockDriverState *bs)
225{
226 if (!bs || !bs->drv) {
227 /* 4k should be on the safe side */
228 return 4096;
229 }
230
231 return bs->bl.opt_mem_alignment;
232}
233
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    size_t span;

#ifdef _WIN32
    /* Windows drive letters and device paths are not protocol prefixes */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
    span = strcspn(path, ":/\\");
#else
    span = strcspn(path, ":/");
#endif

    /* a protocol prefix means a ':' occurs before any path separator */
    return path[span] == ':';
}
251
/* Return non-zero if @path is absolute (on Windows this also covers drive
 * letters and device paths such as "\\.\d:"). */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return *path == '/' || *path == '\\';
#else
    return *path == '/';
#endif
}
264
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end, *last_sep;
    int len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* skip a "protocol:" prefix in the base path, if present */
    prefix_end = strchr(base_path, ':');
    prefix_end = prefix_end ? prefix_end + 1 : base_path;

    /* locate the final directory separator of the base path */
    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!last_sep || backslash > last_sep) {
            last_sep = backslash;
        }
    }
#endif
    last_sep = last_sep ? last_sep + 1 : base_path;

    /* the directory part ends at whichever marker is further right */
    if (last_sep > prefix_end) {
        prefix_end = last_sep;
    }

    /* copy the (possibly truncated) directory part, then append filename */
    len = prefix_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
308
dc5a1371
PB
309void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
310{
311 if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
312 pstrcpy(dest, sz, bs->backing_file);
313 } else {
314 path_combine(dest, sz, bs->filename, bs->backing_file);
315 }
316}
317
5efa9d5a 318void bdrv_register(BlockDriver *bdrv)
ea2384d3 319{
8c5873d6
SH
320 /* Block drivers without coroutine functions need emulation */
321 if (!bdrv->bdrv_co_readv) {
f9f05dc5
KW
322 bdrv->bdrv_co_readv = bdrv_co_readv_em;
323 bdrv->bdrv_co_writev = bdrv_co_writev_em;
324
f8c35c1d
SH
325 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
326 * the block driver lacks aio we need to emulate that too.
327 */
f9f05dc5
KW
328 if (!bdrv->bdrv_aio_readv) {
329 /* add AIO emulation layer */
330 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
331 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
f9f05dc5 332 }
83f64091 333 }
b2e12bc6 334
8a22f02a 335 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 336}
b338082b
FB
337
/* create a new block device (by default it is empty)
 *
 * @device_name: device id; may be "" for an anonymous BDS, which is then
 *               not inserted into the global bdrv_states list
 * @errp: set if the name clashes with an existing device id or node name
 *
 * Returns a BDS with refcnt 1 owned by the caller, or NULL on name clash.
 */
BlockDriverState *bdrv_new(const char *device_name, Error **errp)
{
    BlockDriverState *bs;
    int i;

    /* reject names already used as a device id or as a node name */
    if (bdrv_find(device_name)) {
        error_setg(errp, "Device with id '%s' already exists",
                   device_name);
        return NULL;
    }
    if (bdrv_find_node(device_name)) {
        error_setg(errp, "Device with node-name '%s' already exists",
                   device_name);
        return NULL;
    }

    bs = g_new0(BlockDriverState, 1);
    QLIST_INIT(&bs->dirty_bitmaps);
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    /* only named devices go onto the global device list */
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    }
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        QLIST_INIT(&bs->op_blockers[i]);
    }
    bdrv_iostatus_disable(bs);
    notifier_list_init(&bs->close_notifiers);
    notifier_with_return_list_init(&bs->before_write_notifiers);
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
    bs->refcnt = 1;                             /* caller owns this reference */
    bs->aio_context = qemu_get_aio_context();   /* main loop context by default */

    return bs;
}
374
d7d512f6
PB
375void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
376{
377 notifier_list_add(&bs->close_notifiers, notify);
378}
379
ea2384d3
FB
380BlockDriver *bdrv_find_format(const char *format_name)
381{
382 BlockDriver *drv1;
8a22f02a
SH
383 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
384 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 385 return drv1;
8a22f02a 386 }
ea2384d3
FB
387 }
388 return NULL;
389}
390
b64ec4e4 391static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
eb852011 392{
b64ec4e4
FZ
393 static const char *whitelist_rw[] = {
394 CONFIG_BDRV_RW_WHITELIST
395 };
396 static const char *whitelist_ro[] = {
397 CONFIG_BDRV_RO_WHITELIST
eb852011
MA
398 };
399 const char **p;
400
b64ec4e4 401 if (!whitelist_rw[0] && !whitelist_ro[0]) {
eb852011 402 return 1; /* no whitelist, anything goes */
b64ec4e4 403 }
eb852011 404
b64ec4e4 405 for (p = whitelist_rw; *p; p++) {
eb852011
MA
406 if (!strcmp(drv->format_name, *p)) {
407 return 1;
408 }
409 }
b64ec4e4
FZ
410 if (read_only) {
411 for (p = whitelist_ro; *p; p++) {
412 if (!strcmp(drv->format_name, *p)) {
413 return 1;
414 }
415 }
416 }
eb852011
MA
417 return 0;
418}
419
b64ec4e4
FZ
420BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
421 bool read_only)
eb852011
MA
422{
423 BlockDriver *drv = bdrv_find_format(format_name);
b64ec4e4 424 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
eb852011
MA
425}
426
/* State shared between bdrv_create() and the coroutine that performs the
 * actual image creation (bdrv_create_co_entry). */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_create callback is invoked */
    char *filename;     /* owned copy of the image file name (freed by bdrv_create) */
    QemuOpts *opts;     /* creation options; borrowed from the caller */
    int ret;            /* result code; NOT_DONE while still in flight */
    Error *err;         /* error detail, set on failure */
} CreateCo;
434
435static void coroutine_fn bdrv_create_co_entry(void *opaque)
436{
cc84d90f
HR
437 Error *local_err = NULL;
438 int ret;
439
5b7e1542
ZYW
440 CreateCo *cco = opaque;
441 assert(cco->drv);
442
c282e1fd 443 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
84d18f06 444 if (local_err) {
cc84d90f
HR
445 error_propagate(&cco->err, local_err);
446 }
447 cco->ret = ret;
5b7e1542
ZYW
448}
449
/* Create an image file using @drv.
 *
 * The driver's bdrv_create callback must run in coroutine context: when
 * already in a coroutine it is called directly, otherwise a new coroutine
 * is spawned and the main AioContext is polled until cco.ret leaves the
 * NOT_DONE sentinel.
 *
 * Returns 0 on success or a negative errno; sets @errp on failure.
 */
int bdrv_create(BlockDriver *drv, const char* filename,
                QemuOpts *opts, Error **errp)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),   /* freed at 'out' */
        .opts = opts,
        .ret = NOT_DONE,
        .err = NULL,
    };

    if (!drv->bdrv_create) {
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        /* busy-poll the event loop until the coroutine reports completion */
        while (cco.ret == NOT_DONE) {
            aio_poll(qemu_get_aio_context(), true);
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            /* driver failed without a detailed error; synthesize one */
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    g_free(cco.filename);
    return ret;
}
494
c282e1fd 495int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
84a12e66
CH
496{
497 BlockDriver *drv;
cc84d90f
HR
498 Error *local_err = NULL;
499 int ret;
84a12e66 500
98289620 501 drv = bdrv_find_protocol(filename, true);
84a12e66 502 if (drv == NULL) {
cc84d90f 503 error_setg(errp, "Could not find protocol for file '%s'", filename);
16905d71 504 return -ENOENT;
84a12e66
CH
505 }
506
c282e1fd 507 ret = bdrv_create(drv, filename, opts, &local_err);
84d18f06 508 if (local_err) {
cc84d90f
HR
509 error_propagate(errp, local_err);
510 }
511 return ret;
84a12e66
CH
512}
513
/* Recompute bs->bl (BlockLimits) for @bs: clear it, seed defaults from the
 * bs->file and bs->backing_hd children (recursively refreshed first), then
 * let the driver override via its bdrv_refresh_limits callback.  On error
 * @errp is set and bs->bl may be only partially filled in. */
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        /* no underlying file: fall back to a 512-byte alignment */
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        /* must satisfy the stricter of this node's and the backing limits */
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}
557
/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    const char *tmpdir = getenv("TMPDIR");
    int fd;

    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;          /* template did not fit the buffer */
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        unlink(filename);           /* don't leave a half-created file */
        return -errno;
    }
    return 0;
#endif
}
fc01f7e7 593
84a12e66
CH
594/*
595 * Detect host devices. By convention, /dev/cdrom[N] is always
596 * recognized as a host CDROM.
597 */
598static BlockDriver *find_hdev_driver(const char *filename)
599{
600 int score_max = 0, score;
601 BlockDriver *drv = NULL, *d;
602
603 QLIST_FOREACH(d, &bdrv_drivers, list) {
604 if (d->bdrv_probe_device) {
605 score = d->bdrv_probe_device(filename);
606 if (score > score_max) {
607 score_max = score;
608 drv = d;
609 }
610 }
611 }
612
613 return drv;
614}
615
98289620
KW
616BlockDriver *bdrv_find_protocol(const char *filename,
617 bool allow_protocol_prefix)
83f64091
FB
618{
619 BlockDriver *drv1;
620 char protocol[128];
1cec71e3 621 int len;
83f64091 622 const char *p;
19cb3738 623
66f82cee
KW
624 /* TODO Drivers without bdrv_file_open must be specified explicitly */
625
39508e7a
CH
626 /*
627 * XXX(hch): we really should not let host device detection
628 * override an explicit protocol specification, but moving this
629 * later breaks access to device names with colons in them.
630 * Thanks to the brain-dead persistent naming schemes on udev-
631 * based Linux systems those actually are quite common.
632 */
633 drv1 = find_hdev_driver(filename);
634 if (drv1) {
635 return drv1;
636 }
637
98289620 638 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
39508e7a 639 return bdrv_find_format("file");
84a12e66 640 }
98289620 641
9e0b22f4
SH
642 p = strchr(filename, ':');
643 assert(p != NULL);
1cec71e3
AL
644 len = p - filename;
645 if (len > sizeof(protocol) - 1)
646 len = sizeof(protocol) - 1;
647 memcpy(protocol, filename, len);
648 protocol[len] = '\0';
8a22f02a 649 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 650 if (drv1->protocol_name &&
8a22f02a 651 !strcmp(drv1->protocol_name, protocol)) {
83f64091 652 return drv1;
8a22f02a 653 }
83f64091
FB
654 }
655 return NULL;
656}
657
/* Probe the image format of @bs: read the first bytes of the image and ask
 * every registered driver's bdrv_probe to score them; the highest score
 * wins.  scsi-generic devices and empty/ejected drives always get the
 * "raw" driver.  On success *pdrv is set and 0 returned; on failure a
 * negative errno is returned and @errp set. */
static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv, Error **errp)
{
    int score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        drv = bdrv_find_format("raw");
        if (!drv) {
            error_setg(errp, "Could not find raw image format");
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    /* read the image header; ret is the number of bytes actually read */
    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}
704
51762288
SH
705/**
706 * Set the current 'total_sectors' value
65a9bb25 707 * Return 0 on success, -errno on error.
51762288
SH
708 */
709static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
710{
711 BlockDriver *drv = bs->drv;
712
396759ad
NB
713 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
714 if (bs->sg)
715 return 0;
716
51762288
SH
717 /* query actual device if possible, otherwise just trust the hint */
718 if (drv->bdrv_getlength) {
719 int64_t length = drv->bdrv_getlength(bs);
720 if (length < 0) {
721 return length;
722 }
7e382003 723 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
51762288
SH
724 }
725
726 bs->total_sectors = hint;
727 return 0;
728}
729
9e8f1835
PB
730/**
731 * Set open flags for a given discard mode
732 *
733 * Return 0 on success, -1 if the discard mode was invalid.
734 */
735int bdrv_parse_discard_flags(const char *mode, int *flags)
736{
737 *flags &= ~BDRV_O_UNMAP;
738
739 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
740 /* do nothing */
741 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
742 *flags |= BDRV_O_UNMAP;
743 } else {
744 return -1;
745 }
746
747 return 0;
748}
749
c3993cdc
SH
750/**
751 * Set open flags for a given cache mode
752 *
753 * Return 0 on success, -1 if the cache mode was invalid.
754 */
755int bdrv_parse_cache_flags(const char *mode, int *flags)
756{
757 *flags &= ~BDRV_O_CACHE_MASK;
758
759 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
760 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
761 } else if (!strcmp(mode, "directsync")) {
762 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
763 } else if (!strcmp(mode, "writeback")) {
764 *flags |= BDRV_O_CACHE_WB;
765 } else if (!strcmp(mode, "unsafe")) {
766 *flags |= BDRV_O_CACHE_WB;
767 *flags |= BDRV_O_NO_FLUSH;
768 } else if (!strcmp(mode, "writethrough")) {
769 /* this is the default */
770 } else {
771 return -1;
772 }
773
774 return 0;
775}
776
53fec9d3
SH
777/**
778 * The copy-on-read flag is actually a reference count so multiple users may
779 * use the feature without worrying about clobbering its previous state.
780 * Copy-on-read stays enabled until all users have called to disable it.
781 */
782void bdrv_enable_copy_on_read(BlockDriverState *bs)
783{
784 bs->copy_on_read++;
785}
786
787void bdrv_disable_copy_on_read(BlockDriverState *bs)
788{
789 assert(bs->copy_on_read > 0);
790 bs->copy_on_read--;
791}
792
b1e6fc08
KW
793/*
794 * Returns the flags that a temporary snapshot should get, based on the
795 * originally requested flags (the originally requested image will have flags
796 * like a backing file)
797 */
798static int bdrv_temp_snapshot_flags(int flags)
799{
800 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
801}
802
0b50cc88
KW
803/*
804 * Returns the flags that bs->file should get, based on the given flags for
805 * the parent BDS
806 */
807static int bdrv_inherited_flags(int flags)
808{
809 /* Enable protocol handling, disable format probing for bs->file */
810 flags |= BDRV_O_PROTOCOL;
811
812 /* Our block drivers take care to send flushes and respect unmap policy,
813 * so we can enable both unconditionally on lower layers. */
814 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
815
0b50cc88 816 /* Clear flags that only apply to the top layer */
5669b44d 817 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
0b50cc88
KW
818
819 return flags;
820}
821
317fc44e
KW
822/*
823 * Returns the flags that bs->backing_hd should get, based on the given flags
824 * for the parent BDS
825 */
826static int bdrv_backing_flags(int flags)
827{
828 /* backing files always opened read-only */
829 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
830
831 /* snapshot=on is handled on the top layer */
8bfea15d 832 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
317fc44e
KW
833
834 return flags;
835}
836
7b272452
KW
837static int bdrv_open_flags(BlockDriverState *bs, int flags)
838{
839 int open_flags = flags | BDRV_O_CACHE_WB;
840
841 /*
842 * Clear flags that are internal to the block layer before opening the
843 * image.
844 */
20cca275 845 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
7b272452
KW
846
847 /*
848 * Snapshots should be writable.
849 */
8bfea15d 850 if (flags & BDRV_O_TEMPORARY) {
7b272452
KW
851 open_flags |= BDRV_O_RDWR;
852 }
853
854 return open_flags;
855}
856
636ea370
KW
857static void bdrv_assign_node_name(BlockDriverState *bs,
858 const char *node_name,
859 Error **errp)
6913c0c2
BC
860{
861 if (!node_name) {
636ea370 862 return;
6913c0c2
BC
863 }
864
865 /* empty string node name is invalid */
866 if (node_name[0] == '\0') {
867 error_setg(errp, "Empty node name");
636ea370 868 return;
6913c0c2
BC
869 }
870
0c5e94ee
BC
871 /* takes care of avoiding namespaces collisions */
872 if (bdrv_find(node_name)) {
873 error_setg(errp, "node-name=%s is conflicting with a device id",
874 node_name);
636ea370 875 return;
0c5e94ee
BC
876 }
877
6913c0c2
BC
878 /* takes care of avoiding duplicates node names */
879 if (bdrv_find_node(node_name)) {
880 error_setg(errp, "Duplicate node name");
636ea370 881 return;
6913c0c2
BC
882 }
883
884 /* copy node name into the bs and insert it into the graph list */
885 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
886 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
6913c0c2
BC
887}
888
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @bs:      the BDS being opened; must not yet have a bs->file
 * @file:    already-opened protocol-level BDS backing @bs, or NULL
 * @options: option QDict; "filename" and "node-name" are consumed here
 * @flags:   BDRV_O_* open flags for @bs
 * @drv:     driver to open @bs with (must be non-NULL)
 *
 * Returns 0 on success, -errno on failure (with @errp set).
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* prefer the protocol layer's filename over the "filename" option */
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);
    bs->growable = !!(flags & BDRV_O_PROTOCOL);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        /* distinguish "driver is r/o-only" from "driver not whitelisted" */
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                   ? "Driver '%s' can only be used for read-only devices"
                   : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* prefer the driver's detailed error, then fall back to errno text */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    /* undo the partial initialization above; @file is owned by the caller */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
1026
5e5c4f63
KW
1027static QDict *parse_json_filename(const char *filename, Error **errp)
1028{
1029 QObject *options_obj;
1030 QDict *options;
1031 int ret;
1032
1033 ret = strstart(filename, "json:", &filename);
1034 assert(ret);
1035
1036 options_obj = qobject_from_json(filename);
1037 if (!options_obj) {
1038 error_setg(errp, "Could not parse the JSON options");
1039 return NULL;
1040 }
1041
1042 if (qobject_type(options_obj) != QTYPE_QDICT) {
1043 qobject_decref(options_obj);
1044 error_setg(errp, "Invalid JSON object given");
1045 return NULL;
1046 }
1047
1048 options = qobject_to_qdict(options_obj);
1049 qdict_flatten(options);
1050
1051 return options;
1052}
1053
/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 *
 * On return, *options contains a "driver" entry whenever a driver could be
 * determined, and (for protocol opens) a "filename" entry.  *pfilename is
 * cleared when a json: pseudo-protocol filename was consumed.
 * Returns 0 on success, negative errno on failure (errp set).
 */
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
                             BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        /* The filename has been fully consumed into the options dict */
        *pfilename = filename = NULL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            /* Remember to run the driver's filename parser further down */
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                             "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (drv) {
        /* Caller passed an explicit driver; it may not conflict with a
         * "driver" option in the dict */
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            if (filename) {
                /* Probe the protocol driver from the filename prefix */
                drv = bdrv_find_protocol(filename, parse_filename);
                if (!drv) {
                    error_setg(errp, "Unknown protocol");
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    /* For protocol opens a driver must have been determined by now; format
     * opens may still rely on probing later */
    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* The parsed pieces replace the raw filename unless the driver
         * insists on keeping it */
        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}
1146
8d24cce1
FZ
/*
 * Install @backing_hd as the new backing file of @bs, or detach the current
 * backing file when @backing_hd is NULL.  Maintains the op blocker that
 * marks the backing BDS as in use and keeps bs->backing_file /
 * bs->backing_format in sync.
 * NOTE(review): the reference to @backing_hd appears to be inherited by @bs
 * (callers do not unref on success) — confirm against bdrv_close().
 */
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{

    if (bs->backing_hd) {
        /* Release the blocker held on the old backing file first */
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        /* First backing file: create the blocker error describing the user */
        error_setg(&bs->backing_blocker,
                   "device is used as backing hd of '%s'",
                   bs->device_name);
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        /* Detaching: no backing file left, drop the blocker object */
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    /* Block operations on the backing file while it is in use ... */
    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
                    bs->backing_blocker);
out:
    /* Limits (alignment, max transfer, ...) may change with the backing file */
    bdrv_refresh_limits(bs, NULL);
}
1177
31ca6d07
KW
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_file_open.
 *
 * Returns 0 on success (including the no-backing-file case), negative errno
 * on failure (errp set).
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char *backing_filename = g_malloc0(PATH_MAX);
    int ret = 0;
    BlockDriver *back_drv = NULL;
    BlockDriverState *backing_hd;
    Error *local_err = NULL;

    /* Already attached: nothing to do, but we still own the options ref */
    if (bs->backing_hd != NULL) {
        QDECREF(options);
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        /* Filename comes from the options dict; leave ours empty */
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        /* No backing file configured anywhere: success, nothing to open */
        QDECREF(options);
        goto free_exit;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        QDECREF(options);
        goto free_exit;
    }

    backing_hd = bdrv_new("", errp);

    /* Honour an explicitly recorded backing format, if any */
    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

    assert(bs->backing_hd == NULL);
    /* bdrv_open() consumes the options reference, even on failure */
    ret = bdrv_open(&backing_hd,
                    *backing_filename ? backing_filename : NULL, NULL, options,
                    bdrv_backing_flags(bs->open_flags), back_drv, &local_err);
    if (ret < 0) {
        bdrv_unref(backing_hd);
        backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        goto free_exit;
    }
    bdrv_set_backing_hd(bs, backing_hd);

free_exit:
    g_free(backing_filename);
    return ret;
}
1246
da557aac
HR
1247/*
1248 * Opens a disk image whose options are given as BlockdevRef in another block
1249 * device's options.
1250 *
da557aac
HR
1251 * If allow_none is true, no image will be opened if filename is false and no
1252 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1253 *
1254 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1255 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1256 * itself, all options starting with "${bdref_key}." are considered part of the
1257 * BlockdevRef.
1258 *
1259 * The BlockdevRef will be removed from the options QDict.
f67503e5
HR
1260 *
1261 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
da557aac
HR
1262 */
1263int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1264 QDict *options, const char *bdref_key, int flags,
f7d9fd8c 1265 bool allow_none, Error **errp)
da557aac
HR
1266{
1267 QDict *image_options;
1268 int ret;
1269 char *bdref_key_dot;
1270 const char *reference;
1271
f67503e5
HR
1272 assert(pbs);
1273 assert(*pbs == NULL);
1274
da557aac
HR
1275 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1276 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1277 g_free(bdref_key_dot);
1278
1279 reference = qdict_get_try_str(options, bdref_key);
1280 if (!filename && !reference && !qdict_size(image_options)) {
1281 if (allow_none) {
1282 ret = 0;
1283 } else {
1284 error_setg(errp, "A block device must be specified for \"%s\"",
1285 bdref_key);
1286 ret = -EINVAL;
1287 }
b20e61e0 1288 QDECREF(image_options);
da557aac
HR
1289 goto done;
1290 }
1291
f7d9fd8c 1292 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
da557aac
HR
1293
1294done:
1295 qdict_del(options, bdref_key);
1296 return ret;
1297}
1298
/*
 * For snapshot=on: create a throw-away qcow2 overlay sized like @bs and
 * insert it above @bs with bdrv_append(), so that guest writes land in the
 * temporary overlay instead of the real image.
 * Returns 0 on success, negative errno on failure (errp set).
 */
int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
{
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
    char *tmp_filename = g_malloc0(PATH_MAX + 1);
    int64_t total_size;
    BlockDriver *bdrv_qcow2;
    QemuOpts *opts = NULL;
    QDict *snapshot_options;
    BlockDriverState *bs_snapshot;
    Error *local_err;
    int ret;

    /* if snapshot, we create a temporary backing file and open it
       instead of opening 'filename' directly */

    /* Get the required size from the image */
    total_size = bdrv_getlength(bs);
    if (total_size < 0) {
        ret = total_size;
        error_setg_errno(errp, -total_size, "Could not get image size");
        goto out;
    }

    /* Create the temporary image */
    ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not get temporary filename");
        goto out;
    }

    bdrv_qcow2 = bdrv_find_format("qcow2");
    opts = qemu_opts_create(bdrv_qcow2->create_opts, NULL, 0,
                            &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size);
    ret = bdrv_create(bdrv_qcow2, tmp_filename, opts, &local_err);
    qemu_opts_del(opts);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not create temporary overlay "
                         "'%s': %s", tmp_filename,
                         error_get_pretty(local_err));
        error_free(local_err);
        goto out;
    }

    /* Prepare a new options QDict for the temporary file */
    snapshot_options = qdict_new();
    qdict_put(snapshot_options, "file.driver",
              qstring_from_str("file"));
    qdict_put(snapshot_options, "file.filename",
              qstring_from_str(tmp_filename));

    bs_snapshot = bdrv_new("", &error_abort);

    /* bdrv_open() consumes the snapshot_options reference, even on failure */
    ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
                    flags, bdrv_qcow2, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto out;
    }

    /* NOTE(review): bdrv_append() appears to swap contents so that @bs stays
     * the device-visible BDS — confirm against bdrv_append()/bdrv_swap() */
    bdrv_append(bs_snapshot, bs);

out:
    g_free(tmp_filename);
    return ret;
}
1365
b6ce07aa
KW
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_open.
 *
 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
 * If it is not NULL, the referenced BDS will be reused.
 *
 * The reference parameter may be used to specify an existing block device which
 * should be opened. If specified, neither options nor a filename may be given,
 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
 *
 * Returns 0 on success, negative errno on failure (errp set).
 */
int bdrv_open(BlockDriverState **pbs, const char *filename,
              const char *reference, QDict *options, int flags,
              BlockDriver *drv, Error **errp)
{
    int ret;
    BlockDriverState *file = NULL, *bs;
    const char *drvname;
    Error *local_err = NULL;
    int snapshot_flags = 0;

    assert(pbs);

    /* Referencing an existing, named BDS: just take a reference on it */
    if (reference) {
        bool options_non_empty = options ? qdict_size(options) : false;
        QDECREF(options);

        if (*pbs) {
            error_setg(errp, "Cannot reuse an existing BDS when referencing "
                       "another block device");
            return -EINVAL;
        }

        if (filename || options_non_empty) {
            error_setg(errp, "Cannot reference an existing block device with "
                       "additional options or a new filename");
            return -EINVAL;
        }

        bs = bdrv_lookup_bs(reference, reference, errp);
        if (!bs) {
            return -ENODEV;
        }
        bdrv_ref(bs);
        *pbs = bs;
        return 0;
    }

    if (*pbs) {
        bs = *pbs;
    } else {
        bs = bdrv_new("", &error_abort);
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    /* Normalize legacy filename/flags into the options dict */
    ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
    if (local_err) {
        goto fail;
    }

    /* Find the right image format driver */
    drv = NULL;
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        qdict_del(options, "driver");
        if (!drv) {
            error_setg(errp, "Unknown driver: '%s'", drvname);
            ret = -EINVAL;
            goto fail;
        }
    }

    assert(drvname || !(flags & BDRV_O_PROTOCOL));
    if (drv && !drv->bdrv_file_open) {
        /* If the user explicitly wants a format driver here, we'll need to add
         * another layer for the protocol in bs->file */
        flags &= ~BDRV_O_PROTOCOL;
    }

    /* bs->options keeps the original dict; work on a shallow clone so
     * consumed keys can be tracked for the unknown-option check below */
    bs->options = options;
    options = qdict_clone_shallow(options);

    /* Open image file without format layer */
    if ((flags & BDRV_O_PROTOCOL) == 0) {
        if (flags & BDRV_O_RDWR) {
            flags |= BDRV_O_ALLOW_RDWR;
        }
        if (flags & BDRV_O_SNAPSHOT) {
            snapshot_flags = bdrv_temp_snapshot_flags(flags);
            flags = bdrv_backing_flags(flags);
        }

        assert(file == NULL);
        ret = bdrv_open_image(&file, filename, options, "file",
                              bdrv_inherited_flags(flags),
                              true, &local_err);
        if (ret < 0) {
            goto fail;
        }
    }

    /* Image format probing */
    if (!drv && file) {
        ret = find_image_format(file, filename, &drv, &local_err);
        if (ret < 0) {
            goto fail;
        }
    } else if (!drv) {
        error_setg(errp, "Must specify either driver or file");
        ret = -EINVAL;
        goto fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
    if (ret < 0) {
        goto fail;
    }

    /* Drop our reference if bdrv_open_common() did not adopt @file as
     * bs->file */
    if (file && (bs->file != file)) {
        bdrv_unref(file);
        file = NULL;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0) {
        QDict *backing_options;

        qdict_extract_subqdict(options, &backing_options, "backing.");
        ret = bdrv_open_backing_file(bs, backing_options, &local_err);
        if (ret < 0) {
            goto close_and_fail;
        }
    }

    bdrv_refresh_filename(bs);

    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
     * temporary snapshot afterwards. */
    if (snapshot_flags) {
        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
        if (local_err) {
            goto close_and_fail;
        }
    }

    /* Check if any unknown options were used */
    if (options && (qdict_size(options) != 0)) {
        const QDictEntry *entry = qdict_first(options);
        if (flags & BDRV_O_PROTOCOL) {
            error_setg(errp, "Block protocol '%s' doesn't support the option "
                       "'%s'", drv->format_name, entry->key);
        } else {
            error_setg(errp, "Block format '%s' used by device '%s' doesn't "
                       "support the option '%s'", drv->format_name,
                       bs->device_name, entry->key);
        }

        ret = -EINVAL;
        goto close_and_fail;
    }

    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    } else if (!runstate_check(RUN_STATE_PRELAUNCH)
               && !runstate_check(RUN_STATE_INMIGRATE)
               && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
        error_setg(errp,
                   "Guest must be stopped for opening of encrypted image");
        ret = -EBUSY;
        goto close_and_fail;
    }

    QDECREF(options);
    *pbs = bs;
    return 0;

fail:
    if (file != NULL) {
        bdrv_unref(file);
    }
    QDECREF(bs->options);
    QDECREF(options);
    bs->options = NULL;
    if (!*pbs) {
        /* If *pbs is NULL, a new BDS has been created in this function and
           needs to be freed now. Otherwise, it does not need to be closed,
           since it has not really been opened yet. */
        bdrv_unref(bs);
    }
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;

close_and_fail:
    /* See fail path, but now the BDS has to be always closed */
    if (*pbs) {
        bdrv_close(bs);
    } else {
        bdrv_unref(bs);
    }
    QDECREF(options);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}
1583
e971aa12
JC
/* One entry in the atomic reopen queue: the staged per-BDS reopen state plus
 * a flag recording whether prepare() already succeeded for it, so that the
 * rollback path only aborts entries that were actually prepared. */
typedef struct BlockReopenQueueEntry {
    bool prepared;          /* true once bdrv_reopen_prepare() succeeded */
    BDRVReopenState state;  /* staged flags and driver-private data */
    QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;
1589
1590/*
1591 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1592 * reopen of multiple devices.
1593 *
1594 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1595 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1596 * be created and initialized. This newly created BlockReopenQueue should be
1597 * passed back in for subsequent calls that are intended to be of the same
1598 * atomic 'set'.
1599 *
1600 * bs is the BlockDriverState to add to the reopen queue.
1601 *
1602 * flags contains the open flags for the associated bs
1603 *
1604 * returns a pointer to bs_queue, which is either the newly allocated
1605 * bs_queue, or the existing bs_queue being used.
1606 *
1607 */
1608BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1609 BlockDriverState *bs, int flags)
1610{
1611 assert(bs != NULL);
1612
1613 BlockReopenQueueEntry *bs_entry;
1614 if (bs_queue == NULL) {
1615 bs_queue = g_new0(BlockReopenQueue, 1);
1616 QSIMPLEQ_INIT(bs_queue);
1617 }
1618
f1f25a2e
KW
1619 /* bdrv_open() masks this flag out */
1620 flags &= ~BDRV_O_PROTOCOL;
1621
e971aa12 1622 if (bs->file) {
f1f25a2e 1623 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
e971aa12
JC
1624 }
1625
1626 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1627 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1628
1629 bs_entry->state.bs = bs;
1630 bs_entry->state.flags = flags;
1631
1632 return bs_queue;
1633}
1634
1635/*
1636 * Reopen multiple BlockDriverStates atomically & transactionally.
1637 *
1638 * The queue passed in (bs_queue) must have been built up previous
1639 * via bdrv_reopen_queue().
1640 *
1641 * Reopens all BDS specified in the queue, with the appropriate
1642 * flags. All devices are prepared for reopen, and failure of any
1643 * device will cause all device changes to be abandonded, and intermediate
1644 * data cleaned up.
1645 *
1646 * If all devices prepare successfully, then the changes are committed
1647 * to all devices.
1648 *
1649 */
1650int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1651{
1652 int ret = -1;
1653 BlockReopenQueueEntry *bs_entry, *next;
1654 Error *local_err = NULL;
1655
1656 assert(bs_queue != NULL);
1657
1658 bdrv_drain_all();
1659
1660 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1661 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1662 error_propagate(errp, local_err);
1663 goto cleanup;
1664 }
1665 bs_entry->prepared = true;
1666 }
1667
1668 /* If we reach this point, we have success and just need to apply the
1669 * changes
1670 */
1671 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1672 bdrv_reopen_commit(&bs_entry->state);
1673 }
1674
1675 ret = 0;
1676
1677cleanup:
1678 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1679 if (ret && bs_entry->prepared) {
1680 bdrv_reopen_abort(&bs_entry->state);
1681 }
1682 g_free(bs_entry);
1683 }
1684 g_free(bs_queue);
1685 return ret;
1686}
1687
1688
1689/* Reopen a single BlockDriverState with the specified flags. */
1690int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1691{
1692 int ret = -1;
1693 Error *local_err = NULL;
1694 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1695
1696 ret = bdrv_reopen_multiple(queue, &local_err);
1697 if (local_err != NULL) {
1698 error_propagate(errp, local_err);
1699 }
1700 return ret;
1701}
1702
1703
/*
 * Prepares a BlockDriverState for reopen. All changes are staged in the
 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 * the block driver layer .bdrv_reopen_prepare()
 *
 * bs is the BlockDriverState to reopen
 * flags are the new open flags
 * queue is the reopen queue
 *
 * Returns 0 on success, non-zero on error. On error errp will be set
 * as well.
 *
 * On failure, bdrv_reopen_abort() will be called to clean up any data.
 * It is the responsibility of the caller to then call the abort() or
 * commit() for any other BDS that have been left in a prepare() state
 *
 */
int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
                        Error **errp)
{
    int ret = -1;
    Error *local_err = NULL;
    BlockDriver *drv;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
    drv = reopen_state->bs->drv;

    /* if we are to stay read-only, do not allow permission change
     * to r/w */
    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
        reopen_state->flags & BDRV_O_RDWR) {
        error_set(errp, QERR_DEVICE_IS_READ_ONLY,
                  reopen_state->bs->device_name);
        goto error;
    }


    /* Flush outstanding data so the reopened file starts from a clean state */
    ret = bdrv_flush(reopen_state->bs);
    if (ret) {
        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
                  strerror(-ret));
        goto error;
    }

    if (drv->bdrv_reopen_prepare) {
        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
        if (ret) {
            /* Prefer the driver's specific error message if it set one */
            if (local_err != NULL) {
                error_propagate(errp, local_err);
            } else {
                error_setg(errp, "failed while preparing to reopen image '%s'",
                           reopen_state->bs->filename);
            }
            goto error;
        }
    } else {
        /* It is currently mandatory to have a bdrv_reopen_prepare()
         * handler for each supported drv. */
        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
                  drv->format_name, reopen_state->bs->device_name,
                  "reopening of file");
        ret = -1;
        goto error;
    }

    ret = 0;

error:
    return ret;
}
1775
1776/*
1777 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1778 * makes them final by swapping the staging BlockDriverState contents into
1779 * the active BlockDriverState contents.
1780 */
1781void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1782{
1783 BlockDriver *drv;
1784
1785 assert(reopen_state != NULL);
1786 drv = reopen_state->bs->drv;
1787 assert(drv != NULL);
1788
1789 /* If there are any driver level actions to take */
1790 if (drv->bdrv_reopen_commit) {
1791 drv->bdrv_reopen_commit(reopen_state);
1792 }
1793
1794 /* set BDS specific flags now */
1795 reopen_state->bs->open_flags = reopen_state->flags;
1796 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1797 BDRV_O_CACHE_WB);
1798 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
355ef4ac 1799
3baca891 1800 bdrv_refresh_limits(reopen_state->bs, NULL);
e971aa12
JC
1801}
1802
1803/*
1804 * Abort the reopen, and delete and free the staged changes in
1805 * reopen_state
1806 */
1807void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1808{
1809 BlockDriver *drv;
1810
1811 assert(reopen_state != NULL);
1812 drv = reopen_state->bs->drv;
1813 assert(drv != NULL);
1814
1815 if (drv->bdrv_reopen_abort) {
1816 drv->bdrv_reopen_abort(reopen_state);
1817 }
1818}
1819
1820
fc01f7e7
FB
/*
 * Close a BlockDriverState: cancel jobs, drain and flush I/O, detach the
 * backing file and protocol layer, and reset the BDS to its unopened state.
 * The BDS object itself is not freed (see bdrv_unref for that).
 */
void bdrv_close(BlockDriverState *bs)
{
    BdrvAioNotifier *ban, *ban_next;

    /* Cancel any background job before tearing the BDS down */
    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }
    bdrv_drain_all(); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain_all(); /* in case flush left pending I/O */
    notifier_list_notify(&bs->close_notifiers, bs);

    if (bs->drv) {
        if (bs->backing_hd) {
            BlockDriverState *backing_hd = bs->backing_hd;
            /* Detach first (releases the op blocker), then drop the ref */
            bdrv_set_backing_hd(bs, NULL);
            bdrv_unref(backing_hd);
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
        bs->opaque = NULL;
        bs->drv = NULL;
        /* Reset per-open state so the BDS can be reused by a later open */
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->growable = 0;
        bs->zero_beyond_eof = false;
        QDECREF(bs->options);
        bs->options = NULL;
        QDECREF(bs->full_open_options);
        bs->full_open_options = NULL;

        /* Drop the protocol layer after the format driver is closed */
        if (bs->file != NULL) {
            bdrv_unref(bs->file);
            bs->file = NULL;
        }
    }

    bdrv_dev_change_media_cb(bs, false);

    /*throttling disk I/O limits*/
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }

    /* Free any registered AioContext-change notifiers */
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
    QLIST_INIT(&bs->aio_notifiers);
}
1875
2bc93fed
MK
1876void bdrv_close_all(void)
1877{
1878 BlockDriverState *bs;
1879
dc364f4c 1880 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
1881 AioContext *aio_context = bdrv_get_aio_context(bs);
1882
1883 aio_context_acquire(aio_context);
2bc93fed 1884 bdrv_close(bs);
ed78cda3 1885 aio_context_release(aio_context);
2bc93fed
MK
1886 }
1887}
1888
88266f5a
SH
1889/* Check if any requests are in-flight (including throttled requests) */
1890static bool bdrv_requests_pending(BlockDriverState *bs)
1891{
1892 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1893 return true;
1894 }
cc0681c4
BC
1895 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1896 return true;
1897 }
1898 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
88266f5a
SH
1899 return true;
1900 }
1901 if (bs->file && bdrv_requests_pending(bs->file)) {
1902 return true;
1903 }
1904 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1905 return true;
1906 }
1907 return false;
1908}
1909
922453bc
SH
/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * Note that completion of an asynchronous I/O operation can trigger any
 * number of other I/O operations on other devices---for example a coroutine
 * can be arbitrarily complex and a constant flow of I/O can come until the
 * coroutine is complete. Because of this, it is not possible to have a
 * function to drain a single device's I/O queue.
 */
void bdrv_drain_all(void)
{
    /* Always run first iteration so any pending completion BHs run */
    bool busy = true;
    BlockDriverState *bs;

    /* Keep polling until one full pass over all devices sees no activity */
    while (busy) {
        busy = false;

        QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
            AioContext *aio_context = bdrv_get_aio_context(bs);
            bool bs_busy;

            aio_context_acquire(aio_context);
            bdrv_flush_io_queue(bs);
            /* Release throttled requests so they can run to completion */
            bdrv_start_throttled_reqs(bs);
            bs_busy = bdrv_requests_pending(bs);
            /* Block in aio_poll() only if requests are actually pending */
            bs_busy |= aio_poll(aio_context, bs_busy);
            aio_context_release(aio_context);

            busy |= bs_busy;
        }
    }
}
1946
dc364f4c
BC
1947/* make a BlockDriverState anonymous by removing from bdrv_state and
1948 * graph_bdrv_state list.
d22b2f41
RH
1949 Also, NULL terminate the device_name to prevent double remove */
1950void bdrv_make_anon(BlockDriverState *bs)
1951{
1952 if (bs->device_name[0] != '\0') {
dc364f4c 1953 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
d22b2f41
RH
1954 }
1955 bs->device_name[0] = '\0';
dc364f4c
BC
1956 if (bs->node_name[0] != '\0') {
1957 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1958 }
1959 bs->node_name[0] = '\0';
d22b2f41
RH
1960}
1961
e023b2e2
PB
1962static void bdrv_rebind(BlockDriverState *bs)
1963{
1964 if (bs->drv && bs->drv->bdrv_rebind) {
1965 bs->drv->bdrv_rebind(bs);
1966 }
1967}
1968
4ddc07ca
PB
/*
 * Copy the fields that must stay attached to the guest device from @bs_src
 * into @bs_dest.  Note this is a one-way copy, not a swap; the caller is
 * responsible for applying it in both directions where needed.
 */
static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
{
    /* move some fields that need to stay attached to the device */

    /* dev info */
    bs_dest->dev_ops = bs_src->dev_ops;
    bs_dest->dev_opaque = bs_src->dev_opaque;
    bs_dest->dev = bs_src->dev;
    bs_dest->guest_block_size = bs_src->guest_block_size;
    bs_dest->copy_on_read = bs_src->copy_on_read;

    bs_dest->enable_write_cache = bs_src->enable_write_cache;

    /* i/o throttled req */
    memcpy(&bs_dest->throttle_state,
           &bs_src->throttle_state,
           sizeof(ThrottleState));
    bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
    bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
    bs_dest->io_limits_enabled = bs_src->io_limits_enabled;

    /* r/w error */
    bs_dest->on_read_error = bs_src->on_read_error;
    bs_dest->on_write_error = bs_src->on_write_error;

    /* i/o status */
    bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
    bs_dest->iostatus = bs_src->iostatus;

    /* dirty bitmap */
    bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;

    /* reference count */
    bs_dest->refcnt = bs_src->refcnt;

    /* job */
    bs_dest->job = bs_src->job;

    /* keep the same entry in bdrv_states */
    pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
            bs_src->device_name);
    bs_dest->device_list = bs_src->device_list;
    memcpy(bs_dest->op_blockers, bs_src->op_blockers,
           sizeof(bs_dest->op_blockers));
}
8802d1fd 2015
4ddc07ca
PB
/*
 * Swap bs contents for two image chains while they are live,
 * while keeping required fields on the BlockDriverState that is
 * actually attached to a device.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_old. Both bs_new and bs_old are modified.
 *
 * bs_new is required to be anonymous.
 *
 * This function does not create any image files.
 */
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
{
    BlockDriverState tmp;

    /* The code needs to swap the node_name but simply swapping node_list won't
     * work so first remove the nodes from the graph list, do the swap then
     * insert them back if needed.
     */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
    }

    /* bs_new must be anonymous and shouldn't have anything fancy enabled */
    assert(bs_new->device_name[0] == '\0');
    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
    assert(bs_new->job == NULL);
    assert(bs_new->dev == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* Wholesale struct swap; device-side fields are fixed up below. */
    tmp = *bs_new;
    *bs_new = *bs_old;
    *bs_old = tmp;

    /* there are some fields that should not be swapped, move them back.
     * Three-way rotation via tmp keeps each device's feature fields with
     * the BDS that the device is actually attached to. */
    bdrv_move_feature_fields(&tmp, bs_old);
    bdrv_move_feature_fields(bs_old, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    /* bs_new shouldn't be in bdrv_states even after the swap! */
    assert(bs_new->device_name[0] == '\0');

    /* Check a few fields that should remain attached to the device */
    assert(bs_new->dev == NULL);
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* insert the nodes back into the graph node list if needed */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
    }

    /* Give both drivers a chance to react to their new identities. */
    bdrv_rebind(bs_new);
    bdrv_rebind(bs_old);
}
2080
/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
 *
 * bs_new is required to be anonymous.
 *
 * This function does not create any image files.
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    bdrv_swap(bs_new, bs_top);

    /* The contents of 'tmp' will become bs_top, as we are
     * swapping bs_new and bs_top contents.
     * After the swap, bs_new holds the former top's contents, so it
     * becomes the backing file of the (new) top layer. */
    bdrv_set_backing_hd(bs_top, bs_new);
}
2100
/* Final teardown of a BlockDriverState once its refcount hits zero.
 * Callers must already have detached the device, cancelled any job and
 * released all op blockers; the asserts enforce that contract. */
static void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);
    assert(!bs->job);
    assert(bdrv_op_blocker_is_empty(bs));
    assert(!bs->refcnt);
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    bdrv_close(bs);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    /* Release the legacy DriveInfo that was kept alive with this BDS
     * (FIXME layering violation, see blockdev.h include above). */
    drive_info_del(drive_get_by_blockdev(bs));
    g_free(bs);
}
2117
fa879d62
MA
/* Attach a guest device to bs.  Returns 0 on success, -EBUSY if another
 * device is already attached. */
int bdrv_attach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    if (bs->dev) {
        /* only one device may be attached at a time */
        return -EBUSY;
    }
    bs->dev = dev;
    bdrv_iostatus_reset(bs);

    /* We're expecting I/O from the device so bump up coroutine pool size */
    qemu_coroutine_adjust_pool_size(COROUTINE_POOL_RESERVATION);
    return 0;
}
2131
fa879d62
MA
2132/* TODO qdevified devices don't use this, remove when devices are qdevified */
2133void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 2134{
fa879d62
MA
2135 if (bdrv_attach_dev(bs, dev) < 0) {
2136 abort();
2137 }
2138}
2139
/* Detach the given device from bs and reset the device-facing state that
 * bdrv_attach_dev()/bdrv_set_dev_ops() established. */
void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    /* back to the default guest-visible block size */
    bs->guest_block_size = 512;
    /* undo the pool-size bump done at attach time */
    qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION);
}
2150
fa879d62
MA
2151/* TODO change to return DeviceState * when all users are qdevified */
2152void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 2153{
fa879d62 2154 return bs->dev;
18846dee
MA
2155}
2156
0e49de52
MA
/* Register the device callback table (and its opaque cookie) that the block
 * layer uses to notify the attached device of media/tray/resize events. */
void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
}
2163
/* Notify the attached device of a medium change (load = medium inserted),
 * and emit DEVICE_TRAY_MOVED QMP events for any implied tray motion.
 * No-op if the device registered no change_media_cb. */
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
{
    if (bs->dev_ops && bs->dev_ops->change_media_cb) {
        /* sample tray state before the callback may change it */
        bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
        bs->dev_ops->change_media_cb(bs->dev_opaque, load);
        if (tray_was_closed) {
            /* tray open */
            qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
                                              true, &error_abort);
        }
        if (load) {
            /* tray close */
            qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
                                              false, &error_abort);
        }
    }
}
2181
2c6942fa
MA
2182bool bdrv_dev_has_removable_media(BlockDriverState *bs)
2183{
2184 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
2185}
2186
025ccaa7
PB
2187void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
2188{
2189 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
2190 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
2191 }
2192}
2193
e4def80b
MA
2194bool bdrv_dev_is_tray_open(BlockDriverState *bs)
2195{
2196 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
2197 return bs->dev_ops->is_tray_open(bs->dev_opaque);
2198 }
2199 return false;
2200}
2201
145feb17
MA
2202static void bdrv_dev_resize_cb(BlockDriverState *bs)
2203{
2204 if (bs->dev_ops && bs->dev_ops->resize_cb) {
2205 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
2206 }
2207}
2208
f107639a
MA
2209bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
2210{
2211 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
2212 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
2213 }
2214 return false;
2215}
2216
e97fc193
AL
/*
 * Run consistency checks on an image
 *
 * Returns 0 if the check could be completed (it doesn't mean that the image is
 * free of errors) or -errno when an internal error occurred. The results of the
 * check are stored in res.
 */
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
{
    if (bs->drv == NULL) {
        /* no medium/driver attached */
        return -ENOMEDIUM;
    }
    if (bs->drv->bdrv_check == NULL) {
        /* format driver does not implement checking */
        return -ENOTSUP;
    }

    /* start from a clean result structure; the driver fills it in */
    memset(res, 0, sizeof(*res));
    return bs->drv->bdrv_check(bs, res, fix);
}
2236
8a426614
KW
/* number of sectors copied per iteration of the commit loop */
#define COMMIT_BUF_SECTORS 2048

/* commit COW file into the raw image */
/* Copies all sectors allocated in bs down into its backing file, growing the
 * backing file first if needed, then empties bs if the driver supports it.
 * Temporarily reopens a read-only backing file read-write.
 * Returns 0 on success or a negative errno. */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    int64_t sector, total_sectors, length, backing_length;
    int n, ro, open_flags;
    int ret = 0;
    uint8_t *buf = NULL;
    char filename[PATH_MAX];

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing_hd) {
        /* nothing to commit into */
        return -ENOTSUP;
    }

    /* refuse if either layer is in use by a conflicting operation */
    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) ||
        bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) {
        return -EBUSY;
    }

    ro = bs->backing_hd->read_only;
    /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
    pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
    open_flags =  bs->backing_hd->open_flags;

    if (ro) {
        /* reopen the backing file writable for the duration of the commit */
        if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
            return -EACCES;
        }
    }

    length = bdrv_getlength(bs);
    if (length < 0) {
        ret = length;
        goto ro_cleanup;
    }

    backing_length = bdrv_getlength(bs->backing_hd);
    if (backing_length < 0) {
        ret = backing_length;
        goto ro_cleanup;
    }

    /* If our top snapshot is larger than the backing file image,
     * grow the backing file image if possible.  If not possible,
     * we must return an error */
    if (length > backing_length) {
        ret = bdrv_truncate(bs->backing_hd, length);
        if (ret < 0) {
            goto ro_cleanup;
        }
    }

    total_sectors = length >> BDRV_SECTOR_BITS;

    /* qemu_try_blockalign() for bs will choose an alignment that works for
     * bs->backing_hd as well, so no need to compare the alignment manually. */
    buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
    if (buf == NULL) {
        ret = -ENOMEM;
        goto ro_cleanup;
    }

    /* copy only the ranges actually allocated in the top layer */
    for (sector = 0; sector < total_sectors; sector += n) {
        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
        if (ret < 0) {
            goto ro_cleanup;
        }
        if (ret) {
            ret = bdrv_read(bs, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }

            ret = bdrv_write(bs->backing_hd, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        /* drop the now-redundant COW data from the top layer */
        ret = drv->bdrv_make_empty(bs);
        if (ret < 0) {
            goto ro_cleanup;
        }
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd) {
        bdrv_flush(bs->backing_hd);
    }

    ret = 0;
ro_cleanup:
    qemu_vfree(buf);

    if (ro) {
        /* ignoring error return here */
        bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
    }

    return ret;
}
2349
/* Commit every BDS that has a backing file.  Stops and returns the first
 * error; each BDS is committed under its own AioContext lock. */
int bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        if (bs->drv && bs->backing_hd) {
            int ret = bdrv_commit(bs);
            if (ret < 0) {
                /* release before bailing out — must mirror the acquire */
                aio_context_release(aio_context);
                return ret;
            }
        }
        aio_context_release(aio_context);
    }
    return 0;
}
2369
dbffbdcf
SH
/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    /* drop the serialising count taken in mark_request_serialising() */
    if (req->serialising) {
        req->bs->serialising_in_flight--;
    }

    QLIST_REMOVE(req, list);
    /* wake every coroutine that was waiting for this request to finish */
    qemu_co_queue_restart_all(&req->wait_queue);
}
2384
/**
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t offset,
                                  unsigned int bytes, bool is_write)
{
    /* designated initializer zeroes all unmentioned fields (waiting_for,
     * list linkage) before the request is published */
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .offset         = offset,
        .bytes          = bytes,
        .is_write       = is_write,
        .co             = qemu_coroutine_self(),
        .serialising    = false,
        /* until serialised, the overlap window is just the request itself */
        .overlap_offset = offset,
        .overlap_bytes  = bytes,
    };

    qemu_co_queue_init(&req->wait_queue);

    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}
2408
/* Mark req as serialising and widen its overlap window to 'align'-aligned
 * boundaries so overlap checks against it cover the whole aligned range.
 * align must be a power of two (the & ~(align - 1) rounding relies on it). */
static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
{
    /* round the window down/up to the enclosing aligned region */
    int64_t overlap_offset = req->offset & ~(align - 1);
    unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
                               - overlap_offset;

    if (!req->serialising) {
        req->bs->serialising_in_flight++;
        req->serialising = true;
    }

    /* only ever grow the window, never shrink it */
    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
}
2423
d83947ac
SH
/**
 * Round a region to cluster boundaries
 */
void bdrv_round_to_clusters(BlockDriverState *bs,
                            int64_t sector_num, int nb_sectors,
                            int64_t *cluster_sector_num,
                            int *cluster_nb_sectors)
{
    BlockDriverInfo bdi;

    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        /* no cluster information available — return the region unchanged */
        *cluster_sector_num = sector_num;
        *cluster_nb_sectors = nb_sectors;
    } else {
        /* expand [sector_num, +nb_sectors) outward to cluster boundaries */
        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
        *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
        *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
                                            nb_sectors, c);
    }
}
2444
7327145f 2445static int bdrv_get_cluster_size(BlockDriverState *bs)
793ed47a
KW
2446{
2447 BlockDriverInfo bdi;
7327145f 2448 int ret;
793ed47a 2449
7327145f
KW
2450 ret = bdrv_get_info(bs, &bdi);
2451 if (ret < 0 || bdi.cluster_size == 0) {
2452 return bs->request_alignment;
793ed47a 2453 } else {
7327145f 2454 return bdi.cluster_size;
793ed47a
KW
2455 }
2456}
2457
f4658285 2458static bool tracked_request_overlaps(BdrvTrackedRequest *req,
793ed47a
KW
2459 int64_t offset, unsigned int bytes)
2460{
d83947ac 2461 /* aaaa bbbb */
7327145f 2462 if (offset >= req->overlap_offset + req->overlap_bytes) {
d83947ac
SH
2463 return false;
2464 }
2465 /* bbbb aaaa */
7327145f 2466 if (req->overlap_offset >= offset + bytes) {
d83947ac
SH
2467 return false;
2468 }
2469 return true;
f4658285
SH
2470}
2471
/* Block the calling coroutine until no serialising request overlaps self.
 * Returns true if we actually had to wait (callers may need to re-check
 * state that could have changed while yielding). */
static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
{
    BlockDriverState *bs = self->bs;
    BdrvTrackedRequest *req;
    bool retry;
    bool waited = false;

    /* fast path: nothing serialising in flight, nothing to wait for */
    if (!bs->serialising_in_flight) {
        return false;
    }

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            /* only conflict when at least one side is serialising */
            if (req == self || (!req->serialising && !self->serialising)) {
                continue;
            }
            if (tracked_request_overlaps(req, self->overlap_offset,
                                         self->overlap_bytes))
            {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests.  This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                /* If the request is already (indirectly) waiting for us, or
                 * will wait for us as soon as it wakes up, then just go on
                 * (instead of producing a deadlock in the former case). */
                if (!req->waiting_for) {
                    self->waiting_for = req;
                    qemu_co_queue_wait(&req->wait_queue);
                    self->waiting_for = NULL;
                    /* list may have changed while we slept — rescan it */
                    retry = true;
                    waited = true;
                    break;
                }
            }
        }
    } while (retry);

    return waited;
}
2515
756e6736
KW
/*
 * Return values:
 * 0        - success
 * -EINVAL  - backing format specified, but no file
 * -ENOSPC  - can't update the backing file because no space is left in the
 *            image file header
 * -ENOTSUP - format driver doesn't support changing the backing file
 */
int bdrv_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt)
{
    BlockDriver *drv = bs->drv;
    int ret;

    /* Backing file format doesn't make sense without a backing file */
    if (backing_fmt && !backing_file) {
        return -EINVAL;
    }

    if (drv->bdrv_change_backing_file != NULL) {
        ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
    } else {
        ret = -ENOTSUP;
    }

    if (ret == 0) {
        /* mirror the on-disk change into the in-memory BDS; NULL arguments
         * become empty strings via the ?: fallback */
        pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
        pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
    }
    return ret;
}
2547
6ebdcee2
JC
/*
 * Finds the image layer in the chain that has 'bs' as its backing file.
 *
 * active is the current topmost image.
 *
 * Returns NULL if bs is not found in active's image chain,
 * or if active == bs.
 *
 * Returns the bottommost base image if bs == NULL.
 */
BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
                                    BlockDriverState *bs)
{
    /* walk down the backing chain until active sits directly on top of bs;
     * falls off the end (NULL) when bs is not in the chain */
    while (active && bs != active->backing_hd) {
        active = active->backing_hd;
    }

    return active;
}
6ebdcee2 2567
4caf0fcd
JC
2568/* Given a BDS, searches for the base layer. */
2569BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2570{
2571 return bdrv_find_overlay(bs, NULL);
6ebdcee2
JC
2572}
2573
/* Queue node used to remember the intermediate layers scheduled for
 * deletion while walking the backing chain. */
typedef struct BlkIntermediateStates {
    BlockDriverState *bs;
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
} BlkIntermediateStates;


/*
 * Drops images above 'base' up to and including 'top', and sets the image
 * above 'top' to have base as its backing file.
 *
 * Requires that the overlay to 'top' is opened r/w, so that the backing file
 * information in 'bs' can be properly updated.
 *
 * E.g., this will convert the following chain:
 * bottom <- base <- intermediate <- top <- active
 *
 * to
 *
 * bottom <- base <- active
 *
 * It is allowed for bottom==base, in which case it converts:
 *
 * base <- intermediate <- top <- active
 *
 * to
 *
 * base <- active
 *
 * If backing_file_str is non-NULL, it will be used when modifying top's
 * overlay image metadata.
 *
 * Error conditions:
 *  if active == top, that is considered an error
 *
 */
int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                           BlockDriverState *base, const char *backing_file_str)
{
    BlockDriverState *intermediate;
    BlockDriverState *base_bs = NULL;
    BlockDriverState *new_top_bs = NULL;
    BlkIntermediateStates *intermediate_state, *next;
    int ret = -EIO;

    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
    QSIMPLEQ_INIT(&states_to_delete);

    if (!top->drv || !base->drv) {
        goto exit;
    }

    new_top_bs = bdrv_find_overlay(active, top);

    if (new_top_bs == NULL) {
        /* we could not find the image above 'top', this is an error */
        goto exit;
    }

    /* special case of new_top_bs->backing_hd already pointing to base - nothing
     * to do, no intermediate images */
    if (new_top_bs->backing_hd == base) {
        ret = 0;
        goto exit;
    }

    intermediate = top;

    /* now we will go down through the list, and add each BDS we find
     * into our deletion queue, until we hit the 'base'
     */
    while (intermediate) {
        intermediate_state = g_new0(BlkIntermediateStates, 1);
        intermediate_state->bs = intermediate;
        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);

        if (intermediate->backing_hd == base) {
            base_bs = intermediate->backing_hd;
            break;
        }
        intermediate = intermediate->backing_hd;
    }
    if (base_bs == NULL) {
        /* something went wrong, we did not end at the base. safely
         * unravel everything, and exit with error */
        goto exit;
    }

    /* success - we can delete the intermediate states, and link top->base */
    backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
    ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
                                   base_bs->drv ? base_bs->drv->format_name : "");
    if (ret) {
        goto exit;
    }
    bdrv_set_backing_hd(new_top_bs, base_bs);

    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        /* so that bdrv_close() does not recursively close the chain */
        bdrv_set_backing_hd(intermediate_state->bs, NULL);
        bdrv_unref(intermediate_state->bs);
    }
    ret = 0;

exit:
    /* free the bookkeeping nodes on both success and failure paths */
    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        g_free(intermediate_state);
    }
    return ret;
}
2683
2684
71d0770c
AL
/* Validate a byte-granularity request against the medium state and, for
 * non-growable images, against the current image length.
 * Returns 0 if OK, -ENOMEDIUM or -EIO otherwise. */
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t len;

    /* requests larger than INT_MAX bytes are rejected outright */
    if (size > INT_MAX) {
        return -EIO;
    }

    if (!bdrv_is_inserted(bs))
        return -ENOMEDIUM;

    /* growable images accept writes past EOF, so skip the bounds check */
    if (bs->growable)
        return 0;

    len = bdrv_getlength(bs);

    if (offset < 0)
        return -EIO;

    /* written as (len - offset < size) to avoid overflowing offset + size */
    if ((offset > len) || (len - offset < size))
        return -EIO;

    return 0;
}
2710
/* Sector-granularity wrapper around bdrv_check_byte_request(). */
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    /* reject counts whose byte size would overflow an int */
    if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
        return -EIO;
    }

    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}
2721
1c9805a3
SH
/* Parameter/result bundle passed to the coroutine that carries out a
 * synchronous read or write on behalf of bdrv_prwv_co(). */
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t offset;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;          /* NOT_DONE until the coroutine completes */
    BdrvRequestFlags flags;
} RwCo;

/* Coroutine entry point: dispatch the request described by opaque (an RwCo)
 * to the aligned read or write path and store the result in ret. */
static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
                                      rwco->qiov->size, rwco->qiov,
                                      rwco->flags);
    } else {
        rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
                                       rwco->qiov->size, rwco->qiov,
                                       rwco->flags);
    }
}
e7a8a783 2745
/*
 * Process a vectored synchronous request using coroutines
 */
static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
                        QEMUIOVector *qiov, bool is_write,
                        BdrvRequestFlags flags)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .offset = offset,
        .qiov = qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
        .flags = flags,
    };

    /**
     * In sync call context, when the vcpu is blocked, this throttling timer
     * will not fire; so the I/O throttling function has to be disabled here
     * if it has been enabled.
     */
    if (bs->io_limits_enabled) {
        fprintf(stderr, "Disabling I/O throttling on '%s' due "
                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
        bdrv_io_limits_disable(bs);
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        /* spawn the coroutine and poll its AioContext until it finishes */
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            aio_poll(aio_context, true);
        }
    }
    return rwco.ret;
}
b338082b 2788
8d3b1a2d
KW
/*
 * Process a synchronous request using coroutines
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write, BdrvRequestFlags flags)
{
    /* wrap the flat buffer in a single-element QEMUIOVector */
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };

    /* reject counts whose byte size would overflow an int */
    if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
        return -EINVAL;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
                        &qiov, is_write, flags);
}
2809
1c9805a3
SH
2810/* return < 0 if error. See bdrv_write() for the return codes */
2811int bdrv_read(BlockDriverState *bs, int64_t sector_num,
2812 uint8_t *buf, int nb_sectors)
2813{
4105eaaa 2814 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
fc01f7e7
FB
2815}
2816
07d27a44
MA
/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
                          uint8_t *buf, int nb_sectors)
{
    bool enabled;
    int ret;

    /* save and clear the throttling flag, restore it after the read */
    enabled = bs->io_limits_enabled;
    bs->io_limits_enabled = false;
    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
    bs->io_limits_enabled = enabled;
    return ret;
}
2830
/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    /* cast away const: bdrv_rw_co takes a uint8_t* but will not modify the
     * buffer on the write path */
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
}
2842
aa7bfbff
PL
2843int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2844 int nb_sectors, BdrvRequestFlags flags)
4105eaaa
PL
2845{
2846 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
aa7bfbff 2847 BDRV_REQ_ZERO_WRITE | flags);
8d3b1a2d
KW
2848}
2849
d75cbb5e
PL
/*
 * Completely zero out a block device with the help of bdrv_write_zeroes.
 * The operation is sped up by checking the block status and only writing
 * zeroes to the device if they currently do not return zeroes. Optional
 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
 *
 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
 */
int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
{
    int64_t target_sectors, ret, nb_sectors, sector_num = 0;
    int n;

    target_sectors = bdrv_nb_sectors(bs);
    if (target_sectors < 0) {
        return target_sectors;
    }

    for (;;) {
        nb_sectors = target_sectors - sector_num;
        if (nb_sectors <= 0) {
            /* reached the end of the device */
            return 0;
        }
        if (nb_sectors > INT_MAX) {
            /* clamp to what bdrv_get_block_status can take */
            nb_sectors = INT_MAX;
        }
        ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
        if (ret < 0) {
            error_report("error getting block status at sector %" PRId64 ": %s",
                         sector_num, strerror(-ret));
            return ret;
        }
        if (ret & BDRV_BLOCK_ZERO) {
            /* range already reads as zero — skip it */
            sector_num += n;
            continue;
        }
        ret = bdrv_write_zeroes(bs, sector_num, n, flags);
        if (ret < 0) {
            error_report("error writing zeroes at sector %" PRId64 ": %s",
                         sector_num, strerror(-ret));
            return ret;
        }
        sector_num += n;
    }
}
2895
/* Synchronous byte-granularity read.  Returns the number of bytes read
 * (== bytes) on success, or a negative errno. */
int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
{
    /* wrap the flat buffer in a single-element QEMUIOVector */
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = bytes,
    };
    int ret;

    if (bytes < 0) {
        return -EINVAL;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);
    ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
    if (ret < 0) {
        return ret;
    }

    return bytes;
}
2917
/* Synchronous vectored byte-granularity write.  Returns the total size of
 * qiov on success, or a negative errno. */
int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
{
    int ret;

    ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
    if (ret < 0) {
        return ret;
    }

    return qiov->size;
}
2929
/* Synchronous byte-granularity write of a flat buffer.  Returns the number
 * of bytes written (== bytes) on success, or a negative errno. */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int bytes)
{
    /* wrap the flat buffer in a single-element QEMUIOVector */
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base   = (void *) buf,
        .iov_len    = bytes,
    };

    if (bytes < 0) {
        return -EINVAL;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_pwritev(bs, offset, &qiov);
}
83f64091 2946
f08145fe
KW
/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
    const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(bs, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    /* No flush needed for cache modes that already do it */
    if (bs->enable_write_cache) {
        bdrv_flush(bs);
    }

    return 0;
}
2970
/* Copy-on-read helper: read a whole cluster through a private bounce buffer,
 * write it back into the top image (so the data becomes locally allocated),
 * then copy the requested sub-range into the caller's qiov.
 * Returns 0 on success or a negative errno. */
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    BlockDriver *drv = bs->drv;
    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
                           &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
                                   cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
    if (bounce_buffer == NULL) {
        ret = -ENOMEM;
        goto err;
    }

    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    /* read the full cluster (possibly from the backing file) */
    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                             &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    if (drv->bdrv_co_write_zeroes &&
        buffer_is_zero(bounce_buffer, iov.iov_len)) {
        /* all-zero cluster: allocate it via the cheaper zero-write path */
        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
                                      cluster_nb_sectors, 0);
    } else {
        /* This does not change the data on the disk, it is not necessary
         * to flush even in cache=writethrough mode.
         */
        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    }

    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests.  If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    /* hand the originally requested sub-range back to the caller */
    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
                        nb_sectors * BDRV_SECTOR_SIZE);

err:
    qemu_vfree(bounce_buffer);
    return ret;
}
3041
c5fbe571 3042/*
d0c7f642
KW
3043 * Forwards an already correctly aligned request to the BlockDriver. This
3044 * handles copy on read and zeroing after EOF; any other features must be
3045 * implemented by the caller.
c5fbe571 3046 */
d0c7f642 3047static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
65afd211 3048 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
ec746e10 3049 int64_t align, QEMUIOVector *qiov, int flags)
da1fa91d
KW
3050{
3051 BlockDriver *drv = bs->drv;
dbffbdcf 3052 int ret;
da1fa91d 3053
d0c7f642
KW
3054 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3055 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
da1fa91d 3056
d0c7f642
KW
3057 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3058 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
8eb029c2 3059 assert(!qiov || bytes == qiov->size);
d0c7f642
KW
3060
3061 /* Handle Copy on Read and associated serialisation */
470c0504 3062 if (flags & BDRV_REQ_COPY_ON_READ) {
7327145f
KW
3063 /* If we touch the same cluster it counts as an overlap. This
3064 * guarantees that allocating writes will be serialized and not race
3065 * with each other for the same cluster. For example, in copy-on-read
3066 * it ensures that the CoR read and write operations are atomic and
3067 * guest writes cannot interleave between them. */
3068 mark_request_serialising(req, bdrv_get_cluster_size(bs));
470c0504
SH
3069 }
3070
2dbafdc0 3071 wait_serialising_requests(req);
f4658285 3072
470c0504 3073 if (flags & BDRV_REQ_COPY_ON_READ) {
ab185921
SH
3074 int pnum;
3075
bdad13b9 3076 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
ab185921
SH
3077 if (ret < 0) {
3078 goto out;
3079 }
3080
3081 if (!ret || pnum != nb_sectors) {
470c0504 3082 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
ab185921
SH
3083 goto out;
3084 }
3085 }
3086
d0c7f642 3087 /* Forward the request to the BlockDriver */
893a8f62
MK
3088 if (!(bs->zero_beyond_eof && bs->growable)) {
3089 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3090 } else {
3091 /* Read zeros after EOF of growable BDSes */
4049082c 3092 int64_t total_sectors, max_nb_sectors;
893a8f62 3093
4049082c
MA
3094 total_sectors = bdrv_nb_sectors(bs);
3095 if (total_sectors < 0) {
3096 ret = total_sectors;
893a8f62
MK
3097 goto out;
3098 }
3099
5f5bcd80
KW
3100 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
3101 align >> BDRV_SECTOR_BITS);
893a8f62 3102 if (max_nb_sectors > 0) {
33f461e0
KW
3103 QEMUIOVector local_qiov;
3104 size_t local_sectors;
3105
3106 max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS);
3107 local_sectors = MIN(max_nb_sectors, nb_sectors);
3108
3109 qemu_iovec_init(&local_qiov, qiov->niov);
3110 qemu_iovec_concat(&local_qiov, qiov, 0,
3111 local_sectors * BDRV_SECTOR_SIZE);
3112
3113 ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
3114 &local_qiov);
3115
3116 qemu_iovec_destroy(&local_qiov);
893a8f62
MK
3117 } else {
3118 ret = 0;
3119 }
3120
3121 /* Reading beyond end of file is supposed to produce zeroes */
3122 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3123 uint64_t offset = MAX(0, total_sectors - sector_num);
3124 uint64_t bytes = (sector_num + nb_sectors - offset) *
3125 BDRV_SECTOR_SIZE;
3126 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3127 }
3128 }
ab185921
SH
3129
3130out:
dbffbdcf 3131 return ret;
da1fa91d
KW
3132}
3133
/*
 * Handle a read request in coroutine context
 *
 * Applies copy-on-read and I/O throttling policy, pads the request (with
 * bounce buffers spliced into a local qiov) so that offset and length meet
 * the driver's alignment requirement, then delegates to
 * bdrv_aligned_preadv() under a tracked request.
 */
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;

    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_byte_request(bs, offset, bytes)) {
        return -EIO;
    }

    if (bs->copy_on_read) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, false);
    }

    /* Align read if necessary by padding qiov */
    if (offset & (align - 1)) {
        /* Unaligned head: prepend a bounce buffer covering the bytes
         * between the aligned start and the requested offset. */
        head_buf = qemu_blockalign(bs, align);
        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        /* Unaligned tail: append a bounce buffer up to the next boundary. */
        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }
        tail_buf = qemu_blockalign(bs, align);
        qemu_iovec_add(&local_qiov, tail_buf,
                       align - ((offset + bytes) & (align - 1)));

        bytes = ROUND_UP(bytes, align);
    }

    tracked_request_begin(&req, bs, offset, bytes, false);
    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
    tracked_request_end(&req);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
        qemu_vfree(head_buf);
        qemu_vfree(tail_buf);
    }

    return ret;
}
3207
1b0288ae
KW
3208static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3209 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3210 BdrvRequestFlags flags)
3211{
3212 if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
3213 return -EINVAL;
3214 }
3215
3216 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3217 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3218}
3219
c5fbe571 3220int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
3221 int nb_sectors, QEMUIOVector *qiov)
3222{
c5fbe571 3223 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 3224
470c0504
SH
3225 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3226}
3227
3228int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3229 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3230{
3231 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3232
3233 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3234 BDRV_REQ_COPY_ON_READ);
c5fbe571
SH
3235}
3236
/* if no limit is specified in the BlockLimits use a default
 * of 32768 512-byte sectors (16 MiB) per request.
 */
#define MAX_WRITE_ZEROES_DEFAULT 32768

/*
 * Write zeroes over [sector_num, sector_num + nb_sectors), splitting the
 * range into chunks that respect the driver's write_zeroes alignment and
 * maximum request size.  Tries the driver's efficient zero-write callback
 * first and falls back to writing a zeroed bounce buffer.
 */
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    struct iovec iov = {0};
    int ret = 0;

    int max_write_zeroes = bs->bl.max_write_zeroes ?
        bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;

    while (nb_sectors > 0 && !ret) {
        int num = nb_sectors;

        /* Align request.  Block drivers can expect the "bulk" of the request
         * to be aligned.
         */
        if (bs->bl.write_zeroes_alignment
            && num > bs->bl.write_zeroes_alignment) {
            if (sector_num % bs->bl.write_zeroes_alignment != 0) {
                /* Make a small request up to the first aligned sector. */
                num = bs->bl.write_zeroes_alignment;
                num -= sector_num % bs->bl.write_zeroes_alignment;
            } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
                /* Shorten the request to the last aligned sector.  num cannot
                 * underflow because num > bs->bl.write_zeroes_alignment.
                 */
                num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
            }
        }

        /* limit request size */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        }

        ret = -ENOTSUP;
        /* First try the efficient write zeroes operation */
        if (drv->bdrv_co_write_zeroes) {
            ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
        }

        if (ret == -ENOTSUP) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            iov.iov_len = num * BDRV_SECTOR_SIZE;
            if (iov.iov_base == NULL) {
                /* Allocate lazily, only on the first fallback iteration. */
                iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
                if (iov.iov_base == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
                memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
            }
            qemu_iovec_init_external(&qiov, &iov, 1);

            ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);

            /* Keep bounce buffer around if it is big enough for all
             * all future requests.
             */
            if (num < max_write_zeroes) {
                qemu_vfree(iov.iov_base);
                iov.iov_base = NULL;
            }
        }

        sector_num += num;
        nb_sectors -= num;
    }

fail:
    /* qemu_vfree(NULL) is a no-op, so this is safe on every path. */
    qemu_vfree(iov.iov_base);
    return ret;
}
3316
/*
 * Forwards an already correctly aligned write request to the BlockDriver.
 *
 * The caller must have begun a tracked request @req covering at least
 * [offset, offset + bytes) (asserted below).  Handles write notifiers,
 * zero detection, flushing for writethrough caches and dirty bitmap
 * updates.
 */
static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    bool waited;
    int ret;

    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert(!qiov || bytes == qiov->size);

    /* If we had to wait, our request cannot itself have been marked
     * serialising, otherwise it could deadlock against the waiter. */
    waited = wait_serialising_requests(req);
    assert(!waited || !req->serialising);
    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);

    /* Zero detection: turn an all-zero payload into a zero write when the
     * driver supports it and the user didn't disable detect-zeroes. */
    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
        qemu_iovec_is_zero(qiov)) {
        flags |= BDRV_REQ_ZERO_WRITE;
        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
    }

    if (ret < 0) {
        /* Do nothing, write notifier decided to fail this request */
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
    } else {
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    }
    BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);

    /* Writethrough semantics: flush after each successful write. */
    if (ret == 0 && !bs->enable_write_cache) {
        ret = bdrv_co_flush(bs);
    }

    bdrv_set_dirty(bs, sector_num, nb_sectors);

    block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);

    /* Growable BDSes may have been extended by this write. */
    if (bs->growable && ret >= 0) {
        bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
    }

    return ret;
}
3376
/*
 * Handle a write request in coroutine context
 *
 * Performs permission and bounds checks, throttling, and — for requests not
 * matching the driver's alignment — a read-modify-write cycle: the head
 * and/or tail around the request are read into bounce buffers, spliced into
 * a local qiov, and the enlarged aligned request is written back.
 */
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BdrvTrackedRequest req;
    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_byte_request(bs, offset, bytes)) {
        return -EIO;
    }

    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, true);
    }

    /*
     * Align write if necessary by performing a read-modify-write cycle.
     * Pad qiov with the read parts and be sure to have a tracked request not
     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
     */
    tracked_request_begin(&req, bs, offset, bytes, true);

    if (offset & (align - 1)) {
        /* RMW head: read the aligned block containing the request start. */
        QEMUIOVector head_qiov;
        struct iovec head_iov;

        mark_request_serialising(&req, align);
        wait_serialising_requests(&req);

        head_buf = qemu_blockalign(bs, align);
        head_iov = (struct iovec) {
            .iov_base = head_buf,
            .iov_len = align,
        };
        qemu_iovec_init_external(&head_qiov, &head_iov, 1);

        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
                                  align, &head_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        /* RMW tail: read the aligned block containing the request end. */
        QEMUIOVector tail_qiov;
        struct iovec tail_iov;
        size_t tail_bytes;
        bool waited;

        mark_request_serialising(&req, align);
        waited = wait_serialising_requests(&req);
        /* If we had to wait here, no head RMW read can have happened yet. */
        assert(!waited || !use_local_qiov);

        tail_buf = qemu_blockalign(bs, align);
        tail_iov = (struct iovec) {
            .iov_base = tail_buf,
            .iov_len = align,
        };
        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);

        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
                                  align, &tail_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }

        /* Append only the part of the tail block beyond the request. */
        tail_bytes = (offset + bytes) & (align - 1);
        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);

        bytes = ROUND_UP(bytes, align);
    }

    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
                               use_local_qiov ? &local_qiov : qiov,
                               flags);

fail:
    tracked_request_end(&req);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }
    qemu_vfree(head_buf);
    qemu_vfree(tail_buf);

    return ret;
}
3498
6601553e
KW
3499static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3500 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3501 BdrvRequestFlags flags)
3502{
3503 if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
3504 return -EINVAL;
3505 }
3506
3507 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3508 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3509}
3510
c5fbe571
SH
3511int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3512 int nb_sectors, QEMUIOVector *qiov)
3513{
3514 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3515
f08f2dda
SH
3516 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3517}
3518
3519int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
aa7bfbff
PL
3520 int64_t sector_num, int nb_sectors,
3521 BdrvRequestFlags flags)
f08f2dda 3522{
94d6ff21 3523 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
f08f2dda 3524
d32f35cb
PL
3525 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3526 flags &= ~BDRV_REQ_MAY_UNMAP;
3527 }
3528
f08f2dda 3529 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
aa7bfbff 3530 BDRV_REQ_ZERO_WRITE | flags);
c5fbe571
SH
3531}
3532
83f64091
FB
3533/**
3534 * Truncate file to 'offset' bytes (needed only for file protocols)
3535 */
3536int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3537{
3538 BlockDriver *drv = bs->drv;
51762288 3539 int ret;
83f64091 3540 if (!drv)
19cb3738 3541 return -ENOMEDIUM;
83f64091
FB
3542 if (!drv->bdrv_truncate)
3543 return -ENOTSUP;
59f2689d
NS
3544 if (bs->read_only)
3545 return -EACCES;
9c75e168 3546
51762288
SH
3547 ret = drv->bdrv_truncate(bs, offset);
3548 if (ret == 0) {
3549 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 3550 bdrv_dev_resize_cb(bs);
51762288
SH
3551 }
3552 return ret;
83f64091
FB
3553}
3554
4a1d5e1f
FZ
3555/**
3556 * Length of a allocated file in bytes. Sparse files are counted by actual
3557 * allocated space. Return < 0 if error or unknown.
3558 */
3559int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3560{
3561 BlockDriver *drv = bs->drv;
3562 if (!drv) {
3563 return -ENOMEDIUM;
3564 }
3565 if (drv->bdrv_get_allocated_file_size) {
3566 return drv->bdrv_get_allocated_file_size(bs);
3567 }
3568 if (bs->file) {
3569 return bdrv_get_allocated_file_size(bs->file);
3570 }
3571 return -ENOTSUP;
3572}
3573
83f64091 3574/**
65a9bb25 3575 * Return number of sectors on success, -errno on error.
83f64091 3576 */
65a9bb25 3577int64_t bdrv_nb_sectors(BlockDriverState *bs)
83f64091
FB
3578{
3579 BlockDriver *drv = bs->drv;
65a9bb25 3580
83f64091 3581 if (!drv)
19cb3738 3582 return -ENOMEDIUM;
51762288 3583
b94a2610
KW
3584 if (drv->has_variable_length) {
3585 int ret = refresh_total_sectors(bs, bs->total_sectors);
3586 if (ret < 0) {
3587 return ret;
46a4e4e6 3588 }
83f64091 3589 }
65a9bb25
MA
3590 return bs->total_sectors;
3591}
3592
3593/**
3594 * Return length in bytes on success, -errno on error.
3595 * The length is always a multiple of BDRV_SECTOR_SIZE.
3596 */
3597int64_t bdrv_getlength(BlockDriverState *bs)
3598{
3599 int64_t ret = bdrv_nb_sectors(bs);
3600
3601 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
fc01f7e7
FB
3602}
3603
19cb3738 3604/* return 0 as number of sectors if no device present or error */
96b8f136 3605void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 3606{
65a9bb25
MA
3607 int64_t nb_sectors = bdrv_nb_sectors(bs);
3608
3609 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
fc01f7e7 3610}
cf98951b 3611
ff06f5f3
PB
3612void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3613 BlockdevOnError on_write_error)
abd7f68d
MA
3614{
3615 bs->on_read_error = on_read_error;
3616 bs->on_write_error = on_write_error;
3617}
3618
1ceee0d5 3619BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
abd7f68d
MA
3620{
3621 return is_read ? bs->on_read_error : bs->on_write_error;
3622}
3623
3e1caa5f
PB
3624BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3625{
3626 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3627
3628 switch (on_err) {
3629 case BLOCKDEV_ON_ERROR_ENOSPC:
a589569f
WX
3630 return (error == ENOSPC) ?
3631 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
3e1caa5f 3632 case BLOCKDEV_ON_ERROR_STOP:
a589569f 3633 return BLOCK_ERROR_ACTION_STOP;
3e1caa5f 3634 case BLOCKDEV_ON_ERROR_REPORT:
a589569f 3635 return BLOCK_ERROR_ACTION_REPORT;
3e1caa5f 3636 case BLOCKDEV_ON_ERROR_IGNORE:
a589569f 3637 return BLOCK_ERROR_ACTION_IGNORE;
3e1caa5f
PB
3638 default:
3639 abort();
3640 }
3641}
3642
c7c2ff0c
LC
3643static void send_qmp_error_event(BlockDriverState *bs,
3644 BlockErrorAction action,
3645 bool is_read, int error)
3646{
3647 BlockErrorAction ac;
3648
3649 ac = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3650 qapi_event_send_block_io_error(bdrv_get_device_name(bs), ac, action,
3651 bdrv_iostatus_is_enabled(bs),
624ff573
LC
3652 error == ENOSPC, strerror(error),
3653 &error_abort);
c7c2ff0c
LC
3654}
3655
/* This is done by device models because, while the block layer knows
 * about the error, it does not know whether an operation comes from
 * the device or the block layer (from a job, for example).
 */
void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
                       bool is_read, int error)
{
    /* Callers pass a positive errno value. */
    assert(error >= 0);

    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* First set the iostatus, so that "info block" returns an iostatus
         * that matches the events raised so far (an additional error iostatus
         * is fine, but not a lost one).
         */
        bdrv_iostatus_set_err(bs, error);

        /* Then raise the request to stop the VM and the event.
         * qemu_system_vmstop_request_prepare has two effects. First,
         * it ensures that the STOP event always comes after the
         * BLOCK_IO_ERROR event. Second, it ensures that even if management
         * can observe the STOP event and do a "cont" before the STOP
         * event is issued, the VM will not stop. In this case, vm_start()
         * also ensures that the STOP/RESUME pair of events is emitted.
         */
        qemu_system_vmstop_request_prepare();
        send_qmp_error_event(bs, action, is_read, error);
        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
    } else {
        send_qmp_error_event(bs, action, is_read, error);
    }
}
3687
b338082b
FB
3688int bdrv_is_read_only(BlockDriverState *bs)
3689{
3690 return bs->read_only;
3691}
3692
985a03b0
TS
3693int bdrv_is_sg(BlockDriverState *bs)
3694{
3695 return bs->sg;
3696}
3697
e900a7b7
CH
3698int bdrv_enable_write_cache(BlockDriverState *bs)
3699{
3700 return bs->enable_write_cache;
3701}
3702
425b0148
PB
3703void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3704{
3705 bs->enable_write_cache = wce;
55b110f2
JC
3706
3707 /* so a reopen() will preserve wce */
3708 if (wce) {
3709 bs->open_flags |= BDRV_O_CACHE_WB;
3710 } else {
3711 bs->open_flags &= ~BDRV_O_CACHE_WB;
3712 }
425b0148
PB
3713}
3714
ea2384d3
FB
3715int bdrv_is_encrypted(BlockDriverState *bs)
3716{
3717 if (bs->backing_hd && bs->backing_hd->encrypted)
3718 return 1;
3719 return bs->encrypted;
3720}
3721
c0f4ce77
AL
3722int bdrv_key_required(BlockDriverState *bs)
3723{
3724 BlockDriverState *backing_hd = bs->backing_hd;
3725
3726 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3727 return 1;
3728 return (bs->encrypted && !bs->valid_key);
3729}
3730
/*
 * Set the encryption key for @bs, recursing into an encrypted backing
 * file first.  Returns 0 on success; -EINVAL if the image is not
 * encrypted; -ENOMEDIUM without a driver or key support; or the driver's
 * error.  On first success, fires the deferred media-change callback.
 */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        /* Key was only needed for the backing file; done. */
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bdrv_dev_change_media_cb(bs, true);
    }
    return ret;
}
3756
f8d6bba1 3757const char *bdrv_get_format_name(BlockDriverState *bs)
ea2384d3 3758{
f8d6bba1 3759 return bs->drv ? bs->drv->format_name : NULL;
ea2384d3
FB
3760}
3761
/*
 * qsort(3) comparator for an array of strings (const char *[]).
 *
 * FIX: qsort passes pointers to the ELEMENTS, i.e. const char **.  The
 * previous code handed a/b straight to strcmp(), which compared the raw
 * pointer bytes instead of the strings and produced an arbitrary order.
 * Dereference once before comparing.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    return strcmp(*(const char * const *)a, *(const char * const *)b);
}
3766
5fafdf24 3767void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
3768 void *opaque)
3769{
3770 BlockDriver *drv;
e855e4fb 3771 int count = 0;
ada42401 3772 int i;
e855e4fb 3773 const char **formats = NULL;
ea2384d3 3774
8a22f02a 3775 QLIST_FOREACH(drv, &bdrv_drivers, list) {
e855e4fb
JC
3776 if (drv->format_name) {
3777 bool found = false;
3778 int i = count;
3779 while (formats && i && !found) {
3780 found = !strcmp(formats[--i], drv->format_name);
3781 }
3782
3783 if (!found) {
5839e53b 3784 formats = g_renew(const char *, formats, count + 1);
e855e4fb 3785 formats[count++] = drv->format_name;
e855e4fb
JC
3786 }
3787 }
ea2384d3 3788 }
ada42401
SH
3789
3790 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3791
3792 for (i = 0; i < count; i++) {
3793 it(opaque, formats[i]);
3794 }
3795
e855e4fb 3796 g_free(formats);
ea2384d3
FB
3797}
3798
dc364f4c 3799/* This function is to find block backend bs */
b338082b
FB
3800BlockDriverState *bdrv_find(const char *name)
3801{
3802 BlockDriverState *bs;
3803
dc364f4c 3804 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1b7bdbc1 3805 if (!strcmp(name, bs->device_name)) {
b338082b 3806 return bs;
1b7bdbc1 3807 }
b338082b
FB
3808 }
3809 return NULL;
3810}
3811
dc364f4c
BC
3812/* This function is to find a node in the bs graph */
3813BlockDriverState *bdrv_find_node(const char *node_name)
3814{
3815 BlockDriverState *bs;
3816
3817 assert(node_name);
3818
3819 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3820 if (!strcmp(node_name, bs->node_name)) {
3821 return bs;
3822 }
3823 }
3824 return NULL;
3825}
3826
c13163fb
BC
3827/* Put this QMP function here so it can access the static graph_bdrv_states. */
3828BlockDeviceInfoList *bdrv_named_nodes_list(void)
3829{
3830 BlockDeviceInfoList *list, *entry;
3831 BlockDriverState *bs;
3832
3833 list = NULL;
3834 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3835 entry = g_malloc0(sizeof(*entry));
3836 entry->value = bdrv_block_device_info(bs);
3837 entry->next = list;
3838 list = entry;
3839 }
3840
3841 return list;
3842}
3843
12d3ba82
BC
3844BlockDriverState *bdrv_lookup_bs(const char *device,
3845 const char *node_name,
3846 Error **errp)
3847{
3848 BlockDriverState *bs = NULL;
3849
12d3ba82
BC
3850 if (device) {
3851 bs = bdrv_find(device);
3852
dd67fa50
BC
3853 if (bs) {
3854 return bs;
12d3ba82 3855 }
12d3ba82
BC
3856 }
3857
dd67fa50
BC
3858 if (node_name) {
3859 bs = bdrv_find_node(node_name);
12d3ba82 3860
dd67fa50
BC
3861 if (bs) {
3862 return bs;
3863 }
12d3ba82
BC
3864 }
3865
dd67fa50
BC
3866 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3867 device ? device : "",
3868 node_name ? node_name : "");
3869 return NULL;
12d3ba82
BC
3870}
3871
5a6684d2
JC
3872/* If 'base' is in the same chain as 'top', return true. Otherwise,
3873 * return false. If either argument is NULL, return false. */
3874bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3875{
3876 while (top && top != base) {
3877 top = top->backing_hd;
3878 }
3879
3880 return top != NULL;
3881}
3882
2f399b0a
MA
3883BlockDriverState *bdrv_next(BlockDriverState *bs)
3884{
3885 if (!bs) {
3886 return QTAILQ_FIRST(&bdrv_states);
3887 }
dc364f4c 3888 return QTAILQ_NEXT(bs, device_list);
2f399b0a
MA
3889}
3890
51de9760 3891void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
3892{
3893 BlockDriverState *bs;
3894
dc364f4c 3895 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
51de9760 3896 it(opaque, bs);
81d0912d
FB
3897 }
3898}
3899
ea2384d3
FB
3900const char *bdrv_get_device_name(BlockDriverState *bs)
3901{
3902 return bs->device_name;
3903}
3904
c8433287
MA
3905int bdrv_get_flags(BlockDriverState *bs)
3906{
3907 return bs->open_flags;
3908}
3909
f0f0fdfe 3910int bdrv_flush_all(void)
c6ca28d6
AL
3911{
3912 BlockDriverState *bs;
f0f0fdfe 3913 int result = 0;
c6ca28d6 3914
dc364f4c 3915 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
3916 AioContext *aio_context = bdrv_get_aio_context(bs);
3917 int ret;
3918
3919 aio_context_acquire(aio_context);
3920 ret = bdrv_flush(bs);
f0f0fdfe
KW
3921 if (ret < 0 && !result) {
3922 result = ret;
3923 }
ed78cda3 3924 aio_context_release(aio_context);
1b7bdbc1 3925 }
f0f0fdfe
KW
3926
3927 return result;
c6ca28d6
AL
3928}
3929
3ac21627
PL
3930int bdrv_has_zero_init_1(BlockDriverState *bs)
3931{
3932 return 1;
3933}
3934
f2feebbd
KW
3935int bdrv_has_zero_init(BlockDriverState *bs)
3936{
3937 assert(bs->drv);
3938
11212d8f
PB
3939 /* If BS is a copy on write image, it is initialized to
3940 the contents of the base image, which may not be zeroes. */
3941 if (bs->backing_hd) {
3942 return 0;
3943 }
336c1c12
KW
3944 if (bs->drv->bdrv_has_zero_init) {
3945 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
3946 }
3947
3ac21627
PL
3948 /* safe default */
3949 return 0;
f2feebbd
KW
3950}
3951
4ce78691
PL
3952bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3953{
3954 BlockDriverInfo bdi;
3955
3956 if (bs->backing_hd) {
3957 return false;
3958 }
3959
3960 if (bdrv_get_info(bs, &bdi) == 0) {
3961 return bdi.unallocated_blocks_are_zero;
3962 }
3963
3964 return false;
3965}
3966
3967bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3968{
3969 BlockDriverInfo bdi;
3970
3971 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
3972 return false;
3973 }
3974
3975 if (bdrv_get_info(bs, &bdi) == 0) {
3976 return bdi.can_write_zeroes_with_unmap;
3977 }
3978
3979 return false;
3980}
3981
/* Bounce state for running bdrv_co_get_block_status() in a coroutine. */
typedef struct BdrvCoGetBlockStatusData {
    BlockDriverState *bs;          /* node being queried */
    BlockDriverState *base;        /* backing-chain base to stop at */
    int64_t sector_num;            /* first sector of the query */
    int nb_sectors;                /* number of sectors queried */
    int *pnum;                     /* out: sectors in the same state */
    int64_t ret;                   /* BDRV_BLOCK_* result or -errno */
    bool done;                     /* set when the coroutine finishes */
} BdrvCoGetBlockStatusData;
376ae3f1 3991
/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 *
 * The return value is a bitmask of BDRV_BLOCK_* flags, or a negative errno.
 */
static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
                                                     int64_t sector_num,
                                                     int nb_sectors, int *pnum)
{
    int64_t total_sectors;
    int64_t n;
    int64_t ret, ret2;

    total_sectors = bdrv_nb_sectors(bs);
    if (total_sectors < 0) {
        return total_sectors;
    }

    if (sector_num >= total_sectors) {
        *pnum = 0;
        return 0;
    }

    /* Clamp the request to the end of the image */
    n = total_sectors - sector_num;
    if (n < nb_sectors) {
        nb_sectors = n;
    }

    /* Driver gives no information: report everything as allocated data.
     * For protocol drivers the host offset equals the guest offset. */
    if (!bs->drv->bdrv_co_get_block_status) {
        *pnum = nb_sectors;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
        }
        return ret;
    }

    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
    if (ret < 0) {
        *pnum = 0;
        return ret;
    }

    /* BDRV_BLOCK_RAW means the data lives verbatim in bs->file at the
     * returned offset; ask that layer instead. */
    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
        return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                     *pnum, pnum);
    }

    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
        ret |= BDRV_BLOCK_ALLOCATED;
    }

    /* Unallocated sectors may still be known-zero: either the format says
     * so, or they lie past the end of a (shorter) backing file. */
    if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
        if (bdrv_unallocated_blocks_are_zero(bs)) {
            ret |= BDRV_BLOCK_ZERO;
        } else if (bs->backing_hd) {
            BlockDriverState *bs2 = bs->backing_hd;
            int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
            if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
                ret |= BDRV_BLOCK_ZERO;
            }
        }
    }

    /* If the mapping into bs->file is known, the protocol layer may know
     * that the range is zero even though the format layer does not. */
    if (bs->file &&
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                        *pnum, pnum);
        if (ret2 >= 0) {
            /* Ignore errors. This is just providing extra information, it
             * is useful but not necessary.
             */
            ret |= (ret2 & BDRV_BLOCK_ZERO);
        }
    }

    return ret;
}
4082
/* Coroutine wrapper for bdrv_get_block_status(): unpacks the argument
 * bundle, runs the coroutine version, and signals completion. */
static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
{
    BdrvCoGetBlockStatusData *data = opaque;
    BlockDriverState *bs = data->bs;

    data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
                                         data->pnum);
    data->done = true;
}
4093
/*
 * Synchronous wrapper around bdrv_co_get_block_status().
 *
 * See bdrv_co_get_block_status() for details.
 */
int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors, int *pnum)
{
    Coroutine *co;
    BdrvCoGetBlockStatusData data = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .pnum = pnum,
        .done = false,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_get_block_status_co_entry(&data);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
        qemu_coroutine_enter(co, &data);
        /* Pump the BDS's event loop until the coroutine completes */
        while (!data.done) {
            aio_poll(aio_context, true);
        }
    }
    return data.ret;
}
4125
b6b8a333
PB
4126int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4127 int nb_sectors, int *pnum)
4128{
4333bb71
PB
4129 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4130 if (ret < 0) {
4131 return ret;
4132 }
01fb2705 4133 return !!(ret & BDRV_BLOCK_ALLOCATED);
b6b8a333
PB
4134}
4135
/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return true if the given sector is allocated in any image between
 * BASE and TOP (inclusive). BASE can be NULL to check if the given
 * sector is allocated in any image of the chain. Return false otherwise,
 * or a negative errno on failure.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 */
int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
                            int64_t sector_num,
                            int nb_sectors, int *pnum)
{
    BlockDriverState *intermediate;
    int ret, n = nb_sectors;

    intermediate = top;
    while (intermediate && intermediate != base) {
        int pnum_inter;
        ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
                                &pnum_inter);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            /* Allocated in this layer: report its run length */
            *pnum = pnum_inter;
            return 1;
        }

        /*
         * [sector_num, nb_sectors] is unallocated on top but intermediate
         * might have
         *
         * [sector_num+x, nr_sectors] allocated.
         */
        /* Shrink the reported run, except when the short run merely
         * reflects the end of a shorter intermediate image. */
        if (n > pnum_inter &&
            (intermediate == top ||
             sector_num + pnum_inter < intermediate->total_sectors)) {
            n = pnum_inter;
        }

        intermediate = intermediate->backing_hd;
    }

    *pnum = n;
    return 0;
}
4186
045df330
AL
4187const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4188{
4189 if (bs->backing_hd && bs->backing_hd->encrypted)
4190 return bs->backing_file;
4191 else if (bs->encrypted)
4192 return bs->filename;
4193 else
4194 return NULL;
4195}
4196
/*
 * Copy the backing file name recorded for @bs into @filename, truncating
 * to @filename_size (always NUL-terminated via pstrcpy).
 */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
4202
5fafdf24 4203int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
4204 const uint8_t *buf, int nb_sectors)
4205{
4206 BlockDriver *drv = bs->drv;
4207 if (!drv)
19cb3738 4208 return -ENOMEDIUM;
faea38e7
FB
4209 if (!drv->bdrv_write_compressed)
4210 return -ENOTSUP;
fbb7b4e0
KW
4211 if (bdrv_check_request(bs, sector_num, nb_sectors))
4212 return -EIO;
a55eb92c 4213
e4654d2d 4214 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
a55eb92c 4215
faea38e7
FB
4216 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4217}
3b46e624 4218
faea38e7
FB
4219int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4220{
4221 BlockDriver *drv = bs->drv;
4222 if (!drv)
19cb3738 4223 return -ENOMEDIUM;
faea38e7
FB
4224 if (!drv->bdrv_get_info)
4225 return -ENOTSUP;
4226 memset(bdi, 0, sizeof(*bdi));
4227 return drv->bdrv_get_info(bs, bdi);
4228}
4229
/*
 * Return driver-specific image information (for "qemu-img info" style
 * output), or NULL when the driver provides none.  Ownership of the
 * returned object passes to the caller.
 */
ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (drv && drv->bdrv_get_specific_info) {
        return drv->bdrv_get_specific_info(bs);
    }
    return NULL;
}
4238
/*
 * Convenience wrapper around bdrv_writev_vmstate(): wraps the flat buffer
 * in a single-element QEMUIOVector and writes it at VM-state offset @pos.
 */
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *) buf,   /* cast away const for the iovec API */
        .iov_len = size,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_writev_vmstate(bs, &qiov, pos);
}
4251
/*
 * Write VM state data at offset @pos.  Handled by the driver when it
 * implements bdrv_save_vmstate, otherwise forwarded down the bs->file
 * chain; -ENOTSUP when nobody handles it, -ENOMEDIUM without a driver.
 */
int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    } else if (drv->bdrv_save_vmstate) {
        return drv->bdrv_save_vmstate(bs, qiov, pos);
    } else if (bs->file) {
        /* Recurse into the protocol layer */
        return bdrv_writev_vmstate(bs->file, qiov, pos);
    }

    return -ENOTSUP;
}
4266
45566e9c
CH
4267int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4268 int64_t pos, int size)
178e08a5
AL
4269{
4270 BlockDriver *drv = bs->drv;
4271 if (!drv)
4272 return -ENOMEDIUM;
7cdb1f6d
MK
4273 if (drv->bdrv_load_vmstate)
4274 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4275 if (bs->file)
4276 return bdrv_load_vmstate(bs->file, buf, pos, size);
4277 return -ENOTSUP;
178e08a5
AL
4278}
4279
/*
 * Deliver a blkdebug event to the driver, if it cares; a no-op for NULL
 * BDS, missing driver, or drivers without the callback.
 */
void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
    if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
        return;
    }

    bs->drv->bdrv_debug_event(bs, event);
}
4288
/*
 * Install a blkdebug breakpoint for @event tagged @tag, walking down the
 * bs->file chain to the first driver that implements the callback.
 * Returns -ENOTSUP if none does.
 *
 * NOTE(review): the loop stops when bs->drv is NULL, unlike
 * bdrv_debug_resume() which keeps descending in that case — presumably
 * harmless, but worth confirming the intended behavior.
 */
int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
                          const char *tag)
{
    while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
        bs = bs->file;
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
        return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
    }

    return -ENOTSUP;
}
4302
/*
 * Remove the blkdebug breakpoint(s) tagged @tag; searches down the
 * bs->file chain like bdrv_debug_breakpoint().  -ENOTSUP if no layer
 * implements it.
 */
int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
{
    while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
        bs = bs->file;
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
        return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
    }

    return -ENOTSUP;
}
4315
/*
 * Resume a request suspended at a blkdebug breakpoint tagged @tag.
 * Descends the bs->file chain (also past layers with a NULL driver) to
 * the first driver implementing the callback; -ENOTSUP if none.
 */
int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
{
    while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
        bs = bs->file;
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
        return bs->drv->bdrv_debug_resume(bs, tag);
    }

    return -ENOTSUP;
}
4328
/*
 * Return true if a request is currently suspended at a blkdebug
 * breakpoint tagged @tag anywhere down the bs->file chain; false if no
 * layer implements the query.
 */
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
{
    while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
        bs = bs->file;
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
        return bs->drv->bdrv_debug_is_suspended(bs, tag);
    }

    return false;
}
4341
199630b6
BS
4342int bdrv_is_snapshot(BlockDriverState *bs)
4343{
4344 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4345}
4346
/* backing_file can either be relative, or absolute, or a protocol. If it is
 * relative, it must be relative to the chain. So, passing in bs->filename
 * from a BDS as backing_file should not be done, as that may be relative to
 * the CWD rather than the chain. */
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
        const char *backing_file)
{
    char *filename_full = NULL;
    char *backing_file_full = NULL;
    char *filename_tmp = NULL;
    int is_protocol = 0;
    BlockDriverState *curr_bs = NULL;
    BlockDriverState *retval = NULL;

    if (!bs || !bs->drv || !backing_file) {
        return NULL;
    }

    filename_full = g_malloc(PATH_MAX);
    backing_file_full = g_malloc(PATH_MAX);
    filename_tmp = g_malloc(PATH_MAX);

    is_protocol = path_has_protocol(backing_file);

    /* Walk the chain; for each layer, decide whether its recorded backing
     * file matches the requested one. */
    for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {

        /* If either of the filename paths is actually a protocol, then
         * compare unmodified paths; otherwise make paths relative */
        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        } else {
            /* If not an absolute filename path, make it relative to the current
             * image's filename path */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         backing_file);

            /* We are going to compare absolute pathnames */
            if (!realpath(filename_tmp, filename_full)) {
                continue;
            }

            /* We need to make sure the backing filename we are comparing against
             * is relative to the current image filename (or absolute) */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         curr_bs->backing_file);

            if (!realpath(filename_tmp, backing_file_full)) {
                continue;
            }

            if (strcmp(backing_file_full, filename_full) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        }
    }

    g_free(filename_full);
    g_free(backing_file_full);
    g_free(filename_tmp);
    return retval;
}
4412
f198fd1c
BC
4413int bdrv_get_backing_file_depth(BlockDriverState *bs)
4414{
4415 if (!bs->drv) {
4416 return 0;
4417 }
4418
4419 if (!bs->backing_hd) {
4420 return 0;
4421 }
4422
4423 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4424}
4425
ea2384d3 4426/**************************************************************/
83f64091 4427/* async I/Os */
ea2384d3 4428
/*
 * Asynchronous vectored read of @nb_sectors starting at @sector_num;
 * @cb is invoked with @opaque and the request's error code on completion.
 */
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                 QEMUIOVector *qiov, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
                                 cb, opaque, false);
}
4438
/*
 * Asynchronous vectored write counterpart of bdrv_aio_readv().
 */
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                                  QEMUIOVector *qiov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
                                 cb, opaque, true);
}
4448
/*
 * Asynchronously write zeroes over the given sector range (NULL qiov plus
 * BDRV_REQ_ZERO_WRITE); extra @flags are passed through to the request.
 */
BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
                                 BDRV_REQ_ZERO_WRITE | flags,
                                 cb, opaque, true);
}
4459
40b4f539
KW
4460
4461typedef struct MultiwriteCB {
4462 int error;
4463 int num_requests;
4464 int num_callbacks;
4465 struct {
4466 BlockDriverCompletionFunc *cb;
4467 void *opaque;
4468 QEMUIOVector *free_qiov;
40b4f539
KW
4469 } callbacks[];
4470} MultiwriteCB;
4471
4472static void multiwrite_user_cb(MultiwriteCB *mcb)
4473{
4474 int i;
4475
4476 for (i = 0; i < mcb->num_callbacks; i++) {
4477 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
4478 if (mcb->callbacks[i].free_qiov) {
4479 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4480 }
7267c094 4481 g_free(mcb->callbacks[i].free_qiov);
40b4f539
KW
4482 }
4483}
4484
/* Completion callback for each merged request of a multiwrite batch:
 * records the first error and, when the last request finishes, notifies
 * the original callers and frees the batch state. */
static void multiwrite_cb(void *opaque, int ret)
{
    MultiwriteCB *mcb = opaque;

    trace_multiwrite_cb(mcb, ret);

    if (ret < 0 && !mcb->error) {
        mcb->error = ret;
    }

    mcb->num_requests--;
    if (mcb->num_requests == 0) {
        multiwrite_user_cb(mcb);
        g_free(mcb);
    }
}
4501
4502static int multiwrite_req_compare(const void *a, const void *b)
4503{
77be4366
CH
4504 const BlockRequest *req1 = a, *req2 = b;
4505
4506 /*
4507 * Note that we can't simply subtract req2->sector from req1->sector
4508 * here as that could overflow the return value.
4509 */
4510 if (req1->sector > req2->sector) {
4511 return 1;
4512 } else if (req1->sector < req2->sector) {
4513 return -1;
4514 } else {
4515 return 0;
4516 }
40b4f539
KW
4517}
4518
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
                            int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // Handle exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // Don't exceed the iovec limit of the merged request
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);

            // We should not need to add any zeros between the two requests:
            // the merge condition above guarantees they touch or overlap.
            assert (reqs[i].sector <= oldreq_last);

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);

            // Add tail of first request, if necessary
            if (qiov->size < reqs[outidx].qiov->size) {
                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
                                  reqs[outidx].qiov->size - qiov->size);
            }

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            // Record the merged qiov so multiwrite_user_cb() can free it
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            outidx++;
            reqs[outidx].sector = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov = reqs[i].qiov;
        }
    }

    return outidx + 1;
}
4584
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /* Run the aio requests. */
    /* num_requests is set before submitting so a fast completion cannot
     * free mcb while later requests are still being issued. */
    mcb->num_requests = num_reqs;
    for (i = 0; i < num_reqs; i++) {
        bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
                              reqs[i].nb_sectors, reqs[i].flags,
                              multiwrite_cb, mcb,
                              true);
    }

    return 0;
}
4642
/*
 * Synchronously cancel an AIO request: ask for async cancellation, then
 * poll the request's AioContext until only our own reference remains
 * (i.e. the request has completed).  The extra ref keeps @acb alive while
 * we poll.
 */
void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    qemu_aio_ref(acb);
    bdrv_aio_cancel_async(acb);
    while (acb->refcnt > 1) {
        if (acb->aiocb_info->get_aio_context) {
            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
        } else if (acb->bs) {
            aio_poll(bdrv_get_aio_context(acb->bs), true);
        } else {
            /* No way to find the right event loop to poll */
            abort();
        }
    }
    qemu_aio_unref(acb);
}
4658
/* Async version of aio cancel. The caller is not blocked if the acb implements
 * cancel_async, otherwise we do nothing and let the request normally complete.
 * In either case the completion callback must be called. */
void bdrv_aio_cancel_async(BlockDriverAIOCB *acb)
{
    if (acb->aiocb_info->cancel_async) {
        acb->aiocb_info->cancel_async(acb);
    }
}
4668
4669/**************************************************************/
4670/* async block device emulation */
4671
/* AIOCB used to emulate async I/O on top of a driver's synchronous
 * bdrv_read/bdrv_write via a bounce buffer and a bottom half. */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;           /* completion is deferred to this bottom half */
    int ret;
    /* vector translation state */
    QEMUIOVector *qiov;
    uint8_t *bounce;      /* flat buffer standing in for qiov */
    int is_write;
} BlockDriverAIOCBSync;

static const AIOCBInfo bdrv_em_aiocb_info = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
};
4685
/* Bottom half finishing a sync-emulated AIO request: copy the bounce
 * buffer back for reads, release resources, and run the user callback. */
static void bdrv_aio_bh_cb(void *opaque)
{
    BlockDriverAIOCBSync *acb = opaque;

    if (!acb->is_write && acb->ret >= 0) {
        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
    }
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_unref(acb);
}
beac80cd 4699
/*
 * Emulate an async vectored request using the driver's synchronous
 * bdrv_read/bdrv_write: the I/O runs here, the completion callback is
 * deferred to a bottom half so it never runs before this returns.
 */
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov,
                                            int nb_sectors,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque,
                                            int is_write)
{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    acb->bounce = qemu_try_blockalign(bs, qiov->size);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);

    if (acb->bounce == NULL) {
        /* Report the allocation failure through the usual completion path */
        acb->ret = -ENOMEM;
    } else if (is_write) {
        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
4730
/* Async-read emulation entry point (see bdrv_aio_rw_vector). */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}

/* Async-write emulation entry point (see bdrv_aio_rw_vector). */
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
beac80cd 4744
/* AIOCB backing the coroutine-based AIO emulation (bdrv_co_aio_rw_vector,
 * bdrv_aio_flush, bdrv_aio_discard). */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    BlockRequest req;     /* request parameters and resulting error */
    bool is_write;
    bool *done;           /* not set in the code visible here — presumably
                           * used by other callers; verify before removing */
    QEMUBH* bh;           /* defers the completion callback */
} BlockDriverAIOCBCoroutine;

static const AIOCBInfo bdrv_em_co_aiocb_info = {
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
};
4757
/* Bottom half completing a coroutine-emulated AIO request: run the user
 * callback with the stored error and release the AIOCB. */
static void bdrv_co_em_bh(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);

    qemu_bh_delete(acb->bh);
    qemu_aio_unref(acb);
}
4767
/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
static void coroutine_fn bdrv_co_do_rw(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    if (!acb->is_write) {
        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
    } else {
        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
    }

    /* Completion runs from a bottom half, never from coroutine context */
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
4785
68485420
KW
4786static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4787 int64_t sector_num,
4788 QEMUIOVector *qiov,
4789 int nb_sectors,
d20d9b7c 4790 BdrvRequestFlags flags,
68485420
KW
4791 BlockDriverCompletionFunc *cb,
4792 void *opaque,
8c5873d6 4793 bool is_write)
68485420
KW
4794{
4795 Coroutine *co;
4796 BlockDriverAIOCBCoroutine *acb;
4797
d7331bed 4798 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
68485420
KW
4799 acb->req.sector = sector_num;
4800 acb->req.nb_sectors = nb_sectors;
4801 acb->req.qiov = qiov;
d20d9b7c 4802 acb->req.flags = flags;
68485420
KW
4803 acb->is_write = is_write;
4804
8c5873d6 4805 co = qemu_coroutine_create(bdrv_co_do_rw);
68485420
KW
4806 qemu_coroutine_enter(co, acb);
4807
4808 return &acb->common;
4809}
4810
/* Coroutine entry point for bdrv_aio_flush(): perform the flush and
 * schedule the completion bottom half. */
static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_flush(bs);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
4820
07f07615 4821BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
016f5cf6
AG
4822 BlockDriverCompletionFunc *cb, void *opaque)
4823{
07f07615 4824 trace_bdrv_aio_flush(bs, opaque);
016f5cf6 4825
07f07615
PB
4826 Coroutine *co;
4827 BlockDriverAIOCBCoroutine *acb;
016f5cf6 4828
d7331bed 4829 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
d318aea9 4830
07f07615
PB
4831 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4832 qemu_coroutine_enter(co, acb);
016f5cf6 4833
016f5cf6
AG
4834 return &acb->common;
4835}
4836
/* Coroutine entry point for bdrv_aio_discard(): perform the discard and
 * schedule the completion bottom half. */
static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
4846
/*
 * Asynchronously discard (unmap) the given sector range; the result is
 * reported through @cb(@opaque, error).
 */
BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);

    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
4864
/* Register all built-in block drivers (module init hook). */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}

/* Like bdrv_init(), but restrict usable drivers to the configured
 * whitelist. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
4875
/*
 * Allocate an AIOCB of the size requested by @aiocb_info and initialize
 * the common fields.  The returned block starts with refcount 1; release
 * with qemu_aio_unref().
 */
void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
                   BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriverAIOCB *acb;

    acb = g_slice_alloc(aiocb_info->aiocb_size);
    acb->aiocb_info = aiocb_info;
    acb->bs = bs;
    acb->cb = cb;
    acb->opaque = opaque;
    acb->refcnt = 1;
    return acb;
}
4889
/* Take an additional reference on an AIOCB allocated by qemu_aio_get(). */
void qemu_aio_ref(void *p)
{
    BlockDriverAIOCB *acb = p;
    acb->refcnt++;
}
4895
8007429a 4896void qemu_aio_unref(void *p)
ce1a14dc 4897{
d37c975f 4898 BlockDriverAIOCB *acb = p;
f197fe2b
FZ
4899 assert(acb->refcnt > 0);
4900 if (--acb->refcnt == 0) {
4901 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
4902 }
ce1a14dc 4903}
19cb3738 4904
/**************************************************************/
/* Coroutine block device emulation */

/* Rendezvous between a yielded coroutine and an AIO completion. */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;  /* coroutine to re-enter on completion */
    int ret;               /* result handed back to it */
} CoroutineIOCompletion;

/* AIO completion callback: store the result and wake the coroutine. */
static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    qemu_coroutine_enter(co->coroutine, NULL);
}
4920
/*
 * Emulate a coroutine read/write on top of the driver's AIO interface:
 * submit the request, yield until bdrv_co_io_em_complete() wakes us, and
 * return its result (-EIO if submission itself failed).
 */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    qemu_coroutine_yield();

    return co.ret;
}
4946
/* Coroutine read emulated via the driver's AIO interface. */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
}

/* Coroutine write emulated via the driver's AIO interface. */
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}
4960
/* Coroutine entry point for the synchronous bdrv_flush() wrapper. */
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}
4967
/*
 * Flush @bs: push cached data to the OS, then (unless cache=unsafe) to
 * stable storage, then recurse into the underlying protocol layer.
 * Returns 0 on success or if there is nothing to flush, else -errno.
 */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    /* Nothing to do without a writable, inserted medium */
    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_parent;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        /* Fall back to the AIO interface: submit and yield until done */
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
flush_parent:
    return bdrv_co_flush(bs->file);
}
5030
5a8a30db 5031void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
0f15423c 5032{
5a8a30db
KW
5033 Error *local_err = NULL;
5034 int ret;
5035
3456a8d1
KW
5036 if (!bs->drv) {
5037 return;
5038 }
5039
5040 if (bs->drv->bdrv_invalidate_cache) {
5a8a30db 5041 bs->drv->bdrv_invalidate_cache(bs, &local_err);
3456a8d1 5042 } else if (bs->file) {
5a8a30db
KW
5043 bdrv_invalidate_cache(bs->file, &local_err);
5044 }
5045 if (local_err) {
5046 error_propagate(errp, local_err);
5047 return;
0f15423c 5048 }
3456a8d1 5049
5a8a30db
KW
5050 ret = refresh_total_sectors(bs, bs->total_sectors);
5051 if (ret < 0) {
5052 error_setg_errno(errp, -ret, "Could not refresh total sector count");
5053 return;
5054 }
0f15423c
AL
5055}
5056
5a8a30db 5057void bdrv_invalidate_cache_all(Error **errp)
0f15423c
AL
5058{
5059 BlockDriverState *bs;
5a8a30db 5060 Error *local_err = NULL;
0f15423c 5061
dc364f4c 5062 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
5063 AioContext *aio_context = bdrv_get_aio_context(bs);
5064
5065 aio_context_acquire(aio_context);
5a8a30db 5066 bdrv_invalidate_cache(bs, &local_err);
ed78cda3 5067 aio_context_release(aio_context);
5a8a30db
KW
5068 if (local_err) {
5069 error_propagate(errp, local_err);
5070 return;
5071 }
0f15423c
AL
5072 }
5073}
5074
07789269
BC
5075void bdrv_clear_incoming_migration_all(void)
5076{
5077 BlockDriverState *bs;
5078
dc364f4c 5079 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
5080 AioContext *aio_context = bdrv_get_aio_context(bs);
5081
5082 aio_context_acquire(aio_context);
07789269 5083 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
ed78cda3 5084 aio_context_release(aio_context);
07789269
BC
5085 }
5086}
5087
07f07615
PB
5088int bdrv_flush(BlockDriverState *bs)
5089{
5090 Coroutine *co;
5091 RwCo rwco = {
5092 .bs = bs,
5093 .ret = NOT_DONE,
e7a8a783 5094 };
e7a8a783 5095
07f07615
PB
5096 if (qemu_in_coroutine()) {
5097 /* Fast-path if already in coroutine context */
5098 bdrv_flush_co_entry(&rwco);
5099 } else {
2572b37a
SH
5100 AioContext *aio_context = bdrv_get_aio_context(bs);
5101
07f07615
PB
5102 co = qemu_coroutine_create(bdrv_flush_co_entry);
5103 qemu_coroutine_enter(co, &rwco);
5104 while (rwco.ret == NOT_DONE) {
2572b37a 5105 aio_poll(aio_context, true);
07f07615 5106 }
e7a8a783 5107 }
07f07615
PB
5108
5109 return rwco.ret;
e7a8a783
KW
5110}
5111
775aa8b6
KW
5112typedef struct DiscardCo {
5113 BlockDriverState *bs;
5114 int64_t sector_num;
5115 int nb_sectors;
5116 int ret;
5117} DiscardCo;
4265d620
PB
5118static void coroutine_fn bdrv_discard_co_entry(void *opaque)
5119{
775aa8b6 5120 DiscardCo *rwco = opaque;
4265d620
PB
5121
5122 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
5123}
5124
6f14da52
PL
5125/* if no limit is specified in the BlockLimits use a default
5126 * of 32768 512-byte sectors (16 MiB) per request.
5127 */
5128#define MAX_DISCARD_DEFAULT 32768
5129
4265d620
PB
5130int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5131 int nb_sectors)
5132{
d51e9fe5
PB
5133 int max_discard;
5134
4265d620
PB
5135 if (!bs->drv) {
5136 return -ENOMEDIUM;
5137 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
5138 return -EIO;
5139 } else if (bs->read_only) {
5140 return -EROFS;
df702c9b
PB
5141 }
5142
e4654d2d 5143 bdrv_reset_dirty(bs, sector_num, nb_sectors);
df702c9b 5144
9e8f1835
PB
5145 /* Do nothing if disabled. */
5146 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5147 return 0;
5148 }
5149
d51e9fe5
PB
5150 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
5151 return 0;
5152 }
6f14da52 5153
d51e9fe5
PB
5154 max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
5155 while (nb_sectors > 0) {
5156 int ret;
5157 int num = nb_sectors;
6f14da52 5158
d51e9fe5
PB
5159 /* align request */
5160 if (bs->bl.discard_alignment &&
5161 num >= bs->bl.discard_alignment &&
5162 sector_num % bs->bl.discard_alignment) {
5163 if (num > bs->bl.discard_alignment) {
5164 num = bs->bl.discard_alignment;
6f14da52 5165 }
d51e9fe5
PB
5166 num -= sector_num % bs->bl.discard_alignment;
5167 }
6f14da52 5168
d51e9fe5
PB
5169 /* limit request size */
5170 if (num > max_discard) {
5171 num = max_discard;
5172 }
6f14da52 5173
d51e9fe5 5174 if (bs->drv->bdrv_co_discard) {
6f14da52 5175 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
d51e9fe5
PB
5176 } else {
5177 BlockDriverAIOCB *acb;
5178 CoroutineIOCompletion co = {
5179 .coroutine = qemu_coroutine_self(),
5180 };
5181
5182 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
5183 bdrv_co_io_em_complete, &co);
5184 if (acb == NULL) {
5185 return -EIO;
5186 } else {
5187 qemu_coroutine_yield();
5188 ret = co.ret;
6f14da52 5189 }
6f14da52 5190 }
7ce21016 5191 if (ret && ret != -ENOTSUP) {
d51e9fe5 5192 return ret;
4265d620 5193 }
d51e9fe5
PB
5194
5195 sector_num += num;
5196 nb_sectors -= num;
4265d620 5197 }
d51e9fe5 5198 return 0;
4265d620
PB
5199}
5200
5201int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
5202{
5203 Coroutine *co;
775aa8b6 5204 DiscardCo rwco = {
4265d620
PB
5205 .bs = bs,
5206 .sector_num = sector_num,
5207 .nb_sectors = nb_sectors,
5208 .ret = NOT_DONE,
5209 };
5210
5211 if (qemu_in_coroutine()) {
5212 /* Fast-path if already in coroutine context */
5213 bdrv_discard_co_entry(&rwco);
5214 } else {
2572b37a
SH
5215 AioContext *aio_context = bdrv_get_aio_context(bs);
5216
4265d620
PB
5217 co = qemu_coroutine_create(bdrv_discard_co_entry);
5218 qemu_coroutine_enter(co, &rwco);
5219 while (rwco.ret == NOT_DONE) {
2572b37a 5220 aio_poll(aio_context, true);
4265d620
PB
5221 }
5222 }
5223
5224 return rwco.ret;
5225}
5226
19cb3738
FB
5227/**************************************************************/
5228/* removable device support */
5229
5230/**
5231 * Return TRUE if the media is present
5232 */
5233int bdrv_is_inserted(BlockDriverState *bs)
5234{
5235 BlockDriver *drv = bs->drv;
a1aff5bf 5236
19cb3738
FB
5237 if (!drv)
5238 return 0;
5239 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
5240 return 1;
5241 return drv->bdrv_is_inserted(bs);
19cb3738
FB
5242}
5243
5244/**
8e49ca46
MA
5245 * Return whether the media changed since the last call to this
5246 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
5247 */
5248int bdrv_media_changed(BlockDriverState *bs)
5249{
5250 BlockDriver *drv = bs->drv;
19cb3738 5251
8e49ca46
MA
5252 if (drv && drv->bdrv_media_changed) {
5253 return drv->bdrv_media_changed(bs);
5254 }
5255 return -ENOTSUP;
19cb3738
FB
5256}
5257
5258/**
5259 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5260 */
f36f3949 5261void bdrv_eject(BlockDriverState *bs, bool eject_flag)
19cb3738
FB
5262{
5263 BlockDriver *drv = bs->drv;
19cb3738 5264
822e1cd1
MA
5265 if (drv && drv->bdrv_eject) {
5266 drv->bdrv_eject(bs, eject_flag);
19cb3738 5267 }
6f382ed2
LC
5268
5269 if (bs->device_name[0] != '\0') {
a5ee7bd4
WX
5270 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
5271 eject_flag, &error_abort);
6f382ed2 5272 }
19cb3738
FB
5273}
5274
19cb3738
FB
5275/**
5276 * Lock or unlock the media (if it is locked, the user won't be able
5277 * to eject it manually).
5278 */
025e849a 5279void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
5280{
5281 BlockDriver *drv = bs->drv;
5282
025e849a 5283 trace_bdrv_lock_medium(bs, locked);
b8c6d095 5284
025e849a
MA
5285 if (drv && drv->bdrv_lock_medium) {
5286 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
5287 }
5288}
985a03b0
TS
5289
5290/* needed for generic scsi interface */
5291
5292int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5293{
5294 BlockDriver *drv = bs->drv;
5295
5296 if (drv && drv->bdrv_ioctl)
5297 return drv->bdrv_ioctl(bs, req, buf);
5298 return -ENOTSUP;
5299}
7d780669 5300
221f715d
AL
5301BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
5302 unsigned long int req, void *buf,
5303 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 5304{
221f715d 5305 BlockDriver *drv = bs->drv;
7d780669 5306
221f715d
AL
5307 if (drv && drv->bdrv_aio_ioctl)
5308 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5309 return NULL;
7d780669 5310}
e268ca52 5311
1b7fd729 5312void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
7b6f9300 5313{
1b7fd729 5314 bs->guest_block_size = align;
7b6f9300 5315}
7cd1e32a 5316
e268ca52
AL
5317void *qemu_blockalign(BlockDriverState *bs, size_t size)
5318{
339064d5 5319 return qemu_memalign(bdrv_opt_mem_align(bs), size);
e268ca52 5320}
7cd1e32a 5321
7d2a35cc
KW
5322void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
5323{
5324 size_t align = bdrv_opt_mem_align(bs);
5325
5326 /* Ensure that NULL is never returned on success */
5327 assert(align > 0);
5328 if (size == 0) {
5329 size = align;
5330 }
5331
5332 return qemu_try_memalign(align, size);
5333}
5334
c53b1c51
SH
5335/*
5336 * Check if all memory in this vector is sector aligned.
5337 */
5338bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5339{
5340 int i;
339064d5 5341 size_t alignment = bdrv_opt_mem_align(bs);
c53b1c51
SH
5342
5343 for (i = 0; i < qiov->niov; i++) {
339064d5 5344 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
c53b1c51 5345 return false;
1ff735bd 5346 }
339064d5 5347 if (qiov->iov[i].iov_len % alignment) {
1ff735bd 5348 return false;
c53b1c51
SH
5349 }
5350 }
5351
5352 return true;
5353}
5354
b8afb520
FZ
5355BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
5356 Error **errp)
7cd1e32a
LS
5357{
5358 int64_t bitmap_size;
e4654d2d 5359 BdrvDirtyBitmap *bitmap;
a55eb92c 5360
50717e94
PB
5361 assert((granularity & (granularity - 1)) == 0);
5362
e4654d2d
FZ
5363 granularity >>= BDRV_SECTOR_BITS;
5364 assert(granularity);
57322b78 5365 bitmap_size = bdrv_nb_sectors(bs);
b8afb520
FZ
5366 if (bitmap_size < 0) {
5367 error_setg_errno(errp, -bitmap_size, "could not get length of device");
5368 errno = -bitmap_size;
5369 return NULL;
5370 }
5839e53b 5371 bitmap = g_new0(BdrvDirtyBitmap, 1);
e4654d2d
FZ
5372 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
5373 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5374 return bitmap;
5375}
5376
5377void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5378{
5379 BdrvDirtyBitmap *bm, *next;
5380 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5381 if (bm == bitmap) {
5382 QLIST_REMOVE(bitmap, list);
5383 hbitmap_free(bitmap->bitmap);
5384 g_free(bitmap);
5385 return;
a55eb92c 5386 }
7cd1e32a
LS
5387 }
5388}
5389
21b56835
FZ
5390BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5391{
5392 BdrvDirtyBitmap *bm;
5393 BlockDirtyInfoList *list = NULL;
5394 BlockDirtyInfoList **plist = &list;
5395
5396 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
5839e53b
MA
5397 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
5398 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
21b56835
FZ
5399 info->count = bdrv_get_dirty_count(bs, bm);
5400 info->granularity =
5401 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
5402 entry->value = info;
5403 *plist = entry;
5404 plist = &entry->next;
5405 }
5406
5407 return list;
5408}
5409
e4654d2d 5410int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
7cd1e32a 5411{
e4654d2d
FZ
5412 if (bitmap) {
5413 return hbitmap_get(bitmap->bitmap, sector);
7cd1e32a
LS
5414 } else {
5415 return 0;
5416 }
5417}
5418
e4654d2d
FZ
5419void bdrv_dirty_iter_init(BlockDriverState *bs,
5420 BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
1755da16 5421{
e4654d2d 5422 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
1755da16
PB
5423}
5424
5425void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5426 int nr_sectors)
5427{
e4654d2d
FZ
5428 BdrvDirtyBitmap *bitmap;
5429 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5430 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
5431 }
1755da16
PB
5432}
5433
e4654d2d 5434void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
7cd1e32a 5435{
e4654d2d
FZ
5436 BdrvDirtyBitmap *bitmap;
5437 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5438 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5439 }
7cd1e32a 5440}
aaa0eb75 5441
e4654d2d 5442int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
aaa0eb75 5443{
e4654d2d 5444 return hbitmap_count(bitmap->bitmap);
aaa0eb75 5445}
f88e1a42 5446
9fcb0251
FZ
5447/* Get a reference to bs */
5448void bdrv_ref(BlockDriverState *bs)
5449{
5450 bs->refcnt++;
5451}
5452
5453/* Release a previously grabbed reference to bs.
5454 * If after releasing, reference count is zero, the BlockDriverState is
5455 * deleted. */
5456void bdrv_unref(BlockDriverState *bs)
5457{
9a4d5ca6
JC
5458 if (!bs) {
5459 return;
5460 }
9fcb0251
FZ
5461 assert(bs->refcnt > 0);
5462 if (--bs->refcnt == 0) {
5463 bdrv_delete(bs);
5464 }
5465}
5466
fbe40ff7
FZ
5467struct BdrvOpBlocker {
5468 Error *reason;
5469 QLIST_ENTRY(BdrvOpBlocker) list;
5470};
5471
5472bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
5473{
5474 BdrvOpBlocker *blocker;
5475 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5476 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
5477 blocker = QLIST_FIRST(&bs->op_blockers[op]);
5478 if (errp) {
5479 error_setg(errp, "Device '%s' is busy: %s",
5480 bs->device_name, error_get_pretty(blocker->reason));
5481 }
5482 return true;
5483 }
5484 return false;
5485}
5486
5487void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5488{
5489 BdrvOpBlocker *blocker;
5490 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5491
5839e53b 5492 blocker = g_new0(BdrvOpBlocker, 1);
fbe40ff7
FZ
5493 blocker->reason = reason;
5494 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5495}
5496
5497void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5498{
5499 BdrvOpBlocker *blocker, *next;
5500 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5501 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5502 if (blocker->reason == reason) {
5503 QLIST_REMOVE(blocker, list);
5504 g_free(blocker);
5505 }
5506 }
5507}
5508
5509void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5510{
5511 int i;
5512 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5513 bdrv_op_block(bs, i, reason);
5514 }
5515}
5516
5517void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5518{
5519 int i;
5520 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5521 bdrv_op_unblock(bs, i, reason);
5522 }
5523}
5524
5525bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5526{
5527 int i;
5528
5529 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5530 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5531 return false;
5532 }
5533 }
5534 return true;
5535}
5536
28a7282a
LC
5537void bdrv_iostatus_enable(BlockDriverState *bs)
5538{
d6bf279e 5539 bs->iostatus_enabled = true;
58e21ef5 5540 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
28a7282a
LC
5541}
5542
5543/* The I/O status is only enabled if the drive explicitly
5544 * enables it _and_ the VM is configured to stop on errors */
5545bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5546{
d6bf279e 5547 return (bs->iostatus_enabled &&
92aa5c6d
PB
5548 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5549 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5550 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
28a7282a
LC
5551}
5552
5553void bdrv_iostatus_disable(BlockDriverState *bs)
5554{
d6bf279e 5555 bs->iostatus_enabled = false;
28a7282a
LC
5556}
5557
5558void bdrv_iostatus_reset(BlockDriverState *bs)
5559{
5560 if (bdrv_iostatus_is_enabled(bs)) {
58e21ef5 5561 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3bd293c3
PB
5562 if (bs->job) {
5563 block_job_iostatus_reset(bs->job);
5564 }
28a7282a
LC
5565 }
5566}
5567
28a7282a
LC
5568void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5569{
3e1caa5f
PB
5570 assert(bdrv_iostatus_is_enabled(bs));
5571 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
58e21ef5
LC
5572 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5573 BLOCK_DEVICE_IO_STATUS_FAILED;
28a7282a
LC
5574 }
5575}
5576
d92ada22
LC
5577void bdrv_img_create(const char *filename, const char *fmt,
5578 const char *base_filename, const char *base_fmt,
f382d43a
MR
5579 char *options, uint64_t img_size, int flags,
5580 Error **errp, bool quiet)
f88e1a42 5581{
83d0521a
CL
5582 QemuOptsList *create_opts = NULL;
5583 QemuOpts *opts = NULL;
5584 const char *backing_fmt, *backing_file;
5585 int64_t size;
f88e1a42 5586 BlockDriver *drv, *proto_drv;
96df67d1 5587 BlockDriver *backing_drv = NULL;
cc84d90f 5588 Error *local_err = NULL;
f88e1a42
JS
5589 int ret = 0;
5590
5591 /* Find driver and parse its options */
5592 drv = bdrv_find_format(fmt);
5593 if (!drv) {
71c79813 5594 error_setg(errp, "Unknown file format '%s'", fmt);
d92ada22 5595 return;
f88e1a42
JS
5596 }
5597
98289620 5598 proto_drv = bdrv_find_protocol(filename, true);
f88e1a42 5599 if (!proto_drv) {
71c79813 5600 error_setg(errp, "Unknown protocol '%s'", filename);
d92ada22 5601 return;
f88e1a42
JS
5602 }
5603
c282e1fd
CL
5604 create_opts = qemu_opts_append(create_opts, drv->create_opts);
5605 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
f88e1a42
JS
5606
5607 /* Create parameter list with default values */
83d0521a
CL
5608 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5609 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size);
f88e1a42
JS
5610
5611 /* Parse -o options */
5612 if (options) {
83d0521a
CL
5613 if (qemu_opts_do_parse(opts, options, NULL) != 0) {
5614 error_setg(errp, "Invalid options for file format '%s'", fmt);
f88e1a42
JS
5615 goto out;
5616 }
5617 }
5618
5619 if (base_filename) {
83d0521a 5620 if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename)) {
71c79813
LC
5621 error_setg(errp, "Backing file not supported for file format '%s'",
5622 fmt);
f88e1a42
JS
5623 goto out;
5624 }
5625 }
5626
5627 if (base_fmt) {
83d0521a 5628 if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt)) {
71c79813
LC
5629 error_setg(errp, "Backing file format not supported for file "
5630 "format '%s'", fmt);
f88e1a42
JS
5631 goto out;
5632 }
5633 }
5634
83d0521a
CL
5635 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5636 if (backing_file) {
5637 if (!strcmp(filename, backing_file)) {
71c79813
LC
5638 error_setg(errp, "Error: Trying to create an image with the "
5639 "same filename as the backing file");
792da93a
JS
5640 goto out;
5641 }
5642 }
5643
83d0521a
CL
5644 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5645 if (backing_fmt) {
5646 backing_drv = bdrv_find_format(backing_fmt);
96df67d1 5647 if (!backing_drv) {
71c79813 5648 error_setg(errp, "Unknown backing file format '%s'",
83d0521a 5649 backing_fmt);
f88e1a42
JS
5650 goto out;
5651 }
5652 }
5653
5654 // The size for the image must always be specified, with one exception:
5655 // If we are using a backing file, we can obtain the size from there
83d0521a
CL
5656 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5657 if (size == -1) {
5658 if (backing_file) {
66f6b814 5659 BlockDriverState *bs;
52bf1e72 5660 int64_t size;
63090dac
PB
5661 int back_flags;
5662
5663 /* backing files always opened read-only */
5664 back_flags =
5665 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
f88e1a42 5666
f67503e5 5667 bs = NULL;
83d0521a 5668 ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags,
cc84d90f 5669 backing_drv, &local_err);
f88e1a42 5670 if (ret < 0) {
cc84d90f 5671 error_setg_errno(errp, -ret, "Could not open '%s': %s",
83d0521a 5672 backing_file,
cc84d90f
HR
5673 error_get_pretty(local_err));
5674 error_free(local_err);
5675 local_err = NULL;
f88e1a42
JS
5676 goto out;
5677 }
52bf1e72
MA
5678 size = bdrv_getlength(bs);
5679 if (size < 0) {
5680 error_setg_errno(errp, -size, "Could not get size of '%s'",
5681 backing_file);
5682 bdrv_unref(bs);
5683 goto out;
5684 }
f88e1a42 5685
83d0521a 5686 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size);
66f6b814
HR
5687
5688 bdrv_unref(bs);
f88e1a42 5689 } else {
71c79813 5690 error_setg(errp, "Image creation needs a size parameter");
f88e1a42
JS
5691 goto out;
5692 }
5693 }
5694
f382d43a
MR
5695 if (!quiet) {
5696 printf("Formatting '%s', fmt=%s ", filename, fmt);
83d0521a 5697 qemu_opts_print(opts);
f382d43a
MR
5698 puts("");
5699 }
83d0521a 5700
c282e1fd 5701 ret = bdrv_create(drv, filename, opts, &local_err);
83d0521a 5702
cc84d90f
HR
5703 if (ret == -EFBIG) {
5704 /* This is generally a better message than whatever the driver would
5705 * deliver (especially because of the cluster_size_hint), since that
5706 * is most probably not much different from "image too large". */
5707 const char *cluster_size_hint = "";
83d0521a 5708 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
cc84d90f 5709 cluster_size_hint = " (try using a larger cluster size)";
f88e1a42 5710 }
cc84d90f
HR
5711 error_setg(errp, "The image size is too large for file format '%s'"
5712 "%s", fmt, cluster_size_hint);
5713 error_free(local_err);
5714 local_err = NULL;
f88e1a42
JS
5715 }
5716
5717out:
83d0521a
CL
5718 qemu_opts_del(opts);
5719 qemu_opts_free(create_opts);
84d18f06 5720 if (local_err) {
cc84d90f
HR
5721 error_propagate(errp, local_err);
5722 }
f88e1a42 5723}
85d126f3
SH
5724
5725AioContext *bdrv_get_aio_context(BlockDriverState *bs)
5726{
dcd04228
SH
5727 return bs->aio_context;
5728}
5729
5730void bdrv_detach_aio_context(BlockDriverState *bs)
5731{
33384421
HR
5732 BdrvAioNotifier *baf;
5733
dcd04228
SH
5734 if (!bs->drv) {
5735 return;
5736 }
5737
33384421
HR
5738 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
5739 baf->detach_aio_context(baf->opaque);
5740 }
5741
13af91eb
SH
5742 if (bs->io_limits_enabled) {
5743 throttle_detach_aio_context(&bs->throttle_state);
5744 }
dcd04228
SH
5745 if (bs->drv->bdrv_detach_aio_context) {
5746 bs->drv->bdrv_detach_aio_context(bs);
5747 }
5748 if (bs->file) {
5749 bdrv_detach_aio_context(bs->file);
5750 }
5751 if (bs->backing_hd) {
5752 bdrv_detach_aio_context(bs->backing_hd);
5753 }
5754
5755 bs->aio_context = NULL;
5756}
5757
5758void bdrv_attach_aio_context(BlockDriverState *bs,
5759 AioContext *new_context)
5760{
33384421
HR
5761 BdrvAioNotifier *ban;
5762
dcd04228
SH
5763 if (!bs->drv) {
5764 return;
5765 }
5766
5767 bs->aio_context = new_context;
5768
5769 if (bs->backing_hd) {
5770 bdrv_attach_aio_context(bs->backing_hd, new_context);
5771 }
5772 if (bs->file) {
5773 bdrv_attach_aio_context(bs->file, new_context);
5774 }
5775 if (bs->drv->bdrv_attach_aio_context) {
5776 bs->drv->bdrv_attach_aio_context(bs, new_context);
5777 }
13af91eb
SH
5778 if (bs->io_limits_enabled) {
5779 throttle_attach_aio_context(&bs->throttle_state, new_context);
5780 }
33384421
HR
5781
5782 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
5783 ban->attached_aio_context(new_context, ban->opaque);
5784 }
dcd04228
SH
5785}
5786
5787void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
5788{
5789 bdrv_drain_all(); /* ensure there are no in-flight requests */
5790
5791 bdrv_detach_aio_context(bs);
5792
5793 /* This function executes in the old AioContext so acquire the new one in
5794 * case it runs in a different thread.
5795 */
5796 aio_context_acquire(new_context);
5797 bdrv_attach_aio_context(bs, new_context);
5798 aio_context_release(new_context);
85d126f3 5799}
d616b224 5800
33384421
HR
5801void bdrv_add_aio_context_notifier(BlockDriverState *bs,
5802 void (*attached_aio_context)(AioContext *new_context, void *opaque),
5803 void (*detach_aio_context)(void *opaque), void *opaque)
5804{
5805 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
5806 *ban = (BdrvAioNotifier){
5807 .attached_aio_context = attached_aio_context,
5808 .detach_aio_context = detach_aio_context,
5809 .opaque = opaque
5810 };
5811
5812 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
5813}
5814
5815void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
5816 void (*attached_aio_context)(AioContext *,
5817 void *),
5818 void (*detach_aio_context)(void *),
5819 void *opaque)
5820{
5821 BdrvAioNotifier *ban, *ban_next;
5822
5823 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
5824 if (ban->attached_aio_context == attached_aio_context &&
5825 ban->detach_aio_context == detach_aio_context &&
5826 ban->opaque == opaque)
5827 {
5828 QLIST_REMOVE(ban, list);
5829 g_free(ban);
5830
5831 return;
5832 }
5833 }
5834
5835 abort();
5836}
5837
d616b224
SH
5838void bdrv_add_before_write_notifier(BlockDriverState *bs,
5839 NotifierWithReturn *notifier)
5840{
5841 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
5842}
6f176b48 5843
c282e1fd 5844int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts)
6f176b48 5845{
c282e1fd 5846 if (!bs->drv->bdrv_amend_options) {
6f176b48
HR
5847 return -ENOTSUP;
5848 }
c282e1fd 5849 return bs->drv->bdrv_amend_options(bs, opts);
6f176b48 5850}
f6186f49 5851
b5042a36
BC
5852/* This function will be called by the bdrv_recurse_is_first_non_filter method
5853 * of block filter and by bdrv_is_first_non_filter.
5854 * It is used to test if the given bs is the candidate or recurse more in the
5855 * node graph.
212a5a8f 5856 */
b5042a36 5857bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
212a5a8f 5858 BlockDriverState *candidate)
f6186f49 5859{
b5042a36
BC
5860 /* return false if basic checks fails */
5861 if (!bs || !bs->drv) {
212a5a8f 5862 return false;
f6186f49
BC
5863 }
5864
b5042a36
BC
5865 /* the code reached a non block filter driver -> check if the bs is
5866 * the same as the candidate. It's the recursion termination condition.
5867 */
5868 if (!bs->drv->is_filter) {
5869 return bs == candidate;
212a5a8f 5870 }
b5042a36 5871 /* Down this path the driver is a block filter driver */
212a5a8f 5872
b5042a36
BC
5873 /* If the block filter recursion method is defined use it to recurse down
5874 * the node graph.
5875 */
5876 if (bs->drv->bdrv_recurse_is_first_non_filter) {
212a5a8f 5877 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
f6186f49
BC
5878 }
5879
b5042a36
BC
5880 /* the driver is a block filter but don't allow to recurse -> return false
5881 */
5882 return false;
f6186f49
BC
5883}
5884
212a5a8f
BC
5885/* This function checks if the candidate is the first non filter bs down it's
5886 * bs chain. Since we don't have pointers to parents it explore all bs chains
5887 * from the top. Some filters can choose not to pass down the recursion.
5888 */
5889bool bdrv_is_first_non_filter(BlockDriverState *candidate)
f6186f49 5890{
212a5a8f
BC
5891 BlockDriverState *bs;
5892
5893 /* walk down the bs forest recursively */
5894 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5895 bool perm;
5896
b5042a36 5897 /* try to recurse in this top level bs */
e6dc8a1f 5898 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
212a5a8f
BC
5899
5900 /* candidate is the first non filter */
5901 if (perm) {
5902 return true;
5903 }
5904 }
5905
5906 return false;
f6186f49 5907}
09158f00
BC
5908
5909BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
5910{
5911 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
5912 if (!to_replace_bs) {
5913 error_setg(errp, "Node name '%s' not found", node_name);
5914 return NULL;
5915 }
5916
5917 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
5918 return NULL;
5919 }
5920
5921 /* We don't want arbitrary node of the BDS chain to be replaced only the top
5922 * most non filter in order to prevent data corruption.
5923 * Another benefit is that this tests exclude backing files which are
5924 * blocked by the backing blockers.
5925 */
5926 if (!bdrv_is_first_non_filter(to_replace_bs)) {
5927 error_setg(errp, "Only top most non filter can be replaced");
5928 return NULL;
5929 }
5930
5931 return to_replace_bs;
5932}
448ad91d
ML
5933
5934void bdrv_io_plug(BlockDriverState *bs)
5935{
5936 BlockDriver *drv = bs->drv;
5937 if (drv && drv->bdrv_io_plug) {
5938 drv->bdrv_io_plug(bs);
5939 } else if (bs->file) {
5940 bdrv_io_plug(bs->file);
5941 }
5942}
5943
5944void bdrv_io_unplug(BlockDriverState *bs)
5945{
5946 BlockDriver *drv = bs->drv;
5947 if (drv && drv->bdrv_io_unplug) {
5948 drv->bdrv_io_unplug(bs);
5949 } else if (bs->file) {
5950 bdrv_io_unplug(bs->file);
5951 }
5952}
5953
5954void bdrv_flush_io_queue(BlockDriverState *bs)
5955{
5956 BlockDriver *drv = bs->drv;
5957 if (drv && drv->bdrv_flush_io_queue) {
5958 drv->bdrv_flush_io_queue(bs);
5959 } else if (bs->file) {
5960 bdrv_flush_io_queue(bs->file);
5961 }
5962}
91af7014
HR
5963
5964static bool append_open_options(QDict *d, BlockDriverState *bs)
5965{
5966 const QDictEntry *entry;
5967 bool found_any = false;
5968
5969 for (entry = qdict_first(bs->options); entry;
5970 entry = qdict_next(bs->options, entry))
5971 {
5972 /* Only take options for this level and exclude all non-driver-specific
5973 * options */
5974 if (!strchr(qdict_entry_key(entry), '.') &&
5975 strcmp(qdict_entry_key(entry), "node-name"))
5976 {
5977 qobject_incref(qdict_entry_value(entry));
5978 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
5979 found_any = true;
5980 }
5981 }
5982
5983 return found_any;
5984}
5985
/* Updates the following BDS fields:
 *  - exact_filename: A filename which may be used for opening a block device
 *                    which (mostly) equals the given BDS (even without any
 *                    other options; so reading and writing must return the same
 *                    results, but caching etc. may be different)
 *  - full_open_options: Options which, when given when opening a block device
 *                       (without a filename), result in a BDS (mostly)
 *                       equalling the given one
 *  - filename: If exact_filename is set, it is copied here. Otherwise,
 *              full_open_options is converted to a JSON object, prefixed with
 *              "json:" (for use through the JSON pseudo protocol) and put here.
 */
void bdrv_refresh_filename(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    QDict *opts;

    /* Without a driver there is nothing to derive the filename from */
    if (!drv) {
        return;
    }

    /* This BDS's file name will most probably depend on its file's name, so
     * refresh that first */
    if (bs->file) {
        bdrv_refresh_filename(bs->file);
    }

    if (drv->bdrv_refresh_filename) {
        /* Obsolete information is of no use here, so drop the old file name
         * information before refreshing it */
        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        /* The driver-specific callback is responsible for filling in
         * exact_filename and full_open_options as appropriate */
        drv->bdrv_refresh_filename(bs);
    } else if (bs->file) {
        /* Try to reconstruct valid information from the underlying file */
        bool has_open_options;

        /* Drop stale data before rebuilding it from bs->file */
        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        opts = qdict_new();
        has_open_options = append_open_options(opts, bs);

        /* If no specific options have been given for this BDS, the filename of
         * the underlying file should suffice for this one as well */
        if (bs->file->exact_filename[0] && !has_open_options) {
            strcpy(bs->exact_filename, bs->file->exact_filename);
        }
        /* Reconstructing the full options QDict is simple for most format block
         * drivers, as long as the full options are known for the underlying
         * file BDS. The full options QDict of that file BDS should somehow
         * contain a representation of the filename, therefore the following
         * suffices without querying the (exact_)filename of this BDS. */
        if (bs->file->full_open_options) {
            qdict_put_obj(opts, "driver",
                          QOBJECT(qstring_from_str(drv->format_name)));
            /* Take a reference for opts; qdict_put_obj() steals it, so the
             * file BDS keeps its own reference to full_open_options */
            QINCREF(bs->file->full_open_options);
            qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));

            bs->full_open_options = opts;
        } else {
            /* Without the file's options, opts would be incomplete; drop it */
            QDECREF(opts);
        }
    } else if (!bs->full_open_options && qdict_size(bs->options)) {
        /* There is no underlying file BDS (at least referenced by BDS.file),
         * so the full options QDict should be equal to the options given
         * specifically for this block device when it was opened (plus the
         * driver specification).
         * Because those options don't change, there is no need to update
         * full_open_options when it's already set. */

        opts = qdict_new();
        append_open_options(opts, bs);
        qdict_put_obj(opts, "driver",
                      QOBJECT(qstring_from_str(drv->format_name)));

        if (bs->exact_filename[0]) {
            /* This may not work for all block protocol drivers (some may
             * require this filename to be parsed), but we have to find some
             * default solution here, so just include it. If some block driver
             * does not support pure options without any filename at all or
             * needs some special format of the options QDict, it needs to
             * implement the driver-specific bdrv_refresh_filename() function.
             */
            qdict_put_obj(opts, "filename",
                          QOBJECT(qstring_from_str(bs->exact_filename)));
        }

        bs->full_open_options = opts;
    }

    /* Finally, derive the user-visible filename from the refreshed data:
     * prefer the exact filename, else fall back to the json: pseudo-protocol
     * representation of full_open_options */
    if (bs->exact_filename[0]) {
        pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
    } else if (bs->full_open_options) {
        QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
        snprintf(bs->filename, sizeof(bs->filename), "json:%s",
                 qstring_get_str(json));
        QDECREF(json);
    }
}
5366d0c8
BC
6093
6094/* This accessor function purpose is to allow the device models to access the
6095 * BlockAcctStats structure embedded inside a BlockDriverState without being
6096 * aware of the BlockDriverState structure layout.
6097 * It will go away when the BlockAcctStats structure will be moved inside
6098 * the device models.
6099 */
6100BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
6101{
6102 return &bs->stats;
6103}
This page took 1.744161 seconds and 4 git commands to generate.