]> Git Repo - qemu.git/blame - block.c
vpc: fix beX_to_cpu() and cpu_to_beX() confusion
[qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
737e150e
PB
27#include "block/block_int.h"
28#include "block/blockjob.h"
1de7afc9 29#include "qemu/module.h"
7b1b5d19 30#include "qapi/qmp/qjson.h"
9c17d615 31#include "sysemu/sysemu.h"
3ae59580 32#include "sysemu/blockdev.h" /* FIXME layering violation */
1de7afc9 33#include "qemu/notify.h"
737e150e 34#include "block/coroutine.h"
c13163fb 35#include "block/qapi.h"
b2023818 36#include "qmp-commands.h"
1de7afc9 37#include "qemu/timer.h"
a5ee7bd4 38#include "qapi-event.h"
fc01f7e7 39
71e72a19 40#ifdef CONFIG_BSD
7674e7bf
FB
41#include <sys/types.h>
42#include <sys/stat.h>
43#include <sys/ioctl.h>
72cf2d4f 44#include <sys/queue.h>
c5e97233 45#ifndef __DragonFly__
7674e7bf
FB
46#include <sys/disk.h>
47#endif
c5e97233 48#endif
7674e7bf 49
49dc768d
AL
50#ifdef _WIN32
51#include <windows.h>
52#endif
53
e4654d2d
FZ
54struct BdrvDirtyBitmap {
55 HBitmap *bitmap;
56 QLIST_ENTRY(BdrvDirtyBitmap) list;
57};
58
1c9805a3
SH
59#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
60
2a87151f
SH
61#define COROUTINE_POOL_RESERVATION 64 /* number of coroutines to reserve */
62
7d4b4ba5 63static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
64static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
65 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 66 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
67static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
68 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 69 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
70static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
71 int64_t sector_num, int nb_sectors,
72 QEMUIOVector *iov);
73static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
74 int64_t sector_num, int nb_sectors,
75 QEMUIOVector *iov);
775aa8b6
KW
76static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
77 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
470c0504 78 BdrvRequestFlags flags);
775aa8b6
KW
79static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
80 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
f08f2dda 81 BdrvRequestFlags flags);
b2a61371
SH
82static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
83 int64_t sector_num,
84 QEMUIOVector *qiov,
85 int nb_sectors,
d20d9b7c 86 BdrvRequestFlags flags,
b2a61371
SH
87 BlockDriverCompletionFunc *cb,
88 void *opaque,
8c5873d6 89 bool is_write);
b2a61371 90static void coroutine_fn bdrv_co_do_rw(void *opaque);
621f0589 91static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
aa7bfbff 92 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
ec530c81 93
1b7bdbc1
SH
94static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
95 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 96
dc364f4c
BC
97static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
98 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
99
8a22f02a
SH
100static QLIST_HEAD(, BlockDriver) bdrv_drivers =
101 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 102
eb852011
MA
103/* If non-zero, use only whitelisted block drivers */
104static int use_bdrv_whitelist;
105
9e0b22f4
SH
106#ifdef _WIN32
/*
 * Return 1 when @filename starts with an ASCII letter immediately followed
 * by ':' (for example "c:" or "D:\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_ascii_letter = (letter >= 'a' && letter <= 'z') ||
                          (letter >= 'A' && letter <= 'Z');

    /* filename[1] is only inspected once the first byte is a letter */
    return is_ascii_letter && filename[1] == ':';
}
113
114int is_windows_drive(const char *filename)
115{
116 if (is_windows_drive_prefix(filename) &&
117 filename[2] == '\0')
118 return 1;
119 if (strstart(filename, "\\\\.\\", NULL) ||
120 strstart(filename, "//./", NULL))
121 return 1;
122 return 0;
123}
124#endif
125
0563e191 126/* throttling disk I/O limits */
cc0681c4
BC
127void bdrv_set_io_limits(BlockDriverState *bs,
128 ThrottleConfig *cfg)
98f90dba 129{
cc0681c4 130 int i;
98f90dba 131
cc0681c4 132 throttle_config(&bs->throttle_state, cfg);
98f90dba 133
cc0681c4
BC
134 for (i = 0; i < 2; i++) {
135 qemu_co_enter_next(&bs->throttled_reqs[i]);
98f90dba 136 }
cc0681c4
BC
137}
138
139/* this function drain all the throttled IOs */
140static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
141{
142 bool drained = false;
143 bool enabled = bs->io_limits_enabled;
144 int i;
145
146 bs->io_limits_enabled = false;
147
148 for (i = 0; i < 2; i++) {
149 while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
150 drained = true;
151 }
152 }
153
154 bs->io_limits_enabled = enabled;
98f90dba 155
cc0681c4 156 return drained;
98f90dba
ZYW
157}
158
cc0681c4 159void bdrv_io_limits_disable(BlockDriverState *bs)
0563e191 160{
cc0681c4 161 bs->io_limits_enabled = false;
0563e191 162
cc0681c4
BC
163 bdrv_start_throttled_reqs(bs);
164
165 throttle_destroy(&bs->throttle_state);
0563e191
ZYW
166}
167
cc0681c4 168static void bdrv_throttle_read_timer_cb(void *opaque)
0563e191 169{
cc0681c4
BC
170 BlockDriverState *bs = opaque;
171 qemu_co_enter_next(&bs->throttled_reqs[0]);
0563e191
ZYW
172}
173
cc0681c4 174static void bdrv_throttle_write_timer_cb(void *opaque)
0563e191 175{
cc0681c4
BC
176 BlockDriverState *bs = opaque;
177 qemu_co_enter_next(&bs->throttled_reqs[1]);
0563e191
ZYW
178}
179
cc0681c4
BC
180/* should be called before bdrv_set_io_limits if a limit is set */
181void bdrv_io_limits_enable(BlockDriverState *bs)
182{
183 assert(!bs->io_limits_enabled);
184 throttle_init(&bs->throttle_state,
13af91eb 185 bdrv_get_aio_context(bs),
cc0681c4
BC
186 QEMU_CLOCK_VIRTUAL,
187 bdrv_throttle_read_timer_cb,
188 bdrv_throttle_write_timer_cb,
189 bs);
190 bs->io_limits_enabled = true;
191}
192
193/* This function makes an IO wait if needed
194 *
195 * @nb_sectors: the number of sectors of the IO
196 * @is_write: is the IO a write
197 */
98f90dba 198static void bdrv_io_limits_intercept(BlockDriverState *bs,
d5103588 199 unsigned int bytes,
cc0681c4 200 bool is_write)
98f90dba 201{
cc0681c4
BC
202 /* does this io must wait */
203 bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
98f90dba 204
cc0681c4
BC
205 /* if must wait or any request of this type throttled queue the IO */
206 if (must_wait ||
207 !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
208 qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
98f90dba
ZYW
209 }
210
cc0681c4 211 /* the IO will be executed, do the accounting */
d5103588
KW
212 throttle_account(&bs->throttle_state, is_write, bytes);
213
98f90dba 214
cc0681c4
BC
215 /* if the next request must wait -> do nothing */
216 if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
217 return;
98f90dba
ZYW
218 }
219
cc0681c4
BC
220 /* else queue next request for execution */
221 qemu_co_queue_next(&bs->throttled_reqs[is_write]);
98f90dba
ZYW
222}
223
339064d5
KW
224size_t bdrv_opt_mem_align(BlockDriverState *bs)
225{
226 if (!bs || !bs->drv) {
227 /* 4k should be on the safe side */
228 return 4096;
229 }
230
231 return bs->bl.opt_mem_alignment;
232}
233
9e0b22f4
SH
/*
 * Return 1 if @path starts with "<protocol>:", i.e. the first ':' occurs
 * before any path separator; return 0 otherwise.  On Windows, drive
 * specifications are never treated as protocols and '\' also counts as a
 * separator.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char stop_chars[] = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char stop_chars[] = ":/";
#endif

    /* look at the first colon-or-separator character (or the final NUL) */
    return path[strcspn(path, stop_chars)] == ':';
}
251
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A path is absolute when it starts with '/'.  On Windows a leading '\',
 * a drive prefix, or a device name such as "\\.\d:" also qualifies.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (*path == '\\') {
        return 1;
    }
#endif
    return *path == '/';
}
264
83f64091
FB
/*
 * Build in @dest (at most @dest_size bytes, always NUL-terminated) the
 * location of @filename relative to @base_path.
 *
 * An absolute @filename is copied unchanged.  Otherwise @filename is
 * appended to the directory part of @base_path, i.e. everything up to and
 * including the last path separator, or up to and including the first ':'
 * if that comes later (so "proto:" style base paths are supported).
 * Nothing is written when @dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep, *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' (start of string if there is none) */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* position just past the last separator (start of string if none) */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* the directory prefix ends at whichever of the two is further right */
    dir_end = after_sep > after_proto ? after_sep : after_proto;

    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';

    pstrcat(dest, dest_size, filename);
}
308
dc5a1371
PB
309void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
310{
311 if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
312 pstrcpy(dest, sz, bs->backing_file);
313 } else {
314 path_combine(dest, sz, bs->filename, bs->backing_file);
315 }
316}
317
5efa9d5a 318void bdrv_register(BlockDriver *bdrv)
ea2384d3 319{
8c5873d6
SH
320 /* Block drivers without coroutine functions need emulation */
321 if (!bdrv->bdrv_co_readv) {
f9f05dc5
KW
322 bdrv->bdrv_co_readv = bdrv_co_readv_em;
323 bdrv->bdrv_co_writev = bdrv_co_writev_em;
324
f8c35c1d
SH
325 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
326 * the block driver lacks aio we need to emulate that too.
327 */
f9f05dc5
KW
328 if (!bdrv->bdrv_aio_readv) {
329 /* add AIO emulation layer */
330 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
331 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
f9f05dc5 332 }
83f64091 333 }
b2e12bc6 334
8a22f02a 335 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 336}
b338082b
FB
337
338/* create a new block device (by default it is empty) */
98522f63 339BlockDriverState *bdrv_new(const char *device_name, Error **errp)
b338082b 340{
1b7bdbc1 341 BlockDriverState *bs;
fbe40ff7 342 int i;
b338082b 343
f2d953ec
KW
344 if (bdrv_find(device_name)) {
345 error_setg(errp, "Device with id '%s' already exists",
346 device_name);
347 return NULL;
348 }
349 if (bdrv_find_node(device_name)) {
d224469d
MA
350 error_setg(errp,
351 "Device name '%s' conflicts with an existing node name",
f2d953ec
KW
352 device_name);
353 return NULL;
354 }
355
5839e53b 356 bs = g_new0(BlockDriverState, 1);
e4654d2d 357 QLIST_INIT(&bs->dirty_bitmaps);
b338082b 358 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 359 if (device_name[0] != '\0') {
dc364f4c 360 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
ea2384d3 361 }
fbe40ff7
FZ
362 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
363 QLIST_INIT(&bs->op_blockers[i]);
364 }
28a7282a 365 bdrv_iostatus_disable(bs);
d7d512f6 366 notifier_list_init(&bs->close_notifiers);
d616b224 367 notifier_with_return_list_init(&bs->before_write_notifiers);
cc0681c4
BC
368 qemu_co_queue_init(&bs->throttled_reqs[0]);
369 qemu_co_queue_init(&bs->throttled_reqs[1]);
9fcb0251 370 bs->refcnt = 1;
dcd04228 371 bs->aio_context = qemu_get_aio_context();
d7d512f6 372
b338082b
FB
373 return bs;
374}
375
d7d512f6
PB
376void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
377{
378 notifier_list_add(&bs->close_notifiers, notify);
379}
380
ea2384d3
FB
381BlockDriver *bdrv_find_format(const char *format_name)
382{
383 BlockDriver *drv1;
8a22f02a
SH
384 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
385 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 386 return drv1;
8a22f02a 387 }
ea2384d3
FB
388 }
389 return NULL;
390}
391
b64ec4e4 392static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
eb852011 393{
b64ec4e4
FZ
394 static const char *whitelist_rw[] = {
395 CONFIG_BDRV_RW_WHITELIST
396 };
397 static const char *whitelist_ro[] = {
398 CONFIG_BDRV_RO_WHITELIST
eb852011
MA
399 };
400 const char **p;
401
b64ec4e4 402 if (!whitelist_rw[0] && !whitelist_ro[0]) {
eb852011 403 return 1; /* no whitelist, anything goes */
b64ec4e4 404 }
eb852011 405
b64ec4e4 406 for (p = whitelist_rw; *p; p++) {
eb852011
MA
407 if (!strcmp(drv->format_name, *p)) {
408 return 1;
409 }
410 }
b64ec4e4
FZ
411 if (read_only) {
412 for (p = whitelist_ro; *p; p++) {
413 if (!strcmp(drv->format_name, *p)) {
414 return 1;
415 }
416 }
417 }
eb852011
MA
418 return 0;
419}
420
b64ec4e4
FZ
421BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
422 bool read_only)
eb852011
MA
423{
424 BlockDriver *drv = bdrv_find_format(format_name);
b64ec4e4 425 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
eb852011
MA
426}
427
5b7e1542
ZYW
428typedef struct CreateCo {
429 BlockDriver *drv;
430 char *filename;
83d0521a 431 QemuOpts *opts;
5b7e1542 432 int ret;
cc84d90f 433 Error *err;
5b7e1542
ZYW
434} CreateCo;
435
436static void coroutine_fn bdrv_create_co_entry(void *opaque)
437{
cc84d90f
HR
438 Error *local_err = NULL;
439 int ret;
440
5b7e1542
ZYW
441 CreateCo *cco = opaque;
442 assert(cco->drv);
443
c282e1fd 444 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
84d18f06 445 if (local_err) {
cc84d90f
HR
446 error_propagate(&cco->err, local_err);
447 }
448 cco->ret = ret;
5b7e1542
ZYW
449}
450
0e7e1989 451int bdrv_create(BlockDriver *drv, const char* filename,
83d0521a 452 QemuOpts *opts, Error **errp)
ea2384d3 453{
5b7e1542
ZYW
454 int ret;
455
456 Coroutine *co;
457 CreateCo cco = {
458 .drv = drv,
459 .filename = g_strdup(filename),
83d0521a 460 .opts = opts,
5b7e1542 461 .ret = NOT_DONE,
cc84d90f 462 .err = NULL,
5b7e1542
ZYW
463 };
464
c282e1fd 465 if (!drv->bdrv_create) {
cc84d90f 466 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
80168bff
LC
467 ret = -ENOTSUP;
468 goto out;
5b7e1542
ZYW
469 }
470
471 if (qemu_in_coroutine()) {
472 /* Fast-path if already in coroutine context */
473 bdrv_create_co_entry(&cco);
474 } else {
475 co = qemu_coroutine_create(bdrv_create_co_entry);
476 qemu_coroutine_enter(co, &cco);
477 while (cco.ret == NOT_DONE) {
b47ec2c4 478 aio_poll(qemu_get_aio_context(), true);
5b7e1542
ZYW
479 }
480 }
481
482 ret = cco.ret;
cc84d90f 483 if (ret < 0) {
84d18f06 484 if (cco.err) {
cc84d90f
HR
485 error_propagate(errp, cco.err);
486 } else {
487 error_setg_errno(errp, -ret, "Could not create image");
488 }
489 }
0e7e1989 490
80168bff
LC
491out:
492 g_free(cco.filename);
5b7e1542 493 return ret;
ea2384d3
FB
494}
495
c282e1fd 496int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
84a12e66
CH
497{
498 BlockDriver *drv;
cc84d90f
HR
499 Error *local_err = NULL;
500 int ret;
84a12e66 501
98289620 502 drv = bdrv_find_protocol(filename, true);
84a12e66 503 if (drv == NULL) {
cc84d90f 504 error_setg(errp, "Could not find protocol for file '%s'", filename);
16905d71 505 return -ENOENT;
84a12e66
CH
506 }
507
c282e1fd 508 ret = bdrv_create(drv, filename, opts, &local_err);
84d18f06 509 if (local_err) {
cc84d90f
HR
510 error_propagate(errp, local_err);
511 }
512 return ret;
84a12e66
CH
513}
514
3baca891 515void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
d34682cd
KW
516{
517 BlockDriver *drv = bs->drv;
3baca891 518 Error *local_err = NULL;
d34682cd
KW
519
520 memset(&bs->bl, 0, sizeof(bs->bl));
521
466ad822 522 if (!drv) {
3baca891 523 return;
466ad822
KW
524 }
525
526 /* Take some limits from the children as a default */
527 if (bs->file) {
3baca891
KW
528 bdrv_refresh_limits(bs->file, &local_err);
529 if (local_err) {
530 error_propagate(errp, local_err);
531 return;
532 }
466ad822 533 bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
339064d5
KW
534 bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
535 } else {
536 bs->bl.opt_mem_alignment = 512;
466ad822
KW
537 }
538
539 if (bs->backing_hd) {
3baca891
KW
540 bdrv_refresh_limits(bs->backing_hd, &local_err);
541 if (local_err) {
542 error_propagate(errp, local_err);
543 return;
544 }
466ad822
KW
545 bs->bl.opt_transfer_length =
546 MAX(bs->bl.opt_transfer_length,
547 bs->backing_hd->bl.opt_transfer_length);
339064d5
KW
548 bs->bl.opt_mem_alignment =
549 MAX(bs->bl.opt_mem_alignment,
550 bs->backing_hd->bl.opt_mem_alignment);
466ad822
KW
551 }
552
553 /* Then let the driver override it */
554 if (drv->bdrv_refresh_limits) {
3baca891 555 drv->bdrv_refresh_limits(bs, errp);
d34682cd 556 }
d34682cd
KW
557}
558
eba25057
JM
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: buffer that receives the name of the created file
 * @size:     size of @filename in bytes (on Windows at least MAX_PATH)
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp when
 * TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value.  -EOVERFLOW is
 * returned when @filename is too small to hold the generated name.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int name_len;
    const char *tmpdir;

    /* a non-positive size must never reach snprintf() as a huge size_t */
    if (size <= 0) {
        return -EOVERFLOW;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }

    name_len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (name_len < 0 || name_len >= size) {
        return -EOVERFLOW;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember close()'s error first */
        int close_errno = errno;

        unlink(filename);
        return -close_errno;
    }
    return 0;
#endif
}
fc01f7e7 594
84a12e66
CH
595/*
596 * Detect host devices. By convention, /dev/cdrom[N] is always
597 * recognized as a host CDROM.
598 */
599static BlockDriver *find_hdev_driver(const char *filename)
600{
601 int score_max = 0, score;
602 BlockDriver *drv = NULL, *d;
603
604 QLIST_FOREACH(d, &bdrv_drivers, list) {
605 if (d->bdrv_probe_device) {
606 score = d->bdrv_probe_device(filename);
607 if (score > score_max) {
608 score_max = score;
609 drv = d;
610 }
611 }
612 }
613
614 return drv;
615}
616
98289620
KW
617BlockDriver *bdrv_find_protocol(const char *filename,
618 bool allow_protocol_prefix)
83f64091
FB
619{
620 BlockDriver *drv1;
621 char protocol[128];
1cec71e3 622 int len;
83f64091 623 const char *p;
19cb3738 624
66f82cee
KW
625 /* TODO Drivers without bdrv_file_open must be specified explicitly */
626
39508e7a
CH
627 /*
628 * XXX(hch): we really should not let host device detection
629 * override an explicit protocol specification, but moving this
630 * later breaks access to device names with colons in them.
631 * Thanks to the brain-dead persistent naming schemes on udev-
632 * based Linux systems those actually are quite common.
633 */
634 drv1 = find_hdev_driver(filename);
635 if (drv1) {
636 return drv1;
637 }
638
98289620 639 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
39508e7a 640 return bdrv_find_format("file");
84a12e66 641 }
98289620 642
9e0b22f4
SH
643 p = strchr(filename, ':');
644 assert(p != NULL);
1cec71e3
AL
645 len = p - filename;
646 if (len > sizeof(protocol) - 1)
647 len = sizeof(protocol) - 1;
648 memcpy(protocol, filename, len);
649 protocol[len] = '\0';
8a22f02a 650 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 651 if (drv1->protocol_name &&
8a22f02a 652 !strcmp(drv1->protocol_name, protocol)) {
83f64091 653 return drv1;
8a22f02a 654 }
83f64091
FB
655 }
656 return NULL;
657}
658
f500a6d3 659static int find_image_format(BlockDriverState *bs, const char *filename,
34b5d2c6 660 BlockDriver **pdrv, Error **errp)
f3a5d3f8 661{
f500a6d3 662 int score, score_max;
f3a5d3f8
CH
663 BlockDriver *drv1, *drv;
664 uint8_t buf[2048];
f500a6d3 665 int ret = 0;
f8ea0b00 666
08a00559 667 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
8e895599 668 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
c98ac35d
SW
669 drv = bdrv_find_format("raw");
670 if (!drv) {
34b5d2c6 671 error_setg(errp, "Could not find raw image format");
c98ac35d
SW
672 ret = -ENOENT;
673 }
674 *pdrv = drv;
675 return ret;
1a396859 676 }
f8ea0b00 677
83f64091 678 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
83f64091 679 if (ret < 0) {
34b5d2c6
HR
680 error_setg_errno(errp, -ret, "Could not read image for determining its "
681 "format");
c98ac35d
SW
682 *pdrv = NULL;
683 return ret;
83f64091
FB
684 }
685
ea2384d3 686 score_max = 0;
84a12e66 687 drv = NULL;
8a22f02a 688 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
689 if (drv1->bdrv_probe) {
690 score = drv1->bdrv_probe(buf, ret, filename);
691 if (score > score_max) {
692 score_max = score;
693 drv = drv1;
694 }
0849bf08 695 }
fc01f7e7 696 }
c98ac35d 697 if (!drv) {
34b5d2c6
HR
698 error_setg(errp, "Could not determine image format: No compatible "
699 "driver found");
c98ac35d
SW
700 ret = -ENOENT;
701 }
702 *pdrv = drv;
703 return ret;
ea2384d3
FB
704}
705
51762288
SH
706/**
707 * Set the current 'total_sectors' value
65a9bb25 708 * Return 0 on success, -errno on error.
51762288
SH
709 */
710static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
711{
712 BlockDriver *drv = bs->drv;
713
396759ad
NB
714 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
715 if (bs->sg)
716 return 0;
717
51762288
SH
718 /* query actual device if possible, otherwise just trust the hint */
719 if (drv->bdrv_getlength) {
720 int64_t length = drv->bdrv_getlength(bs);
721 if (length < 0) {
722 return length;
723 }
7e382003 724 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
51762288
SH
725 }
726
727 bs->total_sectors = hint;
728 return 0;
729}
730
9e8f1835
PB
731/**
732 * Set open flags for a given discard mode
733 *
734 * Return 0 on success, -1 if the discard mode was invalid.
735 */
736int bdrv_parse_discard_flags(const char *mode, int *flags)
737{
738 *flags &= ~BDRV_O_UNMAP;
739
740 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
741 /* do nothing */
742 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
743 *flags |= BDRV_O_UNMAP;
744 } else {
745 return -1;
746 }
747
748 return 0;
749}
750
c3993cdc
SH
751/**
752 * Set open flags for a given cache mode
753 *
754 * Return 0 on success, -1 if the cache mode was invalid.
755 */
756int bdrv_parse_cache_flags(const char *mode, int *flags)
757{
758 *flags &= ~BDRV_O_CACHE_MASK;
759
760 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
761 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
762 } else if (!strcmp(mode, "directsync")) {
763 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
764 } else if (!strcmp(mode, "writeback")) {
765 *flags |= BDRV_O_CACHE_WB;
766 } else if (!strcmp(mode, "unsafe")) {
767 *flags |= BDRV_O_CACHE_WB;
768 *flags |= BDRV_O_NO_FLUSH;
769 } else if (!strcmp(mode, "writethrough")) {
770 /* this is the default */
771 } else {
772 return -1;
773 }
774
775 return 0;
776}
777
53fec9d3
SH
778/**
779 * The copy-on-read flag is actually a reference count so multiple users may
780 * use the feature without worrying about clobbering its previous state.
781 * Copy-on-read stays enabled until all users have called to disable it.
782 */
783void bdrv_enable_copy_on_read(BlockDriverState *bs)
784{
785 bs->copy_on_read++;
786}
787
788void bdrv_disable_copy_on_read(BlockDriverState *bs)
789{
790 assert(bs->copy_on_read > 0);
791 bs->copy_on_read--;
792}
793
b1e6fc08
KW
794/*
795 * Returns the flags that a temporary snapshot should get, based on the
796 * originally requested flags (the originally requested image will have flags
797 * like a backing file)
798 */
799static int bdrv_temp_snapshot_flags(int flags)
800{
801 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
802}
803
0b50cc88
KW
804/*
805 * Returns the flags that bs->file should get, based on the given flags for
806 * the parent BDS
807 */
808static int bdrv_inherited_flags(int flags)
809{
810 /* Enable protocol handling, disable format probing for bs->file */
811 flags |= BDRV_O_PROTOCOL;
812
813 /* Our block drivers take care to send flushes and respect unmap policy,
814 * so we can enable both unconditionally on lower layers. */
815 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
816
0b50cc88 817 /* Clear flags that only apply to the top layer */
5669b44d 818 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
0b50cc88
KW
819
820 return flags;
821}
822
317fc44e
KW
823/*
824 * Returns the flags that bs->backing_hd should get, based on the given flags
825 * for the parent BDS
826 */
827static int bdrv_backing_flags(int flags)
828{
829 /* backing files always opened read-only */
830 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
831
832 /* snapshot=on is handled on the top layer */
8bfea15d 833 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
317fc44e
KW
834
835 return flags;
836}
837
7b272452
KW
838static int bdrv_open_flags(BlockDriverState *bs, int flags)
839{
840 int open_flags = flags | BDRV_O_CACHE_WB;
841
842 /*
843 * Clear flags that are internal to the block layer before opening the
844 * image.
845 */
20cca275 846 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
7b272452
KW
847
848 /*
849 * Snapshots should be writable.
850 */
8bfea15d 851 if (flags & BDRV_O_TEMPORARY) {
7b272452
KW
852 open_flags |= BDRV_O_RDWR;
853 }
854
855 return open_flags;
856}
857
636ea370
KW
858static void bdrv_assign_node_name(BlockDriverState *bs,
859 const char *node_name,
860 Error **errp)
6913c0c2
BC
861{
862 if (!node_name) {
636ea370 863 return;
6913c0c2
BC
864 }
865
866 /* empty string node name is invalid */
867 if (node_name[0] == '\0') {
868 error_setg(errp, "Empty node name");
636ea370 869 return;
6913c0c2
BC
870 }
871
0c5e94ee
BC
872 /* takes care of avoiding namespaces collisions */
873 if (bdrv_find(node_name)) {
874 error_setg(errp, "node-name=%s is conflicting with a device id",
875 node_name);
636ea370 876 return;
0c5e94ee
BC
877 }
878
6913c0c2
BC
879 /* takes care of avoiding duplicates node names */
880 if (bdrv_find_node(node_name)) {
881 error_setg(errp, "Duplicate node name");
636ea370 882 return;
6913c0c2
BC
883 }
884
885 /* copy node name into the bs and insert it into the graph list */
886 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
887 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
6913c0c2
BC
888}
889
57915332
KW
890/*
891 * Common part for opening disk images and files
b6ad491a
KW
892 *
893 * Removes all processed options from *options.
57915332 894 */
f500a6d3 895static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
34b5d2c6 896 QDict *options, int flags, BlockDriver *drv, Error **errp)
57915332
KW
897{
898 int ret, open_flags;
035fccdf 899 const char *filename;
6913c0c2 900 const char *node_name = NULL;
34b5d2c6 901 Error *local_err = NULL;
57915332
KW
902
903 assert(drv != NULL);
6405875c 904 assert(bs->file == NULL);
707ff828 905 assert(options != NULL && bs->options != options);
57915332 906
45673671
KW
907 if (file != NULL) {
908 filename = file->filename;
909 } else {
910 filename = qdict_get_try_str(options, "filename");
911 }
912
765003db
KW
913 if (drv->bdrv_needs_filename && !filename) {
914 error_setg(errp, "The '%s' block driver requires a file name",
915 drv->format_name);
916 return -EINVAL;
917 }
918
45673671 919 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
28dcee10 920
6913c0c2 921 node_name = qdict_get_try_str(options, "node-name");
636ea370 922 bdrv_assign_node_name(bs, node_name, &local_err);
0fb6395c 923 if (local_err) {
636ea370
KW
924 error_propagate(errp, local_err);
925 return -EINVAL;
6913c0c2
BC
926 }
927 qdict_del(options, "node-name");
928
5d186eb0
KW
929 /* bdrv_open() with directly using a protocol as drv. This layer is already
930 * opened, so assign it to bs (while file becomes a closed BlockDriverState)
931 * and return immediately. */
932 if (file != NULL && drv->bdrv_file_open) {
933 bdrv_swap(file, bs);
934 return 0;
935 }
936
57915332 937 bs->open_flags = flags;
1b7fd729 938 bs->guest_block_size = 512;
c25f53b0 939 bs->request_alignment = 512;
0d51b4de 940 bs->zero_beyond_eof = true;
b64ec4e4
FZ
941 open_flags = bdrv_open_flags(bs, flags);
942 bs->read_only = !(open_flags & BDRV_O_RDWR);
20cca275 943 bs->growable = !!(flags & BDRV_O_PROTOCOL);
b64ec4e4
FZ
944
945 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
8f94a6e4
KW
946 error_setg(errp,
947 !bs->read_only && bdrv_is_whitelisted(drv, true)
948 ? "Driver '%s' can only be used for read-only devices"
949 : "Driver '%s' is not whitelisted",
950 drv->format_name);
b64ec4e4
FZ
951 return -ENOTSUP;
952 }
57915332 953
53fec9d3 954 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
0ebd24e0
KW
955 if (flags & BDRV_O_COPY_ON_READ) {
956 if (!bs->read_only) {
957 bdrv_enable_copy_on_read(bs);
958 } else {
959 error_setg(errp, "Can't use copy-on-read on read-only device");
960 return -EINVAL;
961 }
53fec9d3
SH
962 }
963
c2ad1b0c
KW
964 if (filename != NULL) {
965 pstrcpy(bs->filename, sizeof(bs->filename), filename);
966 } else {
967 bs->filename[0] = '\0';
968 }
91af7014 969 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
57915332 970
57915332 971 bs->drv = drv;
7267c094 972 bs->opaque = g_malloc0(drv->instance_size);
57915332 973
03f541bd 974 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
e7c63796 975
66f82cee
KW
976 /* Open the image, either directly or using a protocol */
977 if (drv->bdrv_file_open) {
5d186eb0 978 assert(file == NULL);
030be321 979 assert(!drv->bdrv_needs_filename || filename != NULL);
34b5d2c6 980 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
f500a6d3 981 } else {
2af5ef70 982 if (file == NULL) {
34b5d2c6
HR
983 error_setg(errp, "Can't use '%s' as a block driver for the "
984 "protocol level", drv->format_name);
2af5ef70
KW
985 ret = -EINVAL;
986 goto free_and_fail;
987 }
f500a6d3 988 bs->file = file;
34b5d2c6 989 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
66f82cee
KW
990 }
991
57915332 992 if (ret < 0) {
84d18f06 993 if (local_err) {
34b5d2c6 994 error_propagate(errp, local_err);
2fa9aa59
DH
995 } else if (bs->filename[0]) {
996 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
34b5d2c6
HR
997 } else {
998 error_setg_errno(errp, -ret, "Could not open image");
999 }
57915332
KW
1000 goto free_and_fail;
1001 }
1002
51762288
SH
1003 ret = refresh_total_sectors(bs, bs->total_sectors);
1004 if (ret < 0) {
34b5d2c6 1005 error_setg_errno(errp, -ret, "Could not refresh total sector count");
51762288 1006 goto free_and_fail;
57915332 1007 }
51762288 1008
3baca891
KW
1009 bdrv_refresh_limits(bs, &local_err);
1010 if (local_err) {
1011 error_propagate(errp, local_err);
1012 ret = -EINVAL;
1013 goto free_and_fail;
1014 }
1015
c25f53b0 1016 assert(bdrv_opt_mem_align(bs) != 0);
47ea2de2 1017 assert((bs->request_alignment != 0) || bs->sg);
57915332
KW
1018 return 0;
1019
1020free_and_fail:
f500a6d3 1021 bs->file = NULL;
7267c094 1022 g_free(bs->opaque);
57915332
KW
1023 bs->opaque = NULL;
1024 bs->drv = NULL;
1025 return ret;
1026}
1027
5e5c4f63
KW
1028static QDict *parse_json_filename(const char *filename, Error **errp)
1029{
1030 QObject *options_obj;
1031 QDict *options;
1032 int ret;
1033
1034 ret = strstart(filename, "json:", &filename);
1035 assert(ret);
1036
1037 options_obj = qobject_from_json(filename);
1038 if (!options_obj) {
1039 error_setg(errp, "Could not parse the JSON options");
1040 return NULL;
1041 }
1042
1043 if (qobject_type(options_obj) != QTYPE_QDICT) {
1044 qobject_decref(options_obj);
1045 error_setg(errp, "Invalid JSON object given");
1046 return NULL;
1047 }
1048
1049 options = qobject_to_qdict(options_obj);
1050 qdict_flatten(options);
1051
1052 return options;
1053}
1054
b6ce07aa 1055/*
f54120ff
KW
1056 * Fills in default options for opening images and converts the legacy
1057 * filename/flags pair to option QDict entries.
b6ce07aa 1058 */
5e5c4f63 1059static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
17b005f1 1060 BlockDriver *drv, Error **errp)
ea2384d3 1061{
5e5c4f63 1062 const char *filename = *pfilename;
c2ad1b0c 1063 const char *drvname;
462f5bcf 1064 bool protocol = flags & BDRV_O_PROTOCOL;
e3fa4bfa 1065 bool parse_filename = false;
34b5d2c6 1066 Error *local_err = NULL;
83f64091 1067
5e5c4f63
KW
1068 /* Parse json: pseudo-protocol */
1069 if (filename && g_str_has_prefix(filename, "json:")) {
1070 QDict *json_options = parse_json_filename(filename, &local_err);
1071 if (local_err) {
1072 error_propagate(errp, local_err);
1073 return -EINVAL;
1074 }
1075
1076 /* Options given in the filename have lower priority than options
1077 * specified directly */
1078 qdict_join(*options, json_options, false);
1079 QDECREF(json_options);
1080 *pfilename = filename = NULL;
1081 }
1082
035fccdf 1083 /* Fetch the file name from the options QDict if necessary */
17b005f1 1084 if (protocol && filename) {
f54120ff
KW
1085 if (!qdict_haskey(*options, "filename")) {
1086 qdict_put(*options, "filename", qstring_from_str(filename));
1087 parse_filename = true;
1088 } else {
1089 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1090 "the same time");
1091 return -EINVAL;
1092 }
035fccdf
KW
1093 }
1094
c2ad1b0c 1095 /* Find the right block driver */
f54120ff 1096 filename = qdict_get_try_str(*options, "filename");
5acd9d81 1097 drvname = qdict_get_try_str(*options, "driver");
f54120ff 1098
17b005f1
KW
1099 if (drv) {
1100 if (drvname) {
1101 error_setg(errp, "Driver specified twice");
1102 return -EINVAL;
1103 }
1104 drvname = drv->format_name;
1105 qdict_put(*options, "driver", qstring_from_str(drvname));
1106 } else {
1107 if (!drvname && protocol) {
1108 if (filename) {
1109 drv = bdrv_find_protocol(filename, parse_filename);
1110 if (!drv) {
1111 error_setg(errp, "Unknown protocol");
1112 return -EINVAL;
1113 }
1114
1115 drvname = drv->format_name;
1116 qdict_put(*options, "driver", qstring_from_str(drvname));
1117 } else {
1118 error_setg(errp, "Must specify either driver or file");
f54120ff
KW
1119 return -EINVAL;
1120 }
17b005f1
KW
1121 } else if (drvname) {
1122 drv = bdrv_find_format(drvname);
1123 if (!drv) {
1124 error_setg(errp, "Unknown driver '%s'", drvname);
1125 return -ENOENT;
1126 }
98289620 1127 }
c2ad1b0c
KW
1128 }
1129
17b005f1 1130 assert(drv || !protocol);
c2ad1b0c 1131
f54120ff 1132 /* Driver-specific filename parsing */
17b005f1 1133 if (drv && drv->bdrv_parse_filename && parse_filename) {
5acd9d81 1134 drv->bdrv_parse_filename(filename, *options, &local_err);
84d18f06 1135 if (local_err) {
34b5d2c6 1136 error_propagate(errp, local_err);
f54120ff 1137 return -EINVAL;
6963a30d 1138 }
cd5d031e
HR
1139
1140 if (!drv->bdrv_needs_filename) {
1141 qdict_del(*options, "filename");
cd5d031e 1142 }
6963a30d
KW
1143 }
1144
f54120ff
KW
1145 return 0;
1146}
1147
8d24cce1
FZ
1148void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1149{
1150
826b6ca0
FZ
1151 if (bs->backing_hd) {
1152 assert(bs->backing_blocker);
1153 bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1154 } else if (backing_hd) {
1155 error_setg(&bs->backing_blocker,
1156 "device is used as backing hd of '%s'",
1157 bs->device_name);
1158 }
1159
8d24cce1
FZ
1160 bs->backing_hd = backing_hd;
1161 if (!backing_hd) {
826b6ca0
FZ
1162 error_free(bs->backing_blocker);
1163 bs->backing_blocker = NULL;
8d24cce1
FZ
1164 goto out;
1165 }
1166 bs->open_flags &= ~BDRV_O_NO_BACKING;
1167 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1168 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1169 backing_hd->drv ? backing_hd->drv->format_name : "");
826b6ca0
FZ
1170
1171 bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1172 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1173 bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
1174 bs->backing_blocker);
8d24cce1 1175out:
3baca891 1176 bdrv_refresh_limits(bs, NULL);
8d24cce1
FZ
1177}
1178
31ca6d07
KW
1179/*
1180 * Opens the backing file for a BlockDriverState if not yet open
1181 *
1182 * options is a QDict of options to pass to the block drivers, or NULL for an
1183 * empty set of options. The reference to the QDict is transferred to this
1184 * function (even on failure), so if the caller intends to reuse the dictionary,
1185 * it needs to use QINCREF() before calling bdrv_file_open.
1186 */
34b5d2c6 1187int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
9156df12 1188{
1ba4b6a5 1189 char *backing_filename = g_malloc0(PATH_MAX);
317fc44e 1190 int ret = 0;
9156df12 1191 BlockDriver *back_drv = NULL;
8d24cce1 1192 BlockDriverState *backing_hd;
34b5d2c6 1193 Error *local_err = NULL;
9156df12
PB
1194
1195 if (bs->backing_hd != NULL) {
31ca6d07 1196 QDECREF(options);
1ba4b6a5 1197 goto free_exit;
9156df12
PB
1198 }
1199
31ca6d07
KW
1200 /* NULL means an empty set of options */
1201 if (options == NULL) {
1202 options = qdict_new();
1203 }
1204
9156df12 1205 bs->open_flags &= ~BDRV_O_NO_BACKING;
1cb6f506
KW
1206 if (qdict_haskey(options, "file.filename")) {
1207 backing_filename[0] = '\0';
1208 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
31ca6d07 1209 QDECREF(options);
1ba4b6a5 1210 goto free_exit;
dbecebdd 1211 } else {
1ba4b6a5 1212 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
9156df12
PB
1213 }
1214
8ee79e70
KW
1215 if (!bs->drv || !bs->drv->supports_backing) {
1216 ret = -EINVAL;
1217 error_setg(errp, "Driver doesn't support backing files");
1218 QDECREF(options);
1219 goto free_exit;
1220 }
1221
8d24cce1
FZ
1222 backing_hd = bdrv_new("", errp);
1223
9156df12
PB
1224 if (bs->backing_format[0] != '\0') {
1225 back_drv = bdrv_find_format(bs->backing_format);
1226 }
1227
f67503e5 1228 assert(bs->backing_hd == NULL);
8d24cce1 1229 ret = bdrv_open(&backing_hd,
ddf5636d 1230 *backing_filename ? backing_filename : NULL, NULL, options,
317fc44e 1231 bdrv_backing_flags(bs->open_flags), back_drv, &local_err);
9156df12 1232 if (ret < 0) {
8d24cce1
FZ
1233 bdrv_unref(backing_hd);
1234 backing_hd = NULL;
9156df12 1235 bs->open_flags |= BDRV_O_NO_BACKING;
b04b6b6e
FZ
1236 error_setg(errp, "Could not open backing file: %s",
1237 error_get_pretty(local_err));
1238 error_free(local_err);
1ba4b6a5 1239 goto free_exit;
9156df12 1240 }
8d24cce1 1241 bdrv_set_backing_hd(bs, backing_hd);
d80ac658 1242
1ba4b6a5
BC
1243free_exit:
1244 g_free(backing_filename);
1245 return ret;
9156df12
PB
1246}
1247
da557aac
HR
1248/*
1249 * Opens a disk image whose options are given as BlockdevRef in another block
1250 * device's options.
1251 *
da557aac
HR
1252 * If allow_none is true, no image will be opened if filename is false and no
1253 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1254 *
1255 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1256 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1257 * itself, all options starting with "${bdref_key}." are considered part of the
1258 * BlockdevRef.
1259 *
1260 * The BlockdevRef will be removed from the options QDict.
f67503e5
HR
1261 *
1262 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
da557aac
HR
1263 */
1264int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1265 QDict *options, const char *bdref_key, int flags,
f7d9fd8c 1266 bool allow_none, Error **errp)
da557aac
HR
1267{
1268 QDict *image_options;
1269 int ret;
1270 char *bdref_key_dot;
1271 const char *reference;
1272
f67503e5
HR
1273 assert(pbs);
1274 assert(*pbs == NULL);
1275
da557aac
HR
1276 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1277 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1278 g_free(bdref_key_dot);
1279
1280 reference = qdict_get_try_str(options, bdref_key);
1281 if (!filename && !reference && !qdict_size(image_options)) {
1282 if (allow_none) {
1283 ret = 0;
1284 } else {
1285 error_setg(errp, "A block device must be specified for \"%s\"",
1286 bdref_key);
1287 ret = -EINVAL;
1288 }
b20e61e0 1289 QDECREF(image_options);
da557aac
HR
1290 goto done;
1291 }
1292
f7d9fd8c 1293 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
da557aac
HR
1294
1295done:
1296 qdict_del(options, bdref_key);
1297 return ret;
1298}
1299
6b8aeca5 1300int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
b998875d
KW
1301{
1302 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1ba4b6a5 1303 char *tmp_filename = g_malloc0(PATH_MAX + 1);
b998875d
KW
1304 int64_t total_size;
1305 BlockDriver *bdrv_qcow2;
83d0521a 1306 QemuOpts *opts = NULL;
b998875d
KW
1307 QDict *snapshot_options;
1308 BlockDriverState *bs_snapshot;
1309 Error *local_err;
1310 int ret;
1311
1312 /* if snapshot, we create a temporary backing file and open it
1313 instead of opening 'filename' directly */
1314
1315 /* Get the required size from the image */
f187743a
KW
1316 total_size = bdrv_getlength(bs);
1317 if (total_size < 0) {
6b8aeca5 1318 ret = total_size;
f187743a 1319 error_setg_errno(errp, -total_size, "Could not get image size");
1ba4b6a5 1320 goto out;
f187743a 1321 }
b998875d
KW
1322
1323 /* Create the temporary image */
1ba4b6a5 1324 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
b998875d
KW
1325 if (ret < 0) {
1326 error_setg_errno(errp, -ret, "Could not get temporary filename");
1ba4b6a5 1327 goto out;
b998875d
KW
1328 }
1329
1330 bdrv_qcow2 = bdrv_find_format("qcow2");
c282e1fd
CL
1331 opts = qemu_opts_create(bdrv_qcow2->create_opts, NULL, 0,
1332 &error_abort);
83d0521a 1333 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size);
c282e1fd 1334 ret = bdrv_create(bdrv_qcow2, tmp_filename, opts, &local_err);
83d0521a 1335 qemu_opts_del(opts);
b998875d
KW
1336 if (ret < 0) {
1337 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1338 "'%s': %s", tmp_filename,
1339 error_get_pretty(local_err));
1340 error_free(local_err);
1ba4b6a5 1341 goto out;
b998875d
KW
1342 }
1343
1344 /* Prepare a new options QDict for the temporary file */
1345 snapshot_options = qdict_new();
1346 qdict_put(snapshot_options, "file.driver",
1347 qstring_from_str("file"));
1348 qdict_put(snapshot_options, "file.filename",
1349 qstring_from_str(tmp_filename));
1350
98522f63 1351 bs_snapshot = bdrv_new("", &error_abort);
b998875d
KW
1352
1353 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
b1e6fc08 1354 flags, bdrv_qcow2, &local_err);
b998875d
KW
1355 if (ret < 0) {
1356 error_propagate(errp, local_err);
1ba4b6a5 1357 goto out;
b998875d
KW
1358 }
1359
1360 bdrv_append(bs_snapshot, bs);
1ba4b6a5
BC
1361
1362out:
1363 g_free(tmp_filename);
6b8aeca5 1364 return ret;
b998875d
KW
1365}
1366
b6ce07aa
KW
1367/*
1368 * Opens a disk image (raw, qcow2, vmdk, ...)
de9c0cec
KW
1369 *
1370 * options is a QDict of options to pass to the block drivers, or NULL for an
1371 * empty set of options. The reference to the QDict belongs to the block layer
1372 * after the call (even on failure), so if the caller intends to reuse the
1373 * dictionary, it needs to use QINCREF() before calling bdrv_open.
f67503e5
HR
1374 *
1375 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1376 * If it is not NULL, the referenced BDS will be reused.
ddf5636d
HR
1377 *
1378 * The reference parameter may be used to specify an existing block device which
1379 * should be opened. If specified, neither options nor a filename may be given,
1380 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
b6ce07aa 1381 */
ddf5636d
HR
1382int bdrv_open(BlockDriverState **pbs, const char *filename,
1383 const char *reference, QDict *options, int flags,
1384 BlockDriver *drv, Error **errp)
ea2384d3 1385{
b6ce07aa 1386 int ret;
f67503e5 1387 BlockDriverState *file = NULL, *bs;
74fe54f2 1388 const char *drvname;
34b5d2c6 1389 Error *local_err = NULL;
b1e6fc08 1390 int snapshot_flags = 0;
712e7874 1391
f67503e5
HR
1392 assert(pbs);
1393
ddf5636d
HR
1394 if (reference) {
1395 bool options_non_empty = options ? qdict_size(options) : false;
1396 QDECREF(options);
1397
1398 if (*pbs) {
1399 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1400 "another block device");
1401 return -EINVAL;
1402 }
1403
1404 if (filename || options_non_empty) {
1405 error_setg(errp, "Cannot reference an existing block device with "
1406 "additional options or a new filename");
1407 return -EINVAL;
1408 }
1409
1410 bs = bdrv_lookup_bs(reference, reference, errp);
1411 if (!bs) {
1412 return -ENODEV;
1413 }
1414 bdrv_ref(bs);
1415 *pbs = bs;
1416 return 0;
1417 }
1418
f67503e5
HR
1419 if (*pbs) {
1420 bs = *pbs;
1421 } else {
98522f63 1422 bs = bdrv_new("", &error_abort);
f67503e5
HR
1423 }
1424
de9c0cec
KW
1425 /* NULL means an empty set of options */
1426 if (options == NULL) {
1427 options = qdict_new();
1428 }
1429
17b005f1 1430 ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
462f5bcf
KW
1431 if (local_err) {
1432 goto fail;
1433 }
1434
76c591b0
KW
1435 /* Find the right image format driver */
1436 drv = NULL;
1437 drvname = qdict_get_try_str(options, "driver");
1438 if (drvname) {
1439 drv = bdrv_find_format(drvname);
1440 qdict_del(options, "driver");
1441 if (!drv) {
1442 error_setg(errp, "Unknown driver: '%s'", drvname);
1443 ret = -EINVAL;
1444 goto fail;
1445 }
1446 }
1447
1448 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1449 if (drv && !drv->bdrv_file_open) {
1450 /* If the user explicitly wants a format driver here, we'll need to add
1451 * another layer for the protocol in bs->file */
1452 flags &= ~BDRV_O_PROTOCOL;
1453 }
1454
de9c0cec 1455 bs->options = options;
b6ad491a 1456 options = qdict_clone_shallow(options);
de9c0cec 1457
f500a6d3 1458 /* Open image file without format layer */
f4788adc
KW
1459 if ((flags & BDRV_O_PROTOCOL) == 0) {
1460 if (flags & BDRV_O_RDWR) {
1461 flags |= BDRV_O_ALLOW_RDWR;
1462 }
1463 if (flags & BDRV_O_SNAPSHOT) {
1464 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1465 flags = bdrv_backing_flags(flags);
1466 }
f500a6d3 1467
f4788adc
KW
1468 assert(file == NULL);
1469 ret = bdrv_open_image(&file, filename, options, "file",
1470 bdrv_inherited_flags(flags),
1471 true, &local_err);
1472 if (ret < 0) {
1473 goto fail;
1474 }
f500a6d3
KW
1475 }
1476
76c591b0
KW
1477 /* Image format probing */
1478 if (!drv && file) {
17b005f1
KW
1479 ret = find_image_format(file, filename, &drv, &local_err);
1480 if (ret < 0) {
8bfea15d 1481 goto fail;
2a05cbe4 1482 }
76c591b0 1483 } else if (!drv) {
17b005f1
KW
1484 error_setg(errp, "Must specify either driver or file");
1485 ret = -EINVAL;
8bfea15d 1486 goto fail;
ea2384d3 1487 }
b6ce07aa
KW
1488
1489 /* Open the image */
34b5d2c6 1490 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
b6ce07aa 1491 if (ret < 0) {
8bfea15d 1492 goto fail;
6987307c
CH
1493 }
1494
2a05cbe4 1495 if (file && (bs->file != file)) {
4f6fd349 1496 bdrv_unref(file);
f500a6d3
KW
1497 file = NULL;
1498 }
1499
b6ce07aa 1500 /* If there is a backing file, use it */
9156df12 1501 if ((flags & BDRV_O_NO_BACKING) == 0) {
31ca6d07
KW
1502 QDict *backing_options;
1503
5726d872 1504 qdict_extract_subqdict(options, &backing_options, "backing.");
34b5d2c6 1505 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
b6ce07aa 1506 if (ret < 0) {
b6ad491a 1507 goto close_and_fail;
b6ce07aa 1508 }
b6ce07aa
KW
1509 }
1510
91af7014
HR
1511 bdrv_refresh_filename(bs);
1512
b998875d
KW
1513 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1514 * temporary snapshot afterwards. */
b1e6fc08 1515 if (snapshot_flags) {
6b8aeca5 1516 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
b998875d 1517 if (local_err) {
b998875d
KW
1518 goto close_and_fail;
1519 }
1520 }
1521
b6ad491a 1522 /* Check if any unknown options were used */
5acd9d81 1523 if (options && (qdict_size(options) != 0)) {
b6ad491a 1524 const QDictEntry *entry = qdict_first(options);
5acd9d81
HR
1525 if (flags & BDRV_O_PROTOCOL) {
1526 error_setg(errp, "Block protocol '%s' doesn't support the option "
1527 "'%s'", drv->format_name, entry->key);
1528 } else {
1529 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1530 "support the option '%s'", drv->format_name,
1531 bs->device_name, entry->key);
1532 }
b6ad491a
KW
1533
1534 ret = -EINVAL;
1535 goto close_and_fail;
1536 }
b6ad491a 1537
b6ce07aa 1538 if (!bdrv_key_required(bs)) {
7d4b4ba5 1539 bdrv_dev_change_media_cb(bs, true);
c3adb58f
MA
1540 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1541 && !runstate_check(RUN_STATE_INMIGRATE)
1542 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1543 error_setg(errp,
1544 "Guest must be stopped for opening of encrypted image");
1545 ret = -EBUSY;
1546 goto close_and_fail;
b6ce07aa
KW
1547 }
1548
c3adb58f 1549 QDECREF(options);
f67503e5 1550 *pbs = bs;
b6ce07aa
KW
1551 return 0;
1552
8bfea15d 1553fail:
f500a6d3 1554 if (file != NULL) {
4f6fd349 1555 bdrv_unref(file);
f500a6d3 1556 }
de9c0cec 1557 QDECREF(bs->options);
b6ad491a 1558 QDECREF(options);
de9c0cec 1559 bs->options = NULL;
f67503e5
HR
1560 if (!*pbs) {
1561 /* If *pbs is NULL, a new BDS has been created in this function and
1562 needs to be freed now. Otherwise, it does not need to be closed,
1563 since it has not really been opened yet. */
1564 bdrv_unref(bs);
1565 }
84d18f06 1566 if (local_err) {
34b5d2c6
HR
1567 error_propagate(errp, local_err);
1568 }
b6ad491a 1569 return ret;
de9c0cec 1570
b6ad491a 1571close_and_fail:
f67503e5
HR
1572 /* See fail path, but now the BDS has to be always closed */
1573 if (*pbs) {
1574 bdrv_close(bs);
1575 } else {
1576 bdrv_unref(bs);
1577 }
b6ad491a 1578 QDECREF(options);
84d18f06 1579 if (local_err) {
34b5d2c6
HR
1580 error_propagate(errp, local_err);
1581 }
b6ce07aa
KW
1582 return ret;
1583}
1584
e971aa12
JC
1585typedef struct BlockReopenQueueEntry {
1586 bool prepared;
1587 BDRVReopenState state;
1588 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1589} BlockReopenQueueEntry;
1590
1591/*
1592 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1593 * reopen of multiple devices.
1594 *
1595 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1596 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1597 * be created and initialized. This newly created BlockReopenQueue should be
1598 * passed back in for subsequent calls that are intended to be of the same
1599 * atomic 'set'.
1600 *
1601 * bs is the BlockDriverState to add to the reopen queue.
1602 *
1603 * flags contains the open flags for the associated bs
1604 *
1605 * returns a pointer to bs_queue, which is either the newly allocated
1606 * bs_queue, or the existing bs_queue being used.
1607 *
1608 */
1609BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1610 BlockDriverState *bs, int flags)
1611{
1612 assert(bs != NULL);
1613
1614 BlockReopenQueueEntry *bs_entry;
1615 if (bs_queue == NULL) {
1616 bs_queue = g_new0(BlockReopenQueue, 1);
1617 QSIMPLEQ_INIT(bs_queue);
1618 }
1619
f1f25a2e
KW
1620 /* bdrv_open() masks this flag out */
1621 flags &= ~BDRV_O_PROTOCOL;
1622
e971aa12 1623 if (bs->file) {
f1f25a2e 1624 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
e971aa12
JC
1625 }
1626
1627 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1628 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1629
1630 bs_entry->state.bs = bs;
1631 bs_entry->state.flags = flags;
1632
1633 return bs_queue;
1634}
1635
1636/*
1637 * Reopen multiple BlockDriverStates atomically & transactionally.
1638 *
1639 * The queue passed in (bs_queue) must have been built up previous
1640 * via bdrv_reopen_queue().
1641 *
1642 * Reopens all BDS specified in the queue, with the appropriate
1643 * flags. All devices are prepared for reopen, and failure of any
1644 * device will cause all device changes to be abandonded, and intermediate
1645 * data cleaned up.
1646 *
1647 * If all devices prepare successfully, then the changes are committed
1648 * to all devices.
1649 *
1650 */
1651int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1652{
1653 int ret = -1;
1654 BlockReopenQueueEntry *bs_entry, *next;
1655 Error *local_err = NULL;
1656
1657 assert(bs_queue != NULL);
1658
1659 bdrv_drain_all();
1660
1661 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1662 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1663 error_propagate(errp, local_err);
1664 goto cleanup;
1665 }
1666 bs_entry->prepared = true;
1667 }
1668
1669 /* If we reach this point, we have success and just need to apply the
1670 * changes
1671 */
1672 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1673 bdrv_reopen_commit(&bs_entry->state);
1674 }
1675
1676 ret = 0;
1677
1678cleanup:
1679 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1680 if (ret && bs_entry->prepared) {
1681 bdrv_reopen_abort(&bs_entry->state);
1682 }
1683 g_free(bs_entry);
1684 }
1685 g_free(bs_queue);
1686 return ret;
1687}
1688
1689
1690/* Reopen a single BlockDriverState with the specified flags. */
1691int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1692{
1693 int ret = -1;
1694 Error *local_err = NULL;
1695 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1696
1697 ret = bdrv_reopen_multiple(queue, &local_err);
1698 if (local_err != NULL) {
1699 error_propagate(errp, local_err);
1700 }
1701 return ret;
1702}
1703
1704
1705/*
1706 * Prepares a BlockDriverState for reopen. All changes are staged in the
1707 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1708 * the block driver layer .bdrv_reopen_prepare()
1709 *
1710 * bs is the BlockDriverState to reopen
1711 * flags are the new open flags
1712 * queue is the reopen queue
1713 *
1714 * Returns 0 on success, non-zero on error. On error errp will be set
1715 * as well.
1716 *
1717 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1718 * It is the responsibility of the caller to then call the abort() or
1719 * commit() for any other BDS that have been left in a prepare() state
1720 *
1721 */
1722int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1723 Error **errp)
1724{
1725 int ret = -1;
1726 Error *local_err = NULL;
1727 BlockDriver *drv;
1728
1729 assert(reopen_state != NULL);
1730 assert(reopen_state->bs->drv != NULL);
1731 drv = reopen_state->bs->drv;
1732
1733 /* if we are to stay read-only, do not allow permission change
1734 * to r/w */
1735 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1736 reopen_state->flags & BDRV_O_RDWR) {
1737 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
1738 reopen_state->bs->device_name);
1739 goto error;
1740 }
1741
1742
1743 ret = bdrv_flush(reopen_state->bs);
1744 if (ret) {
1745 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1746 strerror(-ret));
1747 goto error;
1748 }
1749
1750 if (drv->bdrv_reopen_prepare) {
1751 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1752 if (ret) {
1753 if (local_err != NULL) {
1754 error_propagate(errp, local_err);
1755 } else {
d8b6895f
LC
1756 error_setg(errp, "failed while preparing to reopen image '%s'",
1757 reopen_state->bs->filename);
e971aa12
JC
1758 }
1759 goto error;
1760 }
1761 } else {
1762 /* It is currently mandatory to have a bdrv_reopen_prepare()
1763 * handler for each supported drv. */
1764 error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
1765 drv->format_name, reopen_state->bs->device_name,
1766 "reopening of file");
1767 ret = -1;
1768 goto error;
1769 }
1770
1771 ret = 0;
1772
1773error:
1774 return ret;
1775}
1776
1777/*
1778 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1779 * makes them final by swapping the staging BlockDriverState contents into
1780 * the active BlockDriverState contents.
1781 */
1782void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1783{
1784 BlockDriver *drv;
1785
1786 assert(reopen_state != NULL);
1787 drv = reopen_state->bs->drv;
1788 assert(drv != NULL);
1789
1790 /* If there are any driver level actions to take */
1791 if (drv->bdrv_reopen_commit) {
1792 drv->bdrv_reopen_commit(reopen_state);
1793 }
1794
1795 /* set BDS specific flags now */
1796 reopen_state->bs->open_flags = reopen_state->flags;
1797 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1798 BDRV_O_CACHE_WB);
1799 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
355ef4ac 1800
3baca891 1801 bdrv_refresh_limits(reopen_state->bs, NULL);
e971aa12
JC
1802}
1803
1804/*
1805 * Abort the reopen, and delete and free the staged changes in
1806 * reopen_state
1807 */
1808void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1809{
1810 BlockDriver *drv;
1811
1812 assert(reopen_state != NULL);
1813 drv = reopen_state->bs->drv;
1814 assert(drv != NULL);
1815
1816 if (drv->bdrv_reopen_abort) {
1817 drv->bdrv_reopen_abort(reopen_state);
1818 }
1819}
1820
1821
fc01f7e7
FB
1822void bdrv_close(BlockDriverState *bs)
1823{
33384421
HR
1824 BdrvAioNotifier *ban, *ban_next;
1825
3cbc002c
PB
1826 if (bs->job) {
1827 block_job_cancel_sync(bs->job);
1828 }
58fda173
SH
1829 bdrv_drain_all(); /* complete I/O */
1830 bdrv_flush(bs);
1831 bdrv_drain_all(); /* in case flush left pending I/O */
d7d512f6 1832 notifier_list_notify(&bs->close_notifiers, bs);
7094f12f 1833
3cbc002c 1834 if (bs->drv) {
557df6ac 1835 if (bs->backing_hd) {
826b6ca0
FZ
1836 BlockDriverState *backing_hd = bs->backing_hd;
1837 bdrv_set_backing_hd(bs, NULL);
1838 bdrv_unref(backing_hd);
557df6ac 1839 }
ea2384d3 1840 bs->drv->bdrv_close(bs);
7267c094 1841 g_free(bs->opaque);
ea2384d3
FB
1842 bs->opaque = NULL;
1843 bs->drv = NULL;
53fec9d3 1844 bs->copy_on_read = 0;
a275fa42
PB
1845 bs->backing_file[0] = '\0';
1846 bs->backing_format[0] = '\0';
6405875c
PB
1847 bs->total_sectors = 0;
1848 bs->encrypted = 0;
1849 bs->valid_key = 0;
1850 bs->sg = 0;
1851 bs->growable = 0;
0d51b4de 1852 bs->zero_beyond_eof = false;
de9c0cec
KW
1853 QDECREF(bs->options);
1854 bs->options = NULL;
91af7014
HR
1855 QDECREF(bs->full_open_options);
1856 bs->full_open_options = NULL;
b338082b 1857
66f82cee 1858 if (bs->file != NULL) {
4f6fd349 1859 bdrv_unref(bs->file);
0ac9377d 1860 bs->file = NULL;
66f82cee 1861 }
b338082b 1862 }
98f90dba 1863
9ca11154
PH
1864 bdrv_dev_change_media_cb(bs, false);
1865
98f90dba
ZYW
1866 /*throttling disk I/O limits*/
1867 if (bs->io_limits_enabled) {
1868 bdrv_io_limits_disable(bs);
1869 }
33384421
HR
1870
1871 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1872 g_free(ban);
1873 }
1874 QLIST_INIT(&bs->aio_notifiers);
b338082b
FB
1875}
1876
2bc93fed
MK
1877void bdrv_close_all(void)
1878{
1879 BlockDriverState *bs;
1880
dc364f4c 1881 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
1882 AioContext *aio_context = bdrv_get_aio_context(bs);
1883
1884 aio_context_acquire(aio_context);
2bc93fed 1885 bdrv_close(bs);
ed78cda3 1886 aio_context_release(aio_context);
2bc93fed
MK
1887 }
1888}
1889
88266f5a
SH
1890/* Check if any requests are in-flight (including throttled requests) */
1891static bool bdrv_requests_pending(BlockDriverState *bs)
1892{
1893 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1894 return true;
1895 }
cc0681c4
BC
1896 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1897 return true;
1898 }
1899 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
88266f5a
SH
1900 return true;
1901 }
1902 if (bs->file && bdrv_requests_pending(bs->file)) {
1903 return true;
1904 }
1905 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1906 return true;
1907 }
1908 return false;
1909}
1910
922453bc
SH
1911/*
1912 * Wait for pending requests to complete across all BlockDriverStates
1913 *
1914 * This function does not flush data to disk, use bdrv_flush_all() for that
1915 * after calling this function.
4c355d53
ZYW
1916 *
1917 * Note that completion of an asynchronous I/O operation can trigger any
1918 * number of other I/O operations on other devices---for example a coroutine
1919 * can be arbitrarily complex and a constant flow of I/O can come until the
1920 * coroutine is complete. Because of this, it is not possible to have a
1921 * function to drain a single device's I/O queue.
922453bc
SH
1922 */
1923void bdrv_drain_all(void)
1924{
88266f5a
SH
1925 /* Always run first iteration so any pending completion BHs run */
1926 bool busy = true;
922453bc
SH
1927 BlockDriverState *bs;
1928
88266f5a 1929 while (busy) {
9b536adc
SH
1930 busy = false;
1931
dc364f4c 1932 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
9b536adc
SH
1933 AioContext *aio_context = bdrv_get_aio_context(bs);
1934 bool bs_busy;
1935
1936 aio_context_acquire(aio_context);
448ad91d 1937 bdrv_flush_io_queue(bs);
0b06ef3b 1938 bdrv_start_throttled_reqs(bs);
9b536adc
SH
1939 bs_busy = bdrv_requests_pending(bs);
1940 bs_busy |= aio_poll(aio_context, bs_busy);
1941 aio_context_release(aio_context);
922453bc 1942
9b536adc
SH
1943 busy |= bs_busy;
1944 }
922453bc
SH
1945 }
1946}
1947
dc364f4c
BC
1948/* make a BlockDriverState anonymous by removing from bdrv_state and
1949 * graph_bdrv_state list.
d22b2f41
RH
1950 Also, NULL terminate the device_name to prevent double remove */
1951void bdrv_make_anon(BlockDriverState *bs)
1952{
1953 if (bs->device_name[0] != '\0') {
dc364f4c 1954 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
d22b2f41
RH
1955 }
1956 bs->device_name[0] = '\0';
dc364f4c
BC
1957 if (bs->node_name[0] != '\0') {
1958 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1959 }
1960 bs->node_name[0] = '\0';
d22b2f41
RH
1961}
1962
e023b2e2
PB
1963static void bdrv_rebind(BlockDriverState *bs)
1964{
1965 if (bs->drv && bs->drv->bdrv_rebind) {
1966 bs->drv->bdrv_rebind(bs);
1967 }
1968}
1969
4ddc07ca
PB
1970static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1971 BlockDriverState *bs_src)
8802d1fd 1972{
4ddc07ca 1973 /* move some fields that need to stay attached to the device */
8802d1fd
JC
1974
1975 /* dev info */
4ddc07ca
PB
1976 bs_dest->dev_ops = bs_src->dev_ops;
1977 bs_dest->dev_opaque = bs_src->dev_opaque;
1978 bs_dest->dev = bs_src->dev;
1b7fd729 1979 bs_dest->guest_block_size = bs_src->guest_block_size;
4ddc07ca 1980 bs_dest->copy_on_read = bs_src->copy_on_read;
8802d1fd 1981
4ddc07ca 1982 bs_dest->enable_write_cache = bs_src->enable_write_cache;
c4a248a1 1983
cc0681c4
BC
1984 /* i/o throttled req */
1985 memcpy(&bs_dest->throttle_state,
1986 &bs_src->throttle_state,
1987 sizeof(ThrottleState));
1988 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
1989 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
4ddc07ca 1990 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
8802d1fd 1991
8802d1fd 1992 /* r/w error */
4ddc07ca
PB
1993 bs_dest->on_read_error = bs_src->on_read_error;
1994 bs_dest->on_write_error = bs_src->on_write_error;
8802d1fd
JC
1995
1996 /* i/o status */
4ddc07ca
PB
1997 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
1998 bs_dest->iostatus = bs_src->iostatus;
8802d1fd 1999
a9fc4408 2000 /* dirty bitmap */
e4654d2d 2001 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
a9fc4408 2002
9fcb0251
FZ
2003 /* reference count */
2004 bs_dest->refcnt = bs_src->refcnt;
2005
a9fc4408 2006 /* job */
4ddc07ca 2007 bs_dest->job = bs_src->job;
a9fc4408 2008
8802d1fd 2009 /* keep the same entry in bdrv_states */
4ddc07ca
PB
2010 pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
2011 bs_src->device_name);
dc364f4c 2012 bs_dest->device_list = bs_src->device_list;
fbe40ff7
FZ
2013 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
2014 sizeof(bs_dest->op_blockers));
4ddc07ca 2015}
8802d1fd 2016
4ddc07ca
PB
2017/*
2018 * Swap bs contents for two image chains while they are live,
2019 * while keeping required fields on the BlockDriverState that is
2020 * actually attached to a device.
2021 *
2022 * This will modify the BlockDriverState fields, and swap contents
2023 * between bs_new and bs_old. Both bs_new and bs_old are modified.
2024 *
2025 * bs_new is required to be anonymous.
2026 *
2027 * This function does not create any image files.
2028 */
2029void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
2030{
2031 BlockDriverState tmp;
f6801b83 2032
90ce8a06
BC
2033 /* The code needs to swap the node_name but simply swapping node_list won't
2034 * work so first remove the nodes from the graph list, do the swap then
2035 * insert them back if needed.
2036 */
2037 if (bs_new->node_name[0] != '\0') {
2038 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
2039 }
2040 if (bs_old->node_name[0] != '\0') {
2041 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
2042 }
2043
4ddc07ca
PB
2044 /* bs_new must be anonymous and shouldn't have anything fancy enabled */
2045 assert(bs_new->device_name[0] == '\0');
e4654d2d 2046 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
4ddc07ca
PB
2047 assert(bs_new->job == NULL);
2048 assert(bs_new->dev == NULL);
4ddc07ca 2049 assert(bs_new->io_limits_enabled == false);
cc0681c4 2050 assert(!throttle_have_timer(&bs_new->throttle_state));
8802d1fd 2051
4ddc07ca
PB
2052 tmp = *bs_new;
2053 *bs_new = *bs_old;
2054 *bs_old = tmp;
a9fc4408 2055
4ddc07ca
PB
2056 /* there are some fields that should not be swapped, move them back */
2057 bdrv_move_feature_fields(&tmp, bs_old);
2058 bdrv_move_feature_fields(bs_old, bs_new);
2059 bdrv_move_feature_fields(bs_new, &tmp);
8802d1fd 2060
4ddc07ca
PB
2061 /* bs_new shouldn't be in bdrv_states even after the swap! */
2062 assert(bs_new->device_name[0] == '\0');
2063
2064 /* Check a few fields that should remain attached to the device */
2065 assert(bs_new->dev == NULL);
2066 assert(bs_new->job == NULL);
4ddc07ca 2067 assert(bs_new->io_limits_enabled == false);
cc0681c4 2068 assert(!throttle_have_timer(&bs_new->throttle_state));
e023b2e2 2069
90ce8a06
BC
2070 /* insert the nodes back into the graph node list if needed */
2071 if (bs_new->node_name[0] != '\0') {
2072 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
2073 }
2074 if (bs_old->node_name[0] != '\0') {
2075 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
2076 }
2077
e023b2e2 2078 bdrv_rebind(bs_new);
4ddc07ca
PB
2079 bdrv_rebind(bs_old);
2080}
2081
2082/*
2083 * Add new bs contents at the top of an image chain while the chain is
2084 * live, while keeping required fields on the top layer.
2085 *
2086 * This will modify the BlockDriverState fields, and swap contents
2087 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2088 *
2089 * bs_new is required to be anonymous.
2090 *
2091 * This function does not create any image files.
2092 */
2093void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2094{
2095 bdrv_swap(bs_new, bs_top);
2096
2097 /* The contents of 'tmp' will become bs_top, as we are
2098 * swapping bs_new and bs_top contents. */
8d24cce1 2099 bdrv_set_backing_hd(bs_top, bs_new);
8802d1fd
JC
2100}
2101
4f6fd349 2102static void bdrv_delete(BlockDriverState *bs)
b338082b 2103{
fa879d62 2104 assert(!bs->dev);
3e914655 2105 assert(!bs->job);
3718d8ab 2106 assert(bdrv_op_blocker_is_empty(bs));
4f6fd349 2107 assert(!bs->refcnt);
e4654d2d 2108 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
18846dee 2109
e1b5c52e
SH
2110 bdrv_close(bs);
2111
1b7bdbc1 2112 /* remove from list, if necessary */
d22b2f41 2113 bdrv_make_anon(bs);
34c6f050 2114
3ae59580 2115 drive_info_del(drive_get_by_blockdev(bs));
7267c094 2116 g_free(bs);
fc01f7e7
FB
2117}
2118
fa879d62
MA
2119int bdrv_attach_dev(BlockDriverState *bs, void *dev)
2120/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 2121{
fa879d62 2122 if (bs->dev) {
18846dee
MA
2123 return -EBUSY;
2124 }
fa879d62 2125 bs->dev = dev;
28a7282a 2126 bdrv_iostatus_reset(bs);
2a87151f
SH
2127
2128 /* We're expecting I/O from the device so bump up coroutine pool size */
2129 qemu_coroutine_adjust_pool_size(COROUTINE_POOL_RESERVATION);
18846dee
MA
2130 return 0;
2131}
2132
fa879d62
MA
2133/* TODO qdevified devices don't use this, remove when devices are qdevified */
2134void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 2135{
fa879d62
MA
2136 if (bdrv_attach_dev(bs, dev) < 0) {
2137 abort();
2138 }
2139}
2140
2141void bdrv_detach_dev(BlockDriverState *bs, void *dev)
2142/* TODO change to DeviceState *dev when all users are qdevified */
2143{
2144 assert(bs->dev == dev);
2145 bs->dev = NULL;
0e49de52
MA
2146 bs->dev_ops = NULL;
2147 bs->dev_opaque = NULL;
1b7fd729 2148 bs->guest_block_size = 512;
2a87151f 2149 qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION);
18846dee
MA
2150}
2151
fa879d62
MA
2152/* TODO change to return DeviceState * when all users are qdevified */
2153void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 2154{
fa879d62 2155 return bs->dev;
18846dee
MA
2156}
2157
0e49de52
MA
2158void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
2159 void *opaque)
2160{
2161 bs->dev_ops = ops;
2162 bs->dev_opaque = opaque;
2163}
2164
7d4b4ba5 2165static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 2166{
145feb17 2167 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
6f382ed2 2168 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
7d4b4ba5 2169 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
6f382ed2
LC
2170 if (tray_was_closed) {
2171 /* tray open */
a5ee7bd4
WX
2172 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
2173 true, &error_abort);
6f382ed2
LC
2174 }
2175 if (load) {
2176 /* tray close */
a5ee7bd4
WX
2177 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
2178 false, &error_abort);
6f382ed2 2179 }
145feb17
MA
2180 }
2181}
2182
2c6942fa
MA
2183bool bdrv_dev_has_removable_media(BlockDriverState *bs)
2184{
2185 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
2186}
2187
025ccaa7
PB
2188void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
2189{
2190 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
2191 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
2192 }
2193}
2194
e4def80b
MA
2195bool bdrv_dev_is_tray_open(BlockDriverState *bs)
2196{
2197 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
2198 return bs->dev_ops->is_tray_open(bs->dev_opaque);
2199 }
2200 return false;
2201}
2202
145feb17
MA
2203static void bdrv_dev_resize_cb(BlockDriverState *bs)
2204{
2205 if (bs->dev_ops && bs->dev_ops->resize_cb) {
2206 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
2207 }
2208}
2209
f107639a
MA
2210bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
2211{
2212 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
2213 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
2214 }
2215 return false;
2216}
2217
e97fc193
AL
2218/*
2219 * Run consistency checks on an image
2220 *
e076f338 2221 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 2222 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 2223 * check are stored in res.
e97fc193 2224 */
4534ff54 2225int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
e97fc193 2226{
908bcd54
HR
2227 if (bs->drv == NULL) {
2228 return -ENOMEDIUM;
2229 }
e97fc193
AL
2230 if (bs->drv->bdrv_check == NULL) {
2231 return -ENOTSUP;
2232 }
2233
e076f338 2234 memset(res, 0, sizeof(*res));
4534ff54 2235 return bs->drv->bdrv_check(bs, res, fix);
e97fc193
AL
2236}
2237
8a426614
KW
2238#define COMMIT_BUF_SECTORS 2048
2239
33e3963e
FB
2240/* commit COW file into the raw image */
2241int bdrv_commit(BlockDriverState *bs)
2242{
19cb3738 2243 BlockDriver *drv = bs->drv;
72706ea4 2244 int64_t sector, total_sectors, length, backing_length;
8a426614 2245 int n, ro, open_flags;
0bce597d 2246 int ret = 0;
72706ea4 2247 uint8_t *buf = NULL;
c2cba3d9 2248 char filename[PATH_MAX];
33e3963e 2249
19cb3738
FB
2250 if (!drv)
2251 return -ENOMEDIUM;
6bb45158 2252
4dca4b63
NS
2253 if (!bs->backing_hd) {
2254 return -ENOTSUP;
33e3963e
FB
2255 }
2256
3718d8ab
FZ
2257 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) ||
2258 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) {
2d3735d3
SH
2259 return -EBUSY;
2260 }
2261
4dca4b63 2262 ro = bs->backing_hd->read_only;
c2cba3d9
JM
2263 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2264 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
4dca4b63
NS
2265 open_flags = bs->backing_hd->open_flags;
2266
2267 if (ro) {
0bce597d
JC
2268 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2269 return -EACCES;
4dca4b63 2270 }
ea2384d3 2271 }
33e3963e 2272
72706ea4
JC
2273 length = bdrv_getlength(bs);
2274 if (length < 0) {
2275 ret = length;
2276 goto ro_cleanup;
2277 }
2278
2279 backing_length = bdrv_getlength(bs->backing_hd);
2280 if (backing_length < 0) {
2281 ret = backing_length;
2282 goto ro_cleanup;
2283 }
2284
2285 /* If our top snapshot is larger than the backing file image,
2286 * grow the backing file image if possible. If not possible,
2287 * we must return an error */
2288 if (length > backing_length) {
2289 ret = bdrv_truncate(bs->backing_hd, length);
2290 if (ret < 0) {
2291 goto ro_cleanup;
2292 }
2293 }
2294
2295 total_sectors = length >> BDRV_SECTOR_BITS;
857d4f46
KW
2296
2297 /* qemu_try_blockalign() for bs will choose an alignment that works for
2298 * bs->backing_hd as well, so no need to compare the alignment manually. */
2299 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2300 if (buf == NULL) {
2301 ret = -ENOMEM;
2302 goto ro_cleanup;
2303 }
8a426614
KW
2304
2305 for (sector = 0; sector < total_sectors; sector += n) {
d663640c
PB
2306 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2307 if (ret < 0) {
2308 goto ro_cleanup;
2309 }
2310 if (ret) {
dabfa6cc
KW
2311 ret = bdrv_read(bs, sector, buf, n);
2312 if (ret < 0) {
8a426614
KW
2313 goto ro_cleanup;
2314 }
2315
dabfa6cc
KW
2316 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2317 if (ret < 0) {
8a426614
KW
2318 goto ro_cleanup;
2319 }
ea2384d3 2320 }
33e3963e 2321 }
95389c86 2322
1d44952f
CH
2323 if (drv->bdrv_make_empty) {
2324 ret = drv->bdrv_make_empty(bs);
dabfa6cc
KW
2325 if (ret < 0) {
2326 goto ro_cleanup;
2327 }
1d44952f
CH
2328 bdrv_flush(bs);
2329 }
95389c86 2330
3f5075ae
CH
2331 /*
2332 * Make sure all data we wrote to the backing device is actually
2333 * stable on disk.
2334 */
dabfa6cc 2335 if (bs->backing_hd) {
3f5075ae 2336 bdrv_flush(bs->backing_hd);
dabfa6cc 2337 }
4dca4b63 2338
dabfa6cc 2339 ret = 0;
4dca4b63 2340ro_cleanup:
857d4f46 2341 qemu_vfree(buf);
4dca4b63
NS
2342
2343 if (ro) {
0bce597d
JC
2344 /* ignoring error return here */
2345 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
4dca4b63
NS
2346 }
2347
1d44952f 2348 return ret;
33e3963e
FB
2349}
2350
e8877497 2351int bdrv_commit_all(void)
6ab4b5ab
MA
2352{
2353 BlockDriverState *bs;
2354
dc364f4c 2355 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
2356 AioContext *aio_context = bdrv_get_aio_context(bs);
2357
2358 aio_context_acquire(aio_context);
272d2d8e
JC
2359 if (bs->drv && bs->backing_hd) {
2360 int ret = bdrv_commit(bs);
2361 if (ret < 0) {
ed78cda3 2362 aio_context_release(aio_context);
272d2d8e
JC
2363 return ret;
2364 }
e8877497 2365 }
ed78cda3 2366 aio_context_release(aio_context);
6ab4b5ab 2367 }
e8877497 2368 return 0;
6ab4b5ab
MA
2369}
2370
dbffbdcf
SH
2371/**
2372 * Remove an active request from the tracked requests list
2373 *
2374 * This function should be called when a tracked request is completing.
2375 */
2376static void tracked_request_end(BdrvTrackedRequest *req)
2377{
2dbafdc0
KW
2378 if (req->serialising) {
2379 req->bs->serialising_in_flight--;
2380 }
2381
dbffbdcf 2382 QLIST_REMOVE(req, list);
f4658285 2383 qemu_co_queue_restart_all(&req->wait_queue);
dbffbdcf
SH
2384}
2385
2386/**
2387 * Add an active request to the tracked requests list
2388 */
2389static void tracked_request_begin(BdrvTrackedRequest *req,
2390 BlockDriverState *bs,
793ed47a
KW
2391 int64_t offset,
2392 unsigned int bytes, bool is_write)
dbffbdcf
SH
2393{
2394 *req = (BdrvTrackedRequest){
2395 .bs = bs,
2dbafdc0
KW
2396 .offset = offset,
2397 .bytes = bytes,
2398 .is_write = is_write,
2399 .co = qemu_coroutine_self(),
2400 .serialising = false,
7327145f
KW
2401 .overlap_offset = offset,
2402 .overlap_bytes = bytes,
dbffbdcf
SH
2403 };
2404
f4658285
SH
2405 qemu_co_queue_init(&req->wait_queue);
2406
dbffbdcf
SH
2407 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2408}
2409
e96126ff 2410static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
2dbafdc0 2411{
7327145f 2412 int64_t overlap_offset = req->offset & ~(align - 1);
e96126ff
KW
2413 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2414 - overlap_offset;
7327145f 2415
2dbafdc0
KW
2416 if (!req->serialising) {
2417 req->bs->serialising_in_flight++;
2418 req->serialising = true;
2419 }
7327145f
KW
2420
2421 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2422 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
2dbafdc0
KW
2423}
2424
d83947ac
SH
2425/**
2426 * Round a region to cluster boundaries
2427 */
343bded4
PB
2428void bdrv_round_to_clusters(BlockDriverState *bs,
2429 int64_t sector_num, int nb_sectors,
2430 int64_t *cluster_sector_num,
2431 int *cluster_nb_sectors)
d83947ac
SH
2432{
2433 BlockDriverInfo bdi;
2434
2435 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2436 *cluster_sector_num = sector_num;
2437 *cluster_nb_sectors = nb_sectors;
2438 } else {
2439 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2440 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2441 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2442 nb_sectors, c);
2443 }
2444}
2445
7327145f 2446static int bdrv_get_cluster_size(BlockDriverState *bs)
793ed47a
KW
2447{
2448 BlockDriverInfo bdi;
7327145f 2449 int ret;
793ed47a 2450
7327145f
KW
2451 ret = bdrv_get_info(bs, &bdi);
2452 if (ret < 0 || bdi.cluster_size == 0) {
2453 return bs->request_alignment;
793ed47a 2454 } else {
7327145f 2455 return bdi.cluster_size;
793ed47a
KW
2456 }
2457}
2458
f4658285 2459static bool tracked_request_overlaps(BdrvTrackedRequest *req,
793ed47a
KW
2460 int64_t offset, unsigned int bytes)
2461{
d83947ac 2462 /* aaaa bbbb */
7327145f 2463 if (offset >= req->overlap_offset + req->overlap_bytes) {
d83947ac
SH
2464 return false;
2465 }
2466 /* bbbb aaaa */
7327145f 2467 if (req->overlap_offset >= offset + bytes) {
d83947ac
SH
2468 return false;
2469 }
2470 return true;
f4658285
SH
2471}
2472
28de2dcd 2473static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
f4658285 2474{
2dbafdc0 2475 BlockDriverState *bs = self->bs;
f4658285
SH
2476 BdrvTrackedRequest *req;
2477 bool retry;
28de2dcd 2478 bool waited = false;
f4658285 2479
2dbafdc0 2480 if (!bs->serialising_in_flight) {
28de2dcd 2481 return false;
2dbafdc0
KW
2482 }
2483
f4658285
SH
2484 do {
2485 retry = false;
2486 QLIST_FOREACH(req, &bs->tracked_requests, list) {
2dbafdc0 2487 if (req == self || (!req->serialising && !self->serialising)) {
65afd211
KW
2488 continue;
2489 }
7327145f
KW
2490 if (tracked_request_overlaps(req, self->overlap_offset,
2491 self->overlap_bytes))
2492 {
5f8b6491
SH
2493 /* Hitting this means there was a reentrant request, for
2494 * example, a block driver issuing nested requests. This must
2495 * never happen since it means deadlock.
2496 */
2497 assert(qemu_coroutine_self() != req->co);
2498
6460440f
KW
2499 /* If the request is already (indirectly) waiting for us, or
2500 * will wait for us as soon as it wakes up, then just go on
2501 * (instead of producing a deadlock in the former case). */
2502 if (!req->waiting_for) {
2503 self->waiting_for = req;
2504 qemu_co_queue_wait(&req->wait_queue);
2505 self->waiting_for = NULL;
2506 retry = true;
28de2dcd 2507 waited = true;
6460440f
KW
2508 break;
2509 }
f4658285
SH
2510 }
2511 }
2512 } while (retry);
28de2dcd
KW
2513
2514 return waited;
f4658285
SH
2515}
2516
756e6736
KW
2517/*
2518 * Return values:
2519 * 0 - success
2520 * -EINVAL - backing format specified, but no file
2521 * -ENOSPC - can't update the backing file because no space is left in the
2522 * image file header
2523 * -ENOTSUP - format driver doesn't support changing the backing file
2524 */
2525int bdrv_change_backing_file(BlockDriverState *bs,
2526 const char *backing_file, const char *backing_fmt)
2527{
2528 BlockDriver *drv = bs->drv;
469ef350 2529 int ret;
756e6736 2530
5f377794
PB
2531 /* Backing file format doesn't make sense without a backing file */
2532 if (backing_fmt && !backing_file) {
2533 return -EINVAL;
2534 }
2535
756e6736 2536 if (drv->bdrv_change_backing_file != NULL) {
469ef350 2537 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
756e6736 2538 } else {
469ef350 2539 ret = -ENOTSUP;
756e6736 2540 }
469ef350
PB
2541
2542 if (ret == 0) {
2543 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2544 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2545 }
2546 return ret;
756e6736
KW
2547}
2548
6ebdcee2
JC
2549/*
2550 * Finds the image layer in the chain that has 'bs' as its backing file.
2551 *
2552 * active is the current topmost image.
2553 *
2554 * Returns NULL if bs is not found in active's image chain,
2555 * or if active == bs.
4caf0fcd
JC
2556 *
2557 * Returns the bottommost base image if bs == NULL.
6ebdcee2
JC
2558 */
2559BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2560 BlockDriverState *bs)
2561{
4caf0fcd
JC
2562 while (active && bs != active->backing_hd) {
2563 active = active->backing_hd;
6ebdcee2
JC
2564 }
2565
4caf0fcd
JC
2566 return active;
2567}
6ebdcee2 2568
4caf0fcd
JC
2569/* Given a BDS, searches for the base layer. */
2570BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2571{
2572 return bdrv_find_overlay(bs, NULL);
6ebdcee2
JC
2573}
2574
2575typedef struct BlkIntermediateStates {
2576 BlockDriverState *bs;
2577 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2578} BlkIntermediateStates;
2579
2580
2581/*
2582 * Drops images above 'base' up to and including 'top', and sets the image
2583 * above 'top' to have base as its backing file.
2584 *
2585 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2586 * information in 'bs' can be properly updated.
2587 *
2588 * E.g., this will convert the following chain:
2589 * bottom <- base <- intermediate <- top <- active
2590 *
2591 * to
2592 *
2593 * bottom <- base <- active
2594 *
2595 * It is allowed for bottom==base, in which case it converts:
2596 *
2597 * base <- intermediate <- top <- active
2598 *
2599 * to
2600 *
2601 * base <- active
2602 *
54e26900
JC
2603 * If backing_file_str is non-NULL, it will be used when modifying top's
2604 * overlay image metadata.
2605 *
6ebdcee2
JC
2606 * Error conditions:
2607 * if active == top, that is considered an error
2608 *
2609 */
2610int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
54e26900 2611 BlockDriverState *base, const char *backing_file_str)
6ebdcee2
JC
2612{
2613 BlockDriverState *intermediate;
2614 BlockDriverState *base_bs = NULL;
2615 BlockDriverState *new_top_bs = NULL;
2616 BlkIntermediateStates *intermediate_state, *next;
2617 int ret = -EIO;
2618
2619 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2620 QSIMPLEQ_INIT(&states_to_delete);
2621
2622 if (!top->drv || !base->drv) {
2623 goto exit;
2624 }
2625
2626 new_top_bs = bdrv_find_overlay(active, top);
2627
2628 if (new_top_bs == NULL) {
2629 /* we could not find the image above 'top', this is an error */
2630 goto exit;
2631 }
2632
2633 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2634 * to do, no intermediate images */
2635 if (new_top_bs->backing_hd == base) {
2636 ret = 0;
2637 goto exit;
2638 }
2639
2640 intermediate = top;
2641
2642 /* now we will go down through the list, and add each BDS we find
2643 * into our deletion queue, until we hit the 'base'
2644 */
2645 while (intermediate) {
5839e53b 2646 intermediate_state = g_new0(BlkIntermediateStates, 1);
6ebdcee2
JC
2647 intermediate_state->bs = intermediate;
2648 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2649
2650 if (intermediate->backing_hd == base) {
2651 base_bs = intermediate->backing_hd;
2652 break;
2653 }
2654 intermediate = intermediate->backing_hd;
2655 }
2656 if (base_bs == NULL) {
2657 /* something went wrong, we did not end at the base. safely
2658 * unravel everything, and exit with error */
2659 goto exit;
2660 }
2661
2662 /* success - we can delete the intermediate states, and link top->base */
54e26900
JC
2663 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2664 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
6ebdcee2
JC
2665 base_bs->drv ? base_bs->drv->format_name : "");
2666 if (ret) {
2667 goto exit;
2668 }
920beae1 2669 bdrv_set_backing_hd(new_top_bs, base_bs);
6ebdcee2
JC
2670
2671 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2672 /* so that bdrv_close() does not recursively close the chain */
920beae1 2673 bdrv_set_backing_hd(intermediate_state->bs, NULL);
4f6fd349 2674 bdrv_unref(intermediate_state->bs);
6ebdcee2
JC
2675 }
2676 ret = 0;
2677
2678exit:
2679 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2680 g_free(intermediate_state);
2681 }
2682 return ret;
2683}
2684
2685
71d0770c
AL
2686static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2687 size_t size)
2688{
2689 int64_t len;
2690
1dd3a447
KW
2691 if (size > INT_MAX) {
2692 return -EIO;
2693 }
2694
71d0770c
AL
2695 if (!bdrv_is_inserted(bs))
2696 return -ENOMEDIUM;
2697
2698 if (bs->growable)
2699 return 0;
2700
2701 len = bdrv_getlength(bs);
2702
fbb7b4e0
KW
2703 if (offset < 0)
2704 return -EIO;
2705
2706 if ((offset > len) || (len - offset < size))
71d0770c
AL
2707 return -EIO;
2708
2709 return 0;
2710}
2711
2712static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2713 int nb_sectors)
2714{
54db38a4 2715 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
8f4754ed
KW
2716 return -EIO;
2717 }
2718
eb5a3165
JS
2719 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2720 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
2721}
2722
1c9805a3
SH
2723typedef struct RwCo {
2724 BlockDriverState *bs;
775aa8b6 2725 int64_t offset;
1c9805a3
SH
2726 QEMUIOVector *qiov;
2727 bool is_write;
2728 int ret;
4105eaaa 2729 BdrvRequestFlags flags;
1c9805a3
SH
2730} RwCo;
2731
2732static void coroutine_fn bdrv_rw_co_entry(void *opaque)
fc01f7e7 2733{
1c9805a3 2734 RwCo *rwco = opaque;
ea2384d3 2735
1c9805a3 2736 if (!rwco->is_write) {
775aa8b6
KW
2737 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2738 rwco->qiov->size, rwco->qiov,
4105eaaa 2739 rwco->flags);
775aa8b6
KW
2740 } else {
2741 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2742 rwco->qiov->size, rwco->qiov,
2743 rwco->flags);
1c9805a3
SH
2744 }
2745}
e7a8a783 2746
1c9805a3 2747/*
8d3b1a2d 2748 * Process a vectored synchronous request using coroutines
1c9805a3 2749 */
775aa8b6
KW
2750static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2751 QEMUIOVector *qiov, bool is_write,
2752 BdrvRequestFlags flags)
1c9805a3 2753{
1c9805a3
SH
2754 Coroutine *co;
2755 RwCo rwco = {
2756 .bs = bs,
775aa8b6 2757 .offset = offset,
8d3b1a2d 2758 .qiov = qiov,
1c9805a3
SH
2759 .is_write = is_write,
2760 .ret = NOT_DONE,
4105eaaa 2761 .flags = flags,
1c9805a3 2762 };
e7a8a783 2763
498e386c
ZYW
2764 /**
2765 * In sync call context, when the vcpu is blocked, this throttling timer
2766 * will not fire; so the I/O throttling function has to be disabled here
2767 * if it has been enabled.
2768 */
2769 if (bs->io_limits_enabled) {
2770 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2771 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2772 bdrv_io_limits_disable(bs);
2773 }
2774
1c9805a3
SH
2775 if (qemu_in_coroutine()) {
2776 /* Fast-path if already in coroutine context */
2777 bdrv_rw_co_entry(&rwco);
2778 } else {
2572b37a
SH
2779 AioContext *aio_context = bdrv_get_aio_context(bs);
2780
1c9805a3
SH
2781 co = qemu_coroutine_create(bdrv_rw_co_entry);
2782 qemu_coroutine_enter(co, &rwco);
2783 while (rwco.ret == NOT_DONE) {
2572b37a 2784 aio_poll(aio_context, true);
1c9805a3
SH
2785 }
2786 }
2787 return rwco.ret;
2788}
b338082b 2789
8d3b1a2d
KW
2790/*
2791 * Process a synchronous request using coroutines
2792 */
2793static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
4105eaaa 2794 int nb_sectors, bool is_write, BdrvRequestFlags flags)
8d3b1a2d
KW
2795{
2796 QEMUIOVector qiov;
2797 struct iovec iov = {
2798 .iov_base = (void *)buf,
2799 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2800 };
2801
da15ee51
KW
2802 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
2803 return -EINVAL;
2804 }
2805
8d3b1a2d 2806 qemu_iovec_init_external(&qiov, &iov, 1);
775aa8b6
KW
2807 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2808 &qiov, is_write, flags);
8d3b1a2d
KW
2809}
2810
1c9805a3
SH
2811/* return < 0 if error. See bdrv_write() for the return codes */
2812int bdrv_read(BlockDriverState *bs, int64_t sector_num,
2813 uint8_t *buf, int nb_sectors)
2814{
4105eaaa 2815 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
fc01f7e7
FB
2816}
2817
07d27a44
MA
2818/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2819int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2820 uint8_t *buf, int nb_sectors)
2821{
2822 bool enabled;
2823 int ret;
2824
2825 enabled = bs->io_limits_enabled;
2826 bs->io_limits_enabled = false;
4e7395e8 2827 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
07d27a44
MA
2828 bs->io_limits_enabled = enabled;
2829 return ret;
2830}
2831
5fafdf24 2832/* Return < 0 if error. Important errors are:
19cb3738
FB
2833 -EIO generic I/O error (may happen for all errors)
2834 -ENOMEDIUM No media inserted.
2835 -EINVAL Invalid sector number or nb_sectors
2836 -EACCES Trying to write a read-only device
2837*/
5fafdf24 2838int bdrv_write(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
2839 const uint8_t *buf, int nb_sectors)
2840{
4105eaaa 2841 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
83f64091
FB
2842}
2843
aa7bfbff
PL
2844int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2845 int nb_sectors, BdrvRequestFlags flags)
4105eaaa
PL
2846{
2847 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
aa7bfbff 2848 BDRV_REQ_ZERO_WRITE | flags);
8d3b1a2d
KW
2849}
2850
d75cbb5e
PL
2851/*
2852 * Completely zero out a block device with the help of bdrv_write_zeroes.
2853 * The operation is sped up by checking the block status and only writing
2854 * zeroes to the device if they currently do not return zeroes. Optional
2855 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2856 *
2857 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2858 */
2859int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2860{
d32f7c10 2861 int64_t target_sectors, ret, nb_sectors, sector_num = 0;
d75cbb5e
PL
2862 int n;
2863
d32f7c10
MA
2864 target_sectors = bdrv_nb_sectors(bs);
2865 if (target_sectors < 0) {
2866 return target_sectors;
9ce10c0b 2867 }
9ce10c0b 2868
d75cbb5e 2869 for (;;) {
d32f7c10 2870 nb_sectors = target_sectors - sector_num;
d75cbb5e
PL
2871 if (nb_sectors <= 0) {
2872 return 0;
2873 }
2874 if (nb_sectors > INT_MAX) {
2875 nb_sectors = INT_MAX;
2876 }
2877 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
3d94ce60
PL
2878 if (ret < 0) {
2879 error_report("error getting block status at sector %" PRId64 ": %s",
2880 sector_num, strerror(-ret));
2881 return ret;
2882 }
d75cbb5e
PL
2883 if (ret & BDRV_BLOCK_ZERO) {
2884 sector_num += n;
2885 continue;
2886 }
2887 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2888 if (ret < 0) {
2889 error_report("error writing zeroes at sector %" PRId64 ": %s",
2890 sector_num, strerror(-ret));
2891 return ret;
2892 }
2893 sector_num += n;
2894 }
2895}
2896
a3ef6571 2897int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
83f64091 2898{
a3ef6571
KW
2899 QEMUIOVector qiov;
2900 struct iovec iov = {
2901 .iov_base = (void *)buf,
2902 .iov_len = bytes,
2903 };
9a8c4cce 2904 int ret;
83f64091 2905
a3ef6571
KW
2906 if (bytes < 0) {
2907 return -EINVAL;
83f64091
FB
2908 }
2909
a3ef6571
KW
2910 qemu_iovec_init_external(&qiov, &iov, 1);
2911 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2912 if (ret < 0) {
2913 return ret;
83f64091 2914 }
a3ef6571
KW
2915
2916 return bytes;
83f64091
FB
2917}
2918
8d3b1a2d 2919int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
83f64091 2920{
9a8c4cce 2921 int ret;
83f64091 2922
8407d5d7
KW
2923 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2924 if (ret < 0) {
2925 return ret;
83f64091
FB
2926 }
2927
8d3b1a2d
KW
2928 return qiov->size;
2929}
2930
2931int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
8407d5d7 2932 const void *buf, int bytes)
8d3b1a2d
KW
2933{
2934 QEMUIOVector qiov;
2935 struct iovec iov = {
2936 .iov_base = (void *) buf,
8407d5d7 2937 .iov_len = bytes,
8d3b1a2d
KW
2938 };
2939
8407d5d7
KW
2940 if (bytes < 0) {
2941 return -EINVAL;
2942 }
2943
8d3b1a2d
KW
2944 qemu_iovec_init_external(&qiov, &iov, 1);
2945 return bdrv_pwritev(bs, offset, &qiov);
83f64091 2946}
83f64091 2947
f08145fe
KW
2948/*
2949 * Writes to the file and ensures that no writes are reordered across this
2950 * request (acts as a barrier)
2951 *
2952 * Returns 0 on success, -errno in error cases.
2953 */
2954int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2955 const void *buf, int count)
2956{
2957 int ret;
2958
2959 ret = bdrv_pwrite(bs, offset, buf, count);
2960 if (ret < 0) {
2961 return ret;
2962 }
2963
f05fa4ad
PB
2964 /* No flush needed for cache modes that already do it */
2965 if (bs->enable_write_cache) {
f08145fe
KW
2966 bdrv_flush(bs);
2967 }
2968
2969 return 0;
2970}
2971
470c0504 2972static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
ab185921
SH
2973 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2974{
2975 /* Perform I/O through a temporary buffer so that users who scribble over
2976 * their read buffer while the operation is in progress do not end up
2977 * modifying the image file. This is critical for zero-copy guest I/O
2978 * where anything might happen inside guest memory.
2979 */
2980 void *bounce_buffer;
2981
79c053bd 2982 BlockDriver *drv = bs->drv;
ab185921
SH
2983 struct iovec iov;
2984 QEMUIOVector bounce_qiov;
2985 int64_t cluster_sector_num;
2986 int cluster_nb_sectors;
2987 size_t skip_bytes;
2988 int ret;
2989
2990 /* Cover entire cluster so no additional backing file I/O is required when
2991 * allocating cluster in the image file.
2992 */
343bded4
PB
2993 bdrv_round_to_clusters(bs, sector_num, nb_sectors,
2994 &cluster_sector_num, &cluster_nb_sectors);
ab185921 2995
470c0504
SH
2996 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
2997 cluster_sector_num, cluster_nb_sectors);
ab185921
SH
2998
2999 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
857d4f46
KW
3000 iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
3001 if (bounce_buffer == NULL) {
3002 ret = -ENOMEM;
3003 goto err;
3004 }
3005
ab185921
SH
3006 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
3007
79c053bd
SH
3008 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
3009 &bounce_qiov);
ab185921
SH
3010 if (ret < 0) {
3011 goto err;
3012 }
3013
79c053bd
SH
3014 if (drv->bdrv_co_write_zeroes &&
3015 buffer_is_zero(bounce_buffer, iov.iov_len)) {
621f0589 3016 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
aa7bfbff 3017 cluster_nb_sectors, 0);
79c053bd 3018 } else {
f05fa4ad
PB
3019 /* This does not change the data on the disk, it is not necessary
3020 * to flush even in cache=writethrough mode.
3021 */
79c053bd 3022 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
ab185921 3023 &bounce_qiov);
79c053bd
SH
3024 }
3025
ab185921
SH
3026 if (ret < 0) {
3027 /* It might be okay to ignore write errors for guest requests. If this
3028 * is a deliberate copy-on-read then we don't want to ignore the error.
3029 * Simply report it in all cases.
3030 */
3031 goto err;
3032 }
3033
3034 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
03396148
MT
3035 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
3036 nb_sectors * BDRV_SECTOR_SIZE);
ab185921
SH
3037
3038err:
3039 qemu_vfree(bounce_buffer);
3040 return ret;
3041}
3042
c5fbe571 3043/*
d0c7f642
KW
3044 * Forwards an already correctly aligned request to the BlockDriver. This
3045 * handles copy on read and zeroing after EOF; any other features must be
3046 * implemented by the caller.
c5fbe571 3047 */
d0c7f642 3048static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
65afd211 3049 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
ec746e10 3050 int64_t align, QEMUIOVector *qiov, int flags)
da1fa91d
KW
3051{
3052 BlockDriver *drv = bs->drv;
dbffbdcf 3053 int ret;
da1fa91d 3054
d0c7f642
KW
3055 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3056 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
da1fa91d 3057
d0c7f642
KW
3058 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3059 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
8eb029c2 3060 assert(!qiov || bytes == qiov->size);
d0c7f642
KW
3061
3062 /* Handle Copy on Read and associated serialisation */
470c0504 3063 if (flags & BDRV_REQ_COPY_ON_READ) {
7327145f
KW
3064 /* If we touch the same cluster it counts as an overlap. This
3065 * guarantees that allocating writes will be serialized and not race
3066 * with each other for the same cluster. For example, in copy-on-read
3067 * it ensures that the CoR read and write operations are atomic and
3068 * guest writes cannot interleave between them. */
3069 mark_request_serialising(req, bdrv_get_cluster_size(bs));
470c0504
SH
3070 }
3071
2dbafdc0 3072 wait_serialising_requests(req);
f4658285 3073
470c0504 3074 if (flags & BDRV_REQ_COPY_ON_READ) {
ab185921
SH
3075 int pnum;
3076
bdad13b9 3077 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
ab185921
SH
3078 if (ret < 0) {
3079 goto out;
3080 }
3081
3082 if (!ret || pnum != nb_sectors) {
470c0504 3083 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
ab185921
SH
3084 goto out;
3085 }
3086 }
3087
d0c7f642 3088 /* Forward the request to the BlockDriver */
893a8f62
MK
3089 if (!(bs->zero_beyond_eof && bs->growable)) {
3090 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3091 } else {
3092 /* Read zeros after EOF of growable BDSes */
4049082c 3093 int64_t total_sectors, max_nb_sectors;
893a8f62 3094
4049082c
MA
3095 total_sectors = bdrv_nb_sectors(bs);
3096 if (total_sectors < 0) {
3097 ret = total_sectors;
893a8f62
MK
3098 goto out;
3099 }
3100
5f5bcd80
KW
3101 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
3102 align >> BDRV_SECTOR_BITS);
893a8f62 3103 if (max_nb_sectors > 0) {
33f461e0
KW
3104 QEMUIOVector local_qiov;
3105 size_t local_sectors;
3106
3107 max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS);
3108 local_sectors = MIN(max_nb_sectors, nb_sectors);
3109
3110 qemu_iovec_init(&local_qiov, qiov->niov);
3111 qemu_iovec_concat(&local_qiov, qiov, 0,
3112 local_sectors * BDRV_SECTOR_SIZE);
3113
3114 ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
3115 &local_qiov);
3116
3117 qemu_iovec_destroy(&local_qiov);
893a8f62
MK
3118 } else {
3119 ret = 0;
3120 }
3121
3122 /* Reading beyond end of file is supposed to produce zeroes */
3123 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3124 uint64_t offset = MAX(0, total_sectors - sector_num);
3125 uint64_t bytes = (sector_num + nb_sectors - offset) *
3126 BDRV_SECTOR_SIZE;
3127 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3128 }
3129 }
ab185921
SH
3130
3131out:
dbffbdcf 3132 return ret;
da1fa91d
KW
3133}
3134
d0c7f642
KW
3135/*
3136 * Handle a read request in coroutine context
3137 */
1b0288ae
KW
3138static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
3139 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
d0c7f642
KW
3140 BdrvRequestFlags flags)
3141{
3142 BlockDriver *drv = bs->drv;
65afd211
KW
3143 BdrvTrackedRequest req;
3144
1b0288ae
KW
3145 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3146 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3147 uint8_t *head_buf = NULL;
3148 uint8_t *tail_buf = NULL;
3149 QEMUIOVector local_qiov;
3150 bool use_local_qiov = false;
d0c7f642
KW
3151 int ret;
3152
3153 if (!drv) {
3154 return -ENOMEDIUM;
3155 }
1b0288ae 3156 if (bdrv_check_byte_request(bs, offset, bytes)) {
d0c7f642
KW
3157 return -EIO;
3158 }
3159
3160 if (bs->copy_on_read) {
3161 flags |= BDRV_REQ_COPY_ON_READ;
3162 }
3163
3164 /* throttling disk I/O */
3165 if (bs->io_limits_enabled) {
d5103588 3166 bdrv_io_limits_intercept(bs, bytes, false);
1b0288ae
KW
3167 }
3168
3169 /* Align read if necessary by padding qiov */
3170 if (offset & (align - 1)) {
3171 head_buf = qemu_blockalign(bs, align);
3172 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3173 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3174 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3175 use_local_qiov = true;
3176
3177 bytes += offset & (align - 1);
3178 offset = offset & ~(align - 1);
3179 }
3180
3181 if ((offset + bytes) & (align - 1)) {
3182 if (!use_local_qiov) {
3183 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3184 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3185 use_local_qiov = true;
3186 }
3187 tail_buf = qemu_blockalign(bs, align);
3188 qemu_iovec_add(&local_qiov, tail_buf,
3189 align - ((offset + bytes) & (align - 1)));
3190
3191 bytes = ROUND_UP(bytes, align);
3192 }
3193
65afd211 3194 tracked_request_begin(&req, bs, offset, bytes, false);
ec746e10 3195 ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
1b0288ae
KW
3196 use_local_qiov ? &local_qiov : qiov,
3197 flags);
65afd211 3198 tracked_request_end(&req);
1b0288ae
KW
3199
3200 if (use_local_qiov) {
3201 qemu_iovec_destroy(&local_qiov);
3202 qemu_vfree(head_buf);
3203 qemu_vfree(tail_buf);
d0c7f642
KW
3204 }
3205
d0c7f642
KW
3206 return ret;
3207}
3208
1b0288ae
KW
3209static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3210 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3211 BdrvRequestFlags flags)
3212{
3213 if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
3214 return -EINVAL;
3215 }
3216
3217 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3218 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3219}
3220
c5fbe571 3221int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
3222 int nb_sectors, QEMUIOVector *qiov)
3223{
c5fbe571 3224 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 3225
470c0504
SH
3226 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3227}
3228
3229int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3230 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3231{
3232 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3233
3234 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3235 BDRV_REQ_COPY_ON_READ);
c5fbe571
SH
3236}
3237
c31cb707
PL
3238/* if no limit is specified in the BlockLimits use a default
3239 * of 32768 512-byte sectors (16 MiB) per request.
3240 */
3241#define MAX_WRITE_ZEROES_DEFAULT 32768
3242
f08f2dda 3243static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
aa7bfbff 3244 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
f08f2dda
SH
3245{
3246 BlockDriver *drv = bs->drv;
3247 QEMUIOVector qiov;
c31cb707
PL
3248 struct iovec iov = {0};
3249 int ret = 0;
f08f2dda 3250
c31cb707
PL
3251 int max_write_zeroes = bs->bl.max_write_zeroes ?
3252 bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
621f0589 3253
c31cb707
PL
3254 while (nb_sectors > 0 && !ret) {
3255 int num = nb_sectors;
3256
b8d71c09
PB
3257 /* Align request. Block drivers can expect the "bulk" of the request
3258 * to be aligned.
3259 */
3260 if (bs->bl.write_zeroes_alignment
3261 && num > bs->bl.write_zeroes_alignment) {
3262 if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3263 /* Make a small request up to the first aligned sector. */
c31cb707 3264 num = bs->bl.write_zeroes_alignment;
b8d71c09
PB
3265 num -= sector_num % bs->bl.write_zeroes_alignment;
3266 } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3267 /* Shorten the request to the last aligned sector. num cannot
3268 * underflow because num > bs->bl.write_zeroes_alignment.
3269 */
3270 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
c31cb707 3271 }
621f0589 3272 }
f08f2dda 3273
c31cb707
PL
3274 /* limit request size */
3275 if (num > max_write_zeroes) {
3276 num = max_write_zeroes;
3277 }
3278
3279 ret = -ENOTSUP;
3280 /* First try the efficient write zeroes operation */
3281 if (drv->bdrv_co_write_zeroes) {
3282 ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3283 }
3284
3285 if (ret == -ENOTSUP) {
3286 /* Fall back to bounce buffer if write zeroes is unsupported */
3287 iov.iov_len = num * BDRV_SECTOR_SIZE;
3288 if (iov.iov_base == NULL) {
857d4f46
KW
3289 iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
3290 if (iov.iov_base == NULL) {
3291 ret = -ENOMEM;
3292 goto fail;
3293 }
b8d71c09 3294 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
c31cb707
PL
3295 }
3296 qemu_iovec_init_external(&qiov, &iov, 1);
f08f2dda 3297
c31cb707 3298 ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
b8d71c09
PB
3299
3300 /* Keep bounce buffer around if it is big enough for all
3301 * all future requests.
3302 */
3303 if (num < max_write_zeroes) {
3304 qemu_vfree(iov.iov_base);
3305 iov.iov_base = NULL;
3306 }
c31cb707
PL
3307 }
3308
3309 sector_num += num;
3310 nb_sectors -= num;
3311 }
f08f2dda 3312
857d4f46 3313fail:
f08f2dda
SH
3314 qemu_vfree(iov.iov_base);
3315 return ret;
3316}
3317
c5fbe571 3318/*
b404f720 3319 * Forwards an already correctly aligned write request to the BlockDriver.
c5fbe571 3320 */
b404f720 3321static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
65afd211
KW
3322 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3323 QEMUIOVector *qiov, int flags)
c5fbe571
SH
3324{
3325 BlockDriver *drv = bs->drv;
28de2dcd 3326 bool waited;
6b7cb247 3327 int ret;
da1fa91d 3328
b404f720
KW
3329 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3330 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
f4658285 3331
b404f720
KW
3332 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3333 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
8eb029c2 3334 assert(!qiov || bytes == qiov->size);
cc0681c4 3335
28de2dcd
KW
3336 waited = wait_serialising_requests(req);
3337 assert(!waited || !req->serialising);
af91f9a7
KW
3338 assert(req->overlap_offset <= offset);
3339 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
244eadef 3340
65afd211 3341 ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
d616b224 3342
465bee1d
PL
3343 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
3344 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
3345 qemu_iovec_is_zero(qiov)) {
3346 flags |= BDRV_REQ_ZERO_WRITE;
3347 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
3348 flags |= BDRV_REQ_MAY_UNMAP;
3349 }
3350 }
3351
d616b224
SH
3352 if (ret < 0) {
3353 /* Do nothing, write notifier decided to fail this request */
3354 } else if (flags & BDRV_REQ_ZERO_WRITE) {
9e1cb96d 3355 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
aa7bfbff 3356 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
f08f2dda 3357 } else {
9e1cb96d 3358 BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
f08f2dda
SH
3359 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3360 }
9e1cb96d 3361 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
6b7cb247 3362
f05fa4ad
PB
3363 if (ret == 0 && !bs->enable_write_cache) {
3364 ret = bdrv_co_flush(bs);
3365 }
3366
e4654d2d 3367 bdrv_set_dirty(bs, sector_num, nb_sectors);
da1fa91d 3368
5366d0c8 3369 block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
5e5a94b6 3370
df2a6f29
PB
3371 if (bs->growable && ret >= 0) {
3372 bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3373 }
da1fa91d 3374
6b7cb247 3375 return ret;
da1fa91d
KW
3376}
3377
b404f720
KW
3378/*
3379 * Handle a write request in coroutine context
3380 */
6601553e
KW
3381static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
3382 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
b404f720
KW
3383 BdrvRequestFlags flags)
3384{
65afd211 3385 BdrvTrackedRequest req;
3b8242e0
KW
3386 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3387 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3388 uint8_t *head_buf = NULL;
3389 uint8_t *tail_buf = NULL;
3390 QEMUIOVector local_qiov;
3391 bool use_local_qiov = false;
b404f720
KW
3392 int ret;
3393
3394 if (!bs->drv) {
3395 return -ENOMEDIUM;
3396 }
3397 if (bs->read_only) {
3398 return -EACCES;
3399 }
6601553e 3400 if (bdrv_check_byte_request(bs, offset, bytes)) {
b404f720
KW
3401 return -EIO;
3402 }
3403
b404f720
KW
3404 /* throttling disk I/O */
3405 if (bs->io_limits_enabled) {
d5103588 3406 bdrv_io_limits_intercept(bs, bytes, true);
b404f720
KW
3407 }
3408
3b8242e0
KW
3409 /*
3410 * Align write if necessary by performing a read-modify-write cycle.
3411 * Pad qiov with the read parts and be sure to have a tracked request not
3412 * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3413 */
65afd211 3414 tracked_request_begin(&req, bs, offset, bytes, true);
3b8242e0
KW
3415
3416 if (offset & (align - 1)) {
3417 QEMUIOVector head_qiov;
3418 struct iovec head_iov;
3419
3420 mark_request_serialising(&req, align);
3421 wait_serialising_requests(&req);
3422
3423 head_buf = qemu_blockalign(bs, align);
3424 head_iov = (struct iovec) {
3425 .iov_base = head_buf,
3426 .iov_len = align,
3427 };
3428 qemu_iovec_init_external(&head_qiov, &head_iov, 1);
3429
9e1cb96d 3430 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
3b8242e0
KW
3431 ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
3432 align, &head_qiov, 0);
3433 if (ret < 0) {
3434 goto fail;
3435 }
9e1cb96d 3436 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
3b8242e0
KW
3437
3438 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3439 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3440 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3441 use_local_qiov = true;
3442
3443 bytes += offset & (align - 1);
3444 offset = offset & ~(align - 1);
3445 }
3446
3447 if ((offset + bytes) & (align - 1)) {
3448 QEMUIOVector tail_qiov;
3449 struct iovec tail_iov;
3450 size_t tail_bytes;
28de2dcd 3451 bool waited;
3b8242e0
KW
3452
3453 mark_request_serialising(&req, align);
28de2dcd
KW
3454 waited = wait_serialising_requests(&req);
3455 assert(!waited || !use_local_qiov);
3b8242e0
KW
3456
3457 tail_buf = qemu_blockalign(bs, align);
3458 tail_iov = (struct iovec) {
3459 .iov_base = tail_buf,
3460 .iov_len = align,
3461 };
3462 qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
3463
9e1cb96d 3464 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
3b8242e0
KW
3465 ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
3466 align, &tail_qiov, 0);
3467 if (ret < 0) {
3468 goto fail;
3469 }
9e1cb96d 3470 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
3b8242e0
KW
3471
3472 if (!use_local_qiov) {
3473 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3474 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3475 use_local_qiov = true;
3476 }
3477
3478 tail_bytes = (offset + bytes) & (align - 1);
3479 qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
3480
3481 bytes = ROUND_UP(bytes, align);
3482 }
3483
3484 ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
3485 use_local_qiov ? &local_qiov : qiov,
3486 flags);
3487
3488fail:
65afd211 3489 tracked_request_end(&req);
b404f720 3490
3b8242e0
KW
3491 if (use_local_qiov) {
3492 qemu_iovec_destroy(&local_qiov);
3b8242e0 3493 }
99c4a85c
KW
3494 qemu_vfree(head_buf);
3495 qemu_vfree(tail_buf);
3b8242e0 3496
b404f720
KW
3497 return ret;
3498}
3499
6601553e
KW
3500static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3501 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3502 BdrvRequestFlags flags)
3503{
3504 if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
3505 return -EINVAL;
3506 }
3507
3508 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3509 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3510}
3511
c5fbe571
SH
3512int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3513 int nb_sectors, QEMUIOVector *qiov)
3514{
3515 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3516
f08f2dda
SH
3517 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3518}
3519
3520int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
aa7bfbff
PL
3521 int64_t sector_num, int nb_sectors,
3522 BdrvRequestFlags flags)
f08f2dda 3523{
94d6ff21 3524 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
f08f2dda 3525
d32f35cb
PL
3526 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3527 flags &= ~BDRV_REQ_MAY_UNMAP;
3528 }
3529
f08f2dda 3530 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
aa7bfbff 3531 BDRV_REQ_ZERO_WRITE | flags);
c5fbe571
SH
3532}
3533
83f64091
FB
3534/**
3535 * Truncate file to 'offset' bytes (needed only for file protocols)
3536 */
3537int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3538{
3539 BlockDriver *drv = bs->drv;
51762288 3540 int ret;
83f64091 3541 if (!drv)
19cb3738 3542 return -ENOMEDIUM;
83f64091
FB
3543 if (!drv->bdrv_truncate)
3544 return -ENOTSUP;
59f2689d
NS
3545 if (bs->read_only)
3546 return -EACCES;
9c75e168 3547
51762288
SH
3548 ret = drv->bdrv_truncate(bs, offset);
3549 if (ret == 0) {
3550 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 3551 bdrv_dev_resize_cb(bs);
51762288
SH
3552 }
3553 return ret;
83f64091
FB
3554}
3555
4a1d5e1f
FZ
3556/**
3557 * Length of a allocated file in bytes. Sparse files are counted by actual
3558 * allocated space. Return < 0 if error or unknown.
3559 */
3560int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3561{
3562 BlockDriver *drv = bs->drv;
3563 if (!drv) {
3564 return -ENOMEDIUM;
3565 }
3566 if (drv->bdrv_get_allocated_file_size) {
3567 return drv->bdrv_get_allocated_file_size(bs);
3568 }
3569 if (bs->file) {
3570 return bdrv_get_allocated_file_size(bs->file);
3571 }
3572 return -ENOTSUP;
3573}
3574
83f64091 3575/**
65a9bb25 3576 * Return number of sectors on success, -errno on error.
83f64091 3577 */
65a9bb25 3578int64_t bdrv_nb_sectors(BlockDriverState *bs)
83f64091
FB
3579{
3580 BlockDriver *drv = bs->drv;
65a9bb25 3581
83f64091 3582 if (!drv)
19cb3738 3583 return -ENOMEDIUM;
51762288 3584
b94a2610
KW
3585 if (drv->has_variable_length) {
3586 int ret = refresh_total_sectors(bs, bs->total_sectors);
3587 if (ret < 0) {
3588 return ret;
46a4e4e6 3589 }
83f64091 3590 }
65a9bb25
MA
3591 return bs->total_sectors;
3592}
3593
3594/**
3595 * Return length in bytes on success, -errno on error.
3596 * The length is always a multiple of BDRV_SECTOR_SIZE.
3597 */
3598int64_t bdrv_getlength(BlockDriverState *bs)
3599{
3600 int64_t ret = bdrv_nb_sectors(bs);
3601
3602 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
fc01f7e7
FB
3603}
3604
19cb3738 3605/* return 0 as number of sectors if no device present or error */
96b8f136 3606void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 3607{
65a9bb25
MA
3608 int64_t nb_sectors = bdrv_nb_sectors(bs);
3609
3610 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
fc01f7e7 3611}
cf98951b 3612
ff06f5f3
PB
3613void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3614 BlockdevOnError on_write_error)
abd7f68d
MA
3615{
3616 bs->on_read_error = on_read_error;
3617 bs->on_write_error = on_write_error;
3618}
3619
1ceee0d5 3620BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
abd7f68d
MA
3621{
3622 return is_read ? bs->on_read_error : bs->on_write_error;
3623}
3624
3e1caa5f
PB
3625BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3626{
3627 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3628
3629 switch (on_err) {
3630 case BLOCKDEV_ON_ERROR_ENOSPC:
a589569f
WX
3631 return (error == ENOSPC) ?
3632 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
3e1caa5f 3633 case BLOCKDEV_ON_ERROR_STOP:
a589569f 3634 return BLOCK_ERROR_ACTION_STOP;
3e1caa5f 3635 case BLOCKDEV_ON_ERROR_REPORT:
a589569f 3636 return BLOCK_ERROR_ACTION_REPORT;
3e1caa5f 3637 case BLOCKDEV_ON_ERROR_IGNORE:
a589569f 3638 return BLOCK_ERROR_ACTION_IGNORE;
3e1caa5f
PB
3639 default:
3640 abort();
3641 }
3642}
3643
c7c2ff0c
LC
3644static void send_qmp_error_event(BlockDriverState *bs,
3645 BlockErrorAction action,
3646 bool is_read, int error)
3647{
3648 BlockErrorAction ac;
3649
3650 ac = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3651 qapi_event_send_block_io_error(bdrv_get_device_name(bs), ac, action,
3652 bdrv_iostatus_is_enabled(bs),
624ff573
LC
3653 error == ENOSPC, strerror(error),
3654 &error_abort);
c7c2ff0c
LC
3655}
3656
3e1caa5f
PB
3657/* This is done by device models because, while the block layer knows
3658 * about the error, it does not know whether an operation comes from
3659 * the device or the block layer (from a job, for example).
3660 */
3661void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
3662 bool is_read, int error)
3663{
3664 assert(error >= 0);
2bd3bce8 3665
a589569f 3666 if (action == BLOCK_ERROR_ACTION_STOP) {
2bd3bce8
PB
3667 /* First set the iostatus, so that "info block" returns an iostatus
3668 * that matches the events raised so far (an additional error iostatus
3669 * is fine, but not a lost one).
3670 */
3e1caa5f 3671 bdrv_iostatus_set_err(bs, error);
2bd3bce8
PB
3672
3673 /* Then raise the request to stop the VM and the event.
3674 * qemu_system_vmstop_request_prepare has two effects. First,
3675 * it ensures that the STOP event always comes after the
3676 * BLOCK_IO_ERROR event. Second, it ensures that even if management
3677 * can observe the STOP event and do a "cont" before the STOP
3678 * event is issued, the VM will not stop. In this case, vm_start()
3679 * also ensures that the STOP/RESUME pair of events is emitted.
3680 */
3681 qemu_system_vmstop_request_prepare();
c7c2ff0c 3682 send_qmp_error_event(bs, action, is_read, error);
2bd3bce8
PB
3683 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
3684 } else {
c7c2ff0c 3685 send_qmp_error_event(bs, action, is_read, error);
3e1caa5f
PB
3686 }
3687}
3688
b338082b
FB
3689int bdrv_is_read_only(BlockDriverState *bs)
3690{
3691 return bs->read_only;
3692}
3693
985a03b0
TS
3694int bdrv_is_sg(BlockDriverState *bs)
3695{
3696 return bs->sg;
3697}
3698
e900a7b7
CH
3699int bdrv_enable_write_cache(BlockDriverState *bs)
3700{
3701 return bs->enable_write_cache;
3702}
3703
425b0148
PB
3704void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3705{
3706 bs->enable_write_cache = wce;
55b110f2
JC
3707
3708 /* so a reopen() will preserve wce */
3709 if (wce) {
3710 bs->open_flags |= BDRV_O_CACHE_WB;
3711 } else {
3712 bs->open_flags &= ~BDRV_O_CACHE_WB;
3713 }
425b0148
PB
3714}
3715
ea2384d3
FB
3716int bdrv_is_encrypted(BlockDriverState *bs)
3717{
3718 if (bs->backing_hd && bs->backing_hd->encrypted)
3719 return 1;
3720 return bs->encrypted;
3721}
3722
c0f4ce77
AL
3723int bdrv_key_required(BlockDriverState *bs)
3724{
3725 BlockDriverState *backing_hd = bs->backing_hd;
3726
3727 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3728 return 1;
3729 return (bs->encrypted && !bs->valid_key);
3730}
3731
ea2384d3
FB
3732int bdrv_set_key(BlockDriverState *bs, const char *key)
3733{
3734 int ret;
3735 if (bs->backing_hd && bs->backing_hd->encrypted) {
3736 ret = bdrv_set_key(bs->backing_hd, key);
3737 if (ret < 0)
3738 return ret;
3739 if (!bs->encrypted)
3740 return 0;
3741 }
fd04a2ae
SH
3742 if (!bs->encrypted) {
3743 return -EINVAL;
3744 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3745 return -ENOMEDIUM;
3746 }
c0f4ce77 3747 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
3748 if (ret < 0) {
3749 bs->valid_key = 0;
3750 } else if (!bs->valid_key) {
3751 bs->valid_key = 1;
3752 /* call the change callback now, we skipped it on open */
7d4b4ba5 3753 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 3754 }
c0f4ce77 3755 return ret;
ea2384d3
FB
3756}
3757
f8d6bba1 3758const char *bdrv_get_format_name(BlockDriverState *bs)
ea2384d3 3759{
f8d6bba1 3760 return bs->drv ? bs->drv->format_name : NULL;
ea2384d3
FB
3761}
3762
ada42401
SH
/*
 * qsort() comparator for an array of C strings (elements of type
 * 'const char *').
 *
 * qsort() hands the comparator pointers to the array elements, i.e.
 * pointers to the string pointers, so each argument has to be dereferenced
 * once before the strings themselves can be compared.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
3767
5fafdf24 3768void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
3769 void *opaque)
3770{
3771 BlockDriver *drv;
e855e4fb 3772 int count = 0;
ada42401 3773 int i;
e855e4fb 3774 const char **formats = NULL;
ea2384d3 3775
8a22f02a 3776 QLIST_FOREACH(drv, &bdrv_drivers, list) {
e855e4fb
JC
3777 if (drv->format_name) {
3778 bool found = false;
3779 int i = count;
3780 while (formats && i && !found) {
3781 found = !strcmp(formats[--i], drv->format_name);
3782 }
3783
3784 if (!found) {
5839e53b 3785 formats = g_renew(const char *, formats, count + 1);
e855e4fb 3786 formats[count++] = drv->format_name;
e855e4fb
JC
3787 }
3788 }
ea2384d3 3789 }
ada42401
SH
3790
3791 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3792
3793 for (i = 0; i < count; i++) {
3794 it(opaque, formats[i]);
3795 }
3796
e855e4fb 3797 g_free(formats);
ea2384d3
FB
3798}
3799
dc364f4c 3800/* This function is to find block backend bs */
b338082b
FB
3801BlockDriverState *bdrv_find(const char *name)
3802{
3803 BlockDriverState *bs;
3804
dc364f4c 3805 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1b7bdbc1 3806 if (!strcmp(name, bs->device_name)) {
b338082b 3807 return bs;
1b7bdbc1 3808 }
b338082b
FB
3809 }
3810 return NULL;
3811}
3812
dc364f4c
BC
3813/* This function is to find a node in the bs graph */
3814BlockDriverState *bdrv_find_node(const char *node_name)
3815{
3816 BlockDriverState *bs;
3817
3818 assert(node_name);
3819
3820 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3821 if (!strcmp(node_name, bs->node_name)) {
3822 return bs;
3823 }
3824 }
3825 return NULL;
3826}
3827
c13163fb
BC
3828/* Put this QMP function here so it can access the static graph_bdrv_states. */
3829BlockDeviceInfoList *bdrv_named_nodes_list(void)
3830{
3831 BlockDeviceInfoList *list, *entry;
3832 BlockDriverState *bs;
3833
3834 list = NULL;
3835 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3836 entry = g_malloc0(sizeof(*entry));
3837 entry->value = bdrv_block_device_info(bs);
3838 entry->next = list;
3839 list = entry;
3840 }
3841
3842 return list;
3843}
3844
12d3ba82
BC
3845BlockDriverState *bdrv_lookup_bs(const char *device,
3846 const char *node_name,
3847 Error **errp)
3848{
3849 BlockDriverState *bs = NULL;
3850
12d3ba82
BC
3851 if (device) {
3852 bs = bdrv_find(device);
3853
dd67fa50
BC
3854 if (bs) {
3855 return bs;
12d3ba82 3856 }
12d3ba82
BC
3857 }
3858
dd67fa50
BC
3859 if (node_name) {
3860 bs = bdrv_find_node(node_name);
12d3ba82 3861
dd67fa50
BC
3862 if (bs) {
3863 return bs;
3864 }
12d3ba82
BC
3865 }
3866
dd67fa50
BC
3867 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3868 device ? device : "",
3869 node_name ? node_name : "");
3870 return NULL;
12d3ba82
BC
3871}
3872
5a6684d2
JC
3873/* If 'base' is in the same chain as 'top', return true. Otherwise,
3874 * return false. If either argument is NULL, return false. */
3875bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3876{
3877 while (top && top != base) {
3878 top = top->backing_hd;
3879 }
3880
3881 return top != NULL;
3882}
3883
2f399b0a
MA
3884BlockDriverState *bdrv_next(BlockDriverState *bs)
3885{
3886 if (!bs) {
3887 return QTAILQ_FIRST(&bdrv_states);
3888 }
dc364f4c 3889 return QTAILQ_NEXT(bs, device_list);
2f399b0a
MA
3890}
3891
51de9760 3892void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
3893{
3894 BlockDriverState *bs;
3895
dc364f4c 3896 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
51de9760 3897 it(opaque, bs);
81d0912d
FB
3898 }
3899}
3900
ea2384d3
FB
3901const char *bdrv_get_device_name(BlockDriverState *bs)
3902{
3903 return bs->device_name;
3904}
3905
c8433287
MA
3906int bdrv_get_flags(BlockDriverState *bs)
3907{
3908 return bs->open_flags;
3909}
3910
f0f0fdfe 3911int bdrv_flush_all(void)
c6ca28d6
AL
3912{
3913 BlockDriverState *bs;
f0f0fdfe 3914 int result = 0;
c6ca28d6 3915
dc364f4c 3916 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
3917 AioContext *aio_context = bdrv_get_aio_context(bs);
3918 int ret;
3919
3920 aio_context_acquire(aio_context);
3921 ret = bdrv_flush(bs);
f0f0fdfe
KW
3922 if (ret < 0 && !result) {
3923 result = ret;
3924 }
ed78cda3 3925 aio_context_release(aio_context);
1b7bdbc1 3926 }
f0f0fdfe
KW
3927
3928 return result;
c6ca28d6
AL
3929}
3930
3ac21627
PL
3931int bdrv_has_zero_init_1(BlockDriverState *bs)
3932{
3933 return 1;
3934}
3935
f2feebbd
KW
3936int bdrv_has_zero_init(BlockDriverState *bs)
3937{
3938 assert(bs->drv);
3939
11212d8f
PB
3940 /* If BS is a copy on write image, it is initialized to
3941 the contents of the base image, which may not be zeroes. */
3942 if (bs->backing_hd) {
3943 return 0;
3944 }
336c1c12
KW
3945 if (bs->drv->bdrv_has_zero_init) {
3946 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
3947 }
3948
3ac21627
PL
3949 /* safe default */
3950 return 0;
f2feebbd
KW
3951}
3952
4ce78691
PL
3953bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3954{
3955 BlockDriverInfo bdi;
3956
3957 if (bs->backing_hd) {
3958 return false;
3959 }
3960
3961 if (bdrv_get_info(bs, &bdi) == 0) {
3962 return bdi.unallocated_blocks_are_zero;
3963 }
3964
3965 return false;
3966}
3967
3968bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3969{
3970 BlockDriverInfo bdi;
3971
3972 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
3973 return false;
3974 }
3975
3976 if (bdrv_get_info(bs, &bdi) == 0) {
3977 return bdi.can_write_zeroes_with_unmap;
3978 }
3979
3980 return false;
3981}
3982
/*
 * Bundle of arguments and results for a block-status query.
 *
 * bs, sector_num, nb_sectors and pnum have the same names and types as the
 * parameters of bdrv_co_get_block_status().
 * NOTE(review): presumably 'ret' receives the query result and 'done' is
 * set once it is available, with 'base' bounding a backing-chain walk;
 * the users of this struct are not visible here, so confirm against them.
 */
b6b8a333 3983typedef struct BdrvCoGetBlockStatusData {
376ae3f1 3984 BlockDriverState *bs;
b35b2bba 3985 BlockDriverState *base;
376ae3f1
SH
3986 int64_t sector_num;
3987 int nb_sectors;
3988 int *pnum;
b6b8a333 3989 int64_t ret;
376ae3f1 3990 bool done;
b6b8a333 3991} BdrvCoGetBlockStatusData;
376ae3f1 3992
f58c7b35
TS
3993/*
3994 * Returns true iff the specified sector is present in the disk image. Drivers
3995 * not implementing the functionality are assumed to not support backing files,
3996 * hence all their sectors are reported as allocated.
3997 *
bd9533e3
SH
3998 * If 'sector_num' is beyond the end of the disk image the return value is 0
3999 * and 'pnum' is set to 0.
4000 *
f58c7b35
TS
4001 * 'pnum' is set to the number of sectors (including and immediately following
4002 * the specified sector) that are known to be in the same
4003 * allocated/unallocated state.
4004 *
bd9533e3
SH
4005 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
4006 * beyond the end of the disk image it will be clamped.
f58c7b35 4007 */
b6b8a333
PB
4008static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
4009 int64_t sector_num,
4010 int nb_sectors, int *pnum)
f58c7b35 4011{
30a7f2fc 4012 int64_t total_sectors;
bd9533e3 4013 int64_t n;
5daa74a6 4014 int64_t ret, ret2;
bd9533e3 4015
30a7f2fc
MA
4016 total_sectors = bdrv_nb_sectors(bs);
4017 if (total_sectors < 0) {
4018 return total_sectors;
617ccb46
PB
4019 }
4020
30a7f2fc 4021 if (sector_num >= total_sectors) {
bd9533e3
SH
4022 *pnum = 0;
4023 return 0;
4024 }
4025
30a7f2fc 4026 n = total_sectors - sector_num;
bd9533e3
SH
4027 if (n < nb_sectors) {
4028 nb_sectors = n;
4029 }
4030
b6b8a333 4031 if (!bs->drv->bdrv_co_get_block_status) {
bd9533e3 4032 *pnum = nb_sectors;
e88ae226 4033 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
918e92d7
PB
4034 if (bs->drv->protocol_name) {
4035 ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
4036 }
4037 return ret;
f58c7b35 4038 }
6aebab14 4039
415b5b01
PB
4040 ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
4041 if (ret < 0) {
3e0a233d 4042 *pnum = 0;
415b5b01
PB
4043 return ret;
4044 }
4045
92bc50a5
PL
4046 if (ret & BDRV_BLOCK_RAW) {
4047 assert(ret & BDRV_BLOCK_OFFSET_VALID);
4048 return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
4049 *pnum, pnum);
4050 }
4051
e88ae226
KW
4052 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
4053 ret |= BDRV_BLOCK_ALLOCATED;
4054 }
4055
c3d86884
PL
4056 if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
4057 if (bdrv_unallocated_blocks_are_zero(bs)) {
f0ad5712 4058 ret |= BDRV_BLOCK_ZERO;
1f9db224 4059 } else if (bs->backing_hd) {
f0ad5712 4060 BlockDriverState *bs2 = bs->backing_hd;
30a7f2fc
MA
4061 int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
4062 if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
f0ad5712
PB
4063 ret |= BDRV_BLOCK_ZERO;
4064 }
4065 }
415b5b01 4066 }
5daa74a6
PB
4067
4068 if (bs->file &&
4069 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
4070 (ret & BDRV_BLOCK_OFFSET_VALID)) {
4071 ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
4072 *pnum, pnum);
4073 if (ret2 >= 0) {
4074 /* Ignore errors. This is just providing extra information, it
4075 * is useful but not necessary.
4076 */
4077 ret |= (ret2 & BDRV_BLOCK_ZERO);
4078 }
4079 }
4080
415b5b01 4081 return ret;
060f51c9
SH
4082}
4083
b6b8a333
PB
4084/* Coroutine wrapper for bdrv_get_block_status() */
4085static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
060f51c9 4086{
b6b8a333 4087 BdrvCoGetBlockStatusData *data = opaque;
060f51c9
SH
4088 BlockDriverState *bs = data->bs;
4089
b6b8a333
PB
4090 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4091 data->pnum);
060f51c9
SH
4092 data->done = true;
4093}
4094
4095/*
b6b8a333 4096 * Synchronous wrapper around bdrv_co_get_block_status().
060f51c9 4097 *
b6b8a333 4098 * See bdrv_co_get_block_status() for details.
060f51c9 4099 */
b6b8a333
PB
4100int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4101 int nb_sectors, int *pnum)
060f51c9 4102{
6aebab14 4103 Coroutine *co;
b6b8a333 4104 BdrvCoGetBlockStatusData data = {
6aebab14
SH
4105 .bs = bs,
4106 .sector_num = sector_num,
4107 .nb_sectors = nb_sectors,
4108 .pnum = pnum,
4109 .done = false,
4110 };
4111
bdad13b9
PB
4112 if (qemu_in_coroutine()) {
4113 /* Fast-path if already in coroutine context */
b6b8a333 4114 bdrv_get_block_status_co_entry(&data);
bdad13b9 4115 } else {
2572b37a
SH
4116 AioContext *aio_context = bdrv_get_aio_context(bs);
4117
b6b8a333 4118 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
bdad13b9
PB
4119 qemu_coroutine_enter(co, &data);
4120 while (!data.done) {
2572b37a 4121 aio_poll(aio_context, true);
bdad13b9 4122 }
6aebab14
SH
4123 }
4124 return data.ret;
f58c7b35
TS
4125}
4126
b6b8a333
PB
4127int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4128 int nb_sectors, int *pnum)
4129{
4333bb71
PB
4130 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4131 if (ret < 0) {
4132 return ret;
4133 }
01fb2705 4134 return !!(ret & BDRV_BLOCK_ALLOCATED);
b6b8a333
PB
4135}
4136
188a7bbf
PB
4137/*
4138 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
4139 *
4140 * Return true if the given sector is allocated in any image between
4141 * BASE and TOP (inclusive). BASE can be NULL to check if the given
4142 * sector is allocated in any image of the chain. Return false otherwise.
4143 *
4144 * 'pnum' is set to the number of sectors (including and immediately following
4145 * the specified sector) that are known to be in the same
4146 * allocated/unallocated state.
4147 *
4148 */
4f578637
PB
4149int bdrv_is_allocated_above(BlockDriverState *top,
4150 BlockDriverState *base,
4151 int64_t sector_num,
4152 int nb_sectors, int *pnum)
188a7bbf
PB
4153{
4154 BlockDriverState *intermediate;
4155 int ret, n = nb_sectors;
4156
4157 intermediate = top;
4158 while (intermediate && intermediate != base) {
4159 int pnum_inter;
bdad13b9
PB
4160 ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
4161 &pnum_inter);
188a7bbf
PB
4162 if (ret < 0) {
4163 return ret;
4164 } else if (ret) {
4165 *pnum = pnum_inter;
4166 return 1;
4167 }
4168
4169 /*
4170 * [sector_num, nb_sectors] is unallocated on top but intermediate
4171 * might have
4172 *
4173 * [sector_num+x, nr_sectors] allocated.
4174 */
63ba17d3
VI
4175 if (n > pnum_inter &&
4176 (intermediate == top ||
4177 sector_num + pnum_inter < intermediate->total_sectors)) {
188a7bbf
PB
4178 n = pnum_inter;
4179 }
4180
4181 intermediate = intermediate->backing_hd;
4182 }
4183
4184 *pnum = n;
4185 return 0;
4186}
4187
045df330
AL
4188const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4189{
4190 if (bs->backing_hd && bs->backing_hd->encrypted)
4191 return bs->backing_file;
4192 else if (bs->encrypted)
4193 return bs->filename;
4194 else
4195 return NULL;
4196}
4197
5fafdf24 4198void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
4199 char *filename, int filename_size)
4200{
3574c608 4201 pstrcpy(filename, filename_size, bs->backing_file);
83f64091
FB
4202}
4203
5fafdf24 4204int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
4205 const uint8_t *buf, int nb_sectors)
4206{
4207 BlockDriver *drv = bs->drv;
4208 if (!drv)
19cb3738 4209 return -ENOMEDIUM;
faea38e7
FB
4210 if (!drv->bdrv_write_compressed)
4211 return -ENOTSUP;
fbb7b4e0
KW
4212 if (bdrv_check_request(bs, sector_num, nb_sectors))
4213 return -EIO;
a55eb92c 4214
e4654d2d 4215 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
a55eb92c 4216
faea38e7
FB
4217 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4218}
3b46e624 4219
faea38e7
FB
4220int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4221{
4222 BlockDriver *drv = bs->drv;
4223 if (!drv)
19cb3738 4224 return -ENOMEDIUM;
faea38e7
FB
4225 if (!drv->bdrv_get_info)
4226 return -ENOTSUP;
4227 memset(bdi, 0, sizeof(*bdi));
4228 return drv->bdrv_get_info(bs, bdi);
4229}
4230
eae041fe
HR
4231ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4232{
4233 BlockDriver *drv = bs->drv;
4234 if (drv && drv->bdrv_get_specific_info) {
4235 return drv->bdrv_get_specific_info(bs);
4236 }
4237 return NULL;
4238}
4239
45566e9c
CH
4240int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4241 int64_t pos, int size)
cf8074b3
KW
4242{
4243 QEMUIOVector qiov;
4244 struct iovec iov = {
4245 .iov_base = (void *) buf,
4246 .iov_len = size,
4247 };
4248
4249 qemu_iovec_init_external(&qiov, &iov, 1);
4250 return bdrv_writev_vmstate(bs, &qiov, pos);
4251}
4252
4253int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
178e08a5
AL
4254{
4255 BlockDriver *drv = bs->drv;
cf8074b3
KW
4256
4257 if (!drv) {
178e08a5 4258 return -ENOMEDIUM;
cf8074b3
KW
4259 } else if (drv->bdrv_save_vmstate) {
4260 return drv->bdrv_save_vmstate(bs, qiov, pos);
4261 } else if (bs->file) {
4262 return bdrv_writev_vmstate(bs->file, qiov, pos);
4263 }
4264
7cdb1f6d 4265 return -ENOTSUP;
178e08a5
AL
4266}
4267
45566e9c
CH
4268int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4269 int64_t pos, int size)
178e08a5
AL
4270{
4271 BlockDriver *drv = bs->drv;
4272 if (!drv)
4273 return -ENOMEDIUM;
7cdb1f6d
MK
4274 if (drv->bdrv_load_vmstate)
4275 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4276 if (bs->file)
4277 return bdrv_load_vmstate(bs->file, buf, pos, size);
4278 return -ENOTSUP;
178e08a5
AL
4279}
4280
8b9b0cc2
KW
4281void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4282{
bf736fe3 4283 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
8b9b0cc2
KW
4284 return;
4285 }
4286
bf736fe3 4287 bs->drv->bdrv_debug_event(bs, event);
41c695c7
KW
4288}
4289
4290int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4291 const char *tag)
4292{
4293 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4294 bs = bs->file;
4295 }
4296
4297 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4298 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4299 }
4300
4301 return -ENOTSUP;
4302}
4303
4cc70e93
FZ
4304int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4305{
4306 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4307 bs = bs->file;
4308 }
4309
4310 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4311 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4312 }
4313
4314 return -ENOTSUP;
4315}
4316
41c695c7
KW
4317int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4318{
938789ea 4319 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
41c695c7
KW
4320 bs = bs->file;
4321 }
8b9b0cc2 4322
41c695c7
KW
4323 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4324 return bs->drv->bdrv_debug_resume(bs, tag);
4325 }
4326
4327 return -ENOTSUP;
4328}
4329
4330bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4331{
4332 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4333 bs = bs->file;
4334 }
4335
4336 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4337 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4338 }
4339
4340 return false;
8b9b0cc2
KW
4341}
4342
199630b6
BS
4343int bdrv_is_snapshot(BlockDriverState *bs)
4344{
4345 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4346}
4347
b1b1d783
JC
4348/* backing_file can either be relative, or absolute, or a protocol. If it is
4349 * relative, it must be relative to the chain. So, passing in bs->filename
4350 * from a BDS as backing_file should not be done, as that may be relative to
4351 * the CWD rather than the chain. */
e8a6bb9c
MT
4352BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4353 const char *backing_file)
4354{
b1b1d783
JC
4355 char *filename_full = NULL;
4356 char *backing_file_full = NULL;
4357 char *filename_tmp = NULL;
4358 int is_protocol = 0;
4359 BlockDriverState *curr_bs = NULL;
4360 BlockDriverState *retval = NULL;
4361
4362 if (!bs || !bs->drv || !backing_file) {
e8a6bb9c
MT
4363 return NULL;
4364 }
4365
b1b1d783
JC
4366 filename_full = g_malloc(PATH_MAX);
4367 backing_file_full = g_malloc(PATH_MAX);
4368 filename_tmp = g_malloc(PATH_MAX);
4369
4370 is_protocol = path_has_protocol(backing_file);
4371
4372 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4373
4374 /* If either of the filename paths is actually a protocol, then
4375 * compare unmodified paths; otherwise make paths relative */
4376 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4377 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4378 retval = curr_bs->backing_hd;
4379 break;
4380 }
e8a6bb9c 4381 } else {
b1b1d783
JC
4382 /* If not an absolute filename path, make it relative to the current
4383 * image's filename path */
4384 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4385 backing_file);
4386
4387 /* We are going to compare absolute pathnames */
4388 if (!realpath(filename_tmp, filename_full)) {
4389 continue;
4390 }
4391
4392 /* We need to make sure the backing filename we are comparing against
4393 * is relative to the current image filename (or absolute) */
4394 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4395 curr_bs->backing_file);
4396
4397 if (!realpath(filename_tmp, backing_file_full)) {
4398 continue;
4399 }
4400
4401 if (strcmp(backing_file_full, filename_full) == 0) {
4402 retval = curr_bs->backing_hd;
4403 break;
4404 }
e8a6bb9c
MT
4405 }
4406 }
4407
b1b1d783
JC
4408 g_free(filename_full);
4409 g_free(backing_file_full);
4410 g_free(filename_tmp);
4411 return retval;
e8a6bb9c
MT
4412}
4413
f198fd1c
BC
4414int bdrv_get_backing_file_depth(BlockDriverState *bs)
4415{
4416 if (!bs->drv) {
4417 return 0;
4418 }
4419
4420 if (!bs->backing_hd) {
4421 return 0;
4422 }
4423
4424 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4425}
4426
ea2384d3 4427/**************************************************************/
83f64091 4428/* async I/Os */
ea2384d3 4429
3b69e4b9 4430BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 4431 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 4432 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 4433{
bbf0a440
SH
4434 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4435
d20d9b7c 4436 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
8c5873d6 4437 cb, opaque, false);
ea2384d3
FB
4438}
4439
f141eafe
AL
4440BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4441 QEMUIOVector *qiov, int nb_sectors,
4442 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 4443{
bbf0a440
SH
4444 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4445
d20d9b7c 4446 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
8c5873d6 4447 cb, opaque, true);
83f64091
FB
4448}
4449
d5ef94d4
PB
4450BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
4451 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
4452 BlockDriverCompletionFunc *cb, void *opaque)
4453{
4454 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4455
4456 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4457 BDRV_REQ_ZERO_WRITE | flags,
4458 cb, opaque, true);
4459}
4460
40b4f539
KW
4461
4462typedef struct MultiwriteCB {
4463 int error;
4464 int num_requests;
4465 int num_callbacks;
4466 struct {
4467 BlockDriverCompletionFunc *cb;
4468 void *opaque;
4469 QEMUIOVector *free_qiov;
40b4f539
KW
4470 } callbacks[];
4471} MultiwriteCB;
4472
4473static void multiwrite_user_cb(MultiwriteCB *mcb)
4474{
4475 int i;
4476
4477 for (i = 0; i < mcb->num_callbacks; i++) {
4478 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
4479 if (mcb->callbacks[i].free_qiov) {
4480 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4481 }
7267c094 4482 g_free(mcb->callbacks[i].free_qiov);
40b4f539
KW
4483 }
4484}
4485
4486static void multiwrite_cb(void *opaque, int ret)
4487{
4488 MultiwriteCB *mcb = opaque;
4489
6d519a5f
SH
4490 trace_multiwrite_cb(mcb, ret);
4491
cb6d3ca0 4492 if (ret < 0 && !mcb->error) {
40b4f539 4493 mcb->error = ret;
40b4f539
KW
4494 }
4495
4496 mcb->num_requests--;
4497 if (mcb->num_requests == 0) {
de189a1b 4498 multiwrite_user_cb(mcb);
7267c094 4499 g_free(mcb);
40b4f539
KW
4500 }
4501}
4502
4503static int multiwrite_req_compare(const void *a, const void *b)
4504{
77be4366
CH
4505 const BlockRequest *req1 = a, *req2 = b;
4506
4507 /*
4508 * Note that we can't simply subtract req2->sector from req1->sector
4509 * here as that could overflow the return value.
4510 */
4511 if (req1->sector > req2->sector) {
4512 return 1;
4513 } else if (req1->sector < req2->sector) {
4514 return -1;
4515 } else {
4516 return 0;
4517 }
40b4f539
KW
4518}
4519
4520/*
4521 * Takes a bunch of requests and tries to merge them. Returns the number of
4522 * requests that remain after merging.
4523 */
4524static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
4525 int num_reqs, MultiwriteCB *mcb)
4526{
4527 int i, outidx;
4528
4529 // Sort requests by start sector
4530 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
4531
4532 // Check if adjacent requests touch the same clusters. If so, combine them,
4533 // filling up gaps with zero sectors.
4534 outidx = 0;
4535 for (i = 1; i < num_reqs; i++) {
4536 int merge = 0;
4537 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
4538
b6a127a1 4539 // Handle exactly sequential writes and overlapping writes.
40b4f539
KW
4540 if (reqs[i].sector <= oldreq_last) {
4541 merge = 1;
4542 }
4543
e2a305fb
CH
4544 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4545 merge = 0;
4546 }
4547
40b4f539
KW
4548 if (merge) {
4549 size_t size;
7267c094 4550 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
4551 qemu_iovec_init(qiov,
4552 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
4553
4554 // Add the first request to the merged one. If the requests are
4555 // overlapping, drop the last sectors of the first request.
4556 size = (reqs[i].sector - reqs[outidx].sector) << 9;
1b093c48 4557 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
40b4f539 4558
b6a127a1
PB
4559 // We should need to add any zeros between the two requests
4560 assert (reqs[i].sector <= oldreq_last);
40b4f539
KW
4561
4562 // Add the second request
1b093c48 4563 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
40b4f539 4564
391827eb
SH
4565 // Add tail of first request, if necessary
4566 if (qiov->size < reqs[outidx].qiov->size) {
4567 qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
4568 reqs[outidx].qiov->size - qiov->size);
4569 }
4570
cbf1dff2 4571 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
4572 reqs[outidx].qiov = qiov;
4573
4574 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
4575 } else {
4576 outidx++;
4577 reqs[outidx].sector = reqs[i].sector;
4578 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
4579 reqs[outidx].qiov = reqs[i].qiov;
4580 }
4581 }
4582
4583 return outidx + 1;
4584}
4585
4586/*
4587 * Submit multiple AIO write requests at once.
4588 *
4589 * On success, the function returns 0 and all requests in the reqs array have
4590 * been submitted. In error case this function returns -1, and any of the
4591 * requests may or may not be submitted yet. In particular, this means that the
4592 * callback will be called for some of the requests, for others it won't. The
4593 * caller must check the error field of the BlockRequest to wait for the right
4594 * callbacks (if error != 0, no callback will be called).
4595 *
4596 * The implementation may modify the contents of the reqs array, e.g. to merge
4597 * requests. However, the fields opaque and error are left unmodified as they
4598 * are used to signal failure for a single request to the caller.
4599 */
4600int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
4601{
40b4f539
KW
4602 MultiwriteCB *mcb;
4603 int i;
4604
301db7c2
RH
4605 /* don't submit writes if we don't have a medium */
4606 if (bs->drv == NULL) {
4607 for (i = 0; i < num_reqs; i++) {
4608 reqs[i].error = -ENOMEDIUM;
4609 }
4610 return -1;
4611 }
4612
40b4f539
KW
4613 if (num_reqs == 0) {
4614 return 0;
4615 }
4616
4617 // Create MultiwriteCB structure
7267c094 4618 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
4619 mcb->num_requests = 0;
4620 mcb->num_callbacks = num_reqs;
4621
4622 for (i = 0; i < num_reqs; i++) {
4623 mcb->callbacks[i].cb = reqs[i].cb;
4624 mcb->callbacks[i].opaque = reqs[i].opaque;
4625 }
4626
4627 // Check for mergable requests
4628 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
4629
6d519a5f
SH
4630 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
4631
df9309fb
PB
4632 /* Run the aio requests. */
4633 mcb->num_requests = num_reqs;
40b4f539 4634 for (i = 0; i < num_reqs; i++) {
d20d9b7c
PB
4635 bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4636 reqs[i].nb_sectors, reqs[i].flags,
4637 multiwrite_cb, mcb,
4638 true);
40b4f539
KW
4639 }
4640
4641 return 0;
40b4f539
KW
4642}
4643
83f64091 4644void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 4645{
ca5fd113
FZ
4646 qemu_aio_ref(acb);
4647 bdrv_aio_cancel_async(acb);
4648 while (acb->refcnt > 1) {
4649 if (acb->aiocb_info->get_aio_context) {
4650 aio_poll(acb->aiocb_info->get_aio_context(acb), true);
4651 } else if (acb->bs) {
4652 aio_poll(bdrv_get_aio_context(acb->bs), true);
4653 } else {
4654 abort();
02c50efe 4655 }
02c50efe 4656 }
8007429a 4657 qemu_aio_unref(acb);
02c50efe
FZ
4658}
4659
4660/* Async version of aio cancel. The caller is not blocked if the acb implements
4661 * cancel_async, otherwise we do nothing and let the request normally complete.
4662 * In either case the completion callback must be called. */
4663void bdrv_aio_cancel_async(BlockDriverAIOCB *acb)
4664{
4665 if (acb->aiocb_info->cancel_async) {
4666 acb->aiocb_info->cancel_async(acb);
4667 }
83f64091
FB
4668}
4669
4670/**************************************************************/
4671/* async block device emulation */
4672
c16b5a2c
CH
4673typedef struct BlockDriverAIOCBSync {
4674 BlockDriverAIOCB common;
4675 QEMUBH *bh;
4676 int ret;
4677 /* vector translation state */
4678 QEMUIOVector *qiov;
4679 uint8_t *bounce;
4680 int is_write;
4681} BlockDriverAIOCBSync;
4682
d7331bed 4683static const AIOCBInfo bdrv_em_aiocb_info = {
c16b5a2c 4684 .aiocb_size = sizeof(BlockDriverAIOCBSync),
c16b5a2c
CH
4685};
4686
ce1a14dc 4687static void bdrv_aio_bh_cb(void *opaque)
83f64091 4688{
ce1a14dc 4689 BlockDriverAIOCBSync *acb = opaque;
f141eafe 4690
857d4f46 4691 if (!acb->is_write && acb->ret >= 0) {
03396148 4692 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
857d4f46 4693 }
ceb42de8 4694 qemu_vfree(acb->bounce);
ce1a14dc 4695 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 4696 qemu_bh_delete(acb->bh);
36afc451 4697 acb->bh = NULL;
8007429a 4698 qemu_aio_unref(acb);
83f64091 4699}
beac80cd 4700
f141eafe
AL
4701static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4702 int64_t sector_num,
4703 QEMUIOVector *qiov,
4704 int nb_sectors,
4705 BlockDriverCompletionFunc *cb,
4706 void *opaque,
4707 int is_write)
4708
83f64091 4709{
ce1a14dc 4710 BlockDriverAIOCBSync *acb;
ce1a14dc 4711
d7331bed 4712 acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
f141eafe
AL
4713 acb->is_write = is_write;
4714 acb->qiov = qiov;
857d4f46 4715 acb->bounce = qemu_try_blockalign(bs, qiov->size);
2572b37a 4716 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
f141eafe 4717
857d4f46
KW
4718 if (acb->bounce == NULL) {
4719 acb->ret = -ENOMEM;
4720 } else if (is_write) {
d5e6b161 4721 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
1ed20acf 4722 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
f141eafe 4723 } else {
1ed20acf 4724 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
f141eafe
AL
4725 }
4726
ce1a14dc 4727 qemu_bh_schedule(acb->bh);
f141eafe 4728
ce1a14dc 4729 return &acb->common;
beac80cd
FB
4730}
4731
f141eafe
AL
4732static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4733 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 4734 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 4735{
f141eafe
AL
4736 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
4737}
83f64091 4738
f141eafe
AL
4739static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4740 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4741 BlockDriverCompletionFunc *cb, void *opaque)
4742{
4743 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 4744}
beac80cd 4745
68485420
KW
4746
4747typedef struct BlockDriverAIOCBCoroutine {
4748 BlockDriverAIOCB common;
4749 BlockRequest req;
4750 bool is_write;
d318aea9 4751 bool *done;
68485420
KW
4752 QEMUBH* bh;
4753} BlockDriverAIOCBCoroutine;
4754
d7331bed 4755static const AIOCBInfo bdrv_em_co_aiocb_info = {
68485420 4756 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
68485420
KW
4757};
4758
35246a68 4759static void bdrv_co_em_bh(void *opaque)
68485420
KW
4760{
4761 BlockDriverAIOCBCoroutine *acb = opaque;
4762
4763 acb->common.cb(acb->common.opaque, acb->req.error);
d318aea9 4764
68485420 4765 qemu_bh_delete(acb->bh);
8007429a 4766 qemu_aio_unref(acb);
68485420
KW
4767}
4768
b2a61371
SH
4769/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4770static void coroutine_fn bdrv_co_do_rw(void *opaque)
4771{
4772 BlockDriverAIOCBCoroutine *acb = opaque;
4773 BlockDriverState *bs = acb->common.bs;
4774
4775 if (!acb->is_write) {
4776 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
d20d9b7c 4777 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
b2a61371
SH
4778 } else {
4779 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
d20d9b7c 4780 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
b2a61371
SH
4781 }
4782
2572b37a 4783 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
b2a61371
SH
4784 qemu_bh_schedule(acb->bh);
4785}
4786
68485420
KW
4787static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4788 int64_t sector_num,
4789 QEMUIOVector *qiov,
4790 int nb_sectors,
d20d9b7c 4791 BdrvRequestFlags flags,
68485420
KW
4792 BlockDriverCompletionFunc *cb,
4793 void *opaque,
8c5873d6 4794 bool is_write)
68485420
KW
4795{
4796 Coroutine *co;
4797 BlockDriverAIOCBCoroutine *acb;
4798
d7331bed 4799 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
68485420
KW
4800 acb->req.sector = sector_num;
4801 acb->req.nb_sectors = nb_sectors;
4802 acb->req.qiov = qiov;
d20d9b7c 4803 acb->req.flags = flags;
68485420
KW
4804 acb->is_write = is_write;
4805
8c5873d6 4806 co = qemu_coroutine_create(bdrv_co_do_rw);
68485420
KW
4807 qemu_coroutine_enter(co, acb);
4808
4809 return &acb->common;
4810}
4811
07f07615 4812static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
b2e12bc6 4813{
07f07615
PB
4814 BlockDriverAIOCBCoroutine *acb = opaque;
4815 BlockDriverState *bs = acb->common.bs;
b2e12bc6 4816
07f07615 4817 acb->req.error = bdrv_co_flush(bs);
2572b37a 4818 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
b2e12bc6 4819 qemu_bh_schedule(acb->bh);
b2e12bc6
CH
4820}
4821
07f07615 4822BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
016f5cf6
AG
4823 BlockDriverCompletionFunc *cb, void *opaque)
4824{
07f07615 4825 trace_bdrv_aio_flush(bs, opaque);
016f5cf6 4826
07f07615
PB
4827 Coroutine *co;
4828 BlockDriverAIOCBCoroutine *acb;
016f5cf6 4829
d7331bed 4830 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
d318aea9 4831
07f07615
PB
4832 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4833 qemu_coroutine_enter(co, acb);
016f5cf6 4834
016f5cf6
AG
4835 return &acb->common;
4836}
4837
4265d620
PB
4838static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4839{
4840 BlockDriverAIOCBCoroutine *acb = opaque;
4841 BlockDriverState *bs = acb->common.bs;
4842
4843 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
2572b37a 4844 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
4265d620
PB
4845 qemu_bh_schedule(acb->bh);
4846}
4847
4848BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
4849 int64_t sector_num, int nb_sectors,
4850 BlockDriverCompletionFunc *cb, void *opaque)
4851{
4852 Coroutine *co;
4853 BlockDriverAIOCBCoroutine *acb;
4854
4855 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4856
d7331bed 4857 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
4265d620
PB
4858 acb->req.sector = sector_num;
4859 acb->req.nb_sectors = nb_sectors;
4860 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
4861 qemu_coroutine_enter(co, acb);
4862
4863 return &acb->common;
4864}
4865
ea2384d3
FB
4866void bdrv_init(void)
4867{
5efa9d5a 4868 module_call_init(MODULE_INIT_BLOCK);
ea2384d3 4869}
ce1a14dc 4870
eb852011
MA
4871void bdrv_init_with_whitelist(void)
4872{
4873 use_bdrv_whitelist = 1;
4874 bdrv_init();
4875}
4876
d7331bed 4877void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
c16b5a2c 4878 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 4879{
ce1a14dc
PB
4880 BlockDriverAIOCB *acb;
4881
d7331bed
SH
4882 acb = g_slice_alloc(aiocb_info->aiocb_size);
4883 acb->aiocb_info = aiocb_info;
ce1a14dc
PB
4884 acb->bs = bs;
4885 acb->cb = cb;
4886 acb->opaque = opaque;
f197fe2b 4887 acb->refcnt = 1;
ce1a14dc
PB
4888 return acb;
4889}
4890
f197fe2b
FZ
4891void qemu_aio_ref(void *p)
4892{
4893 BlockDriverAIOCB *acb = p;
4894 acb->refcnt++;
4895}
4896
8007429a 4897void qemu_aio_unref(void *p)
ce1a14dc 4898{
d37c975f 4899 BlockDriverAIOCB *acb = p;
f197fe2b
FZ
4900 assert(acb->refcnt > 0);
4901 if (--acb->refcnt == 0) {
4902 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
4903 }
ce1a14dc 4904}
19cb3738 4905
f9f05dc5
KW
4906/**************************************************************/
4907/* Coroutine block device emulation */
4908
4909typedef struct CoroutineIOCompletion {
4910 Coroutine *coroutine;
4911 int ret;
4912} CoroutineIOCompletion;
4913
4914static void bdrv_co_io_em_complete(void *opaque, int ret)
4915{
4916 CoroutineIOCompletion *co = opaque;
4917
4918 co->ret = ret;
4919 qemu_coroutine_enter(co->coroutine, NULL);
4920}
4921
4922static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
4923 int nb_sectors, QEMUIOVector *iov,
4924 bool is_write)
4925{
4926 CoroutineIOCompletion co = {
4927 .coroutine = qemu_coroutine_self(),
4928 };
4929 BlockDriverAIOCB *acb;
4930
4931 if (is_write) {
a652d160
SH
4932 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
4933 bdrv_co_io_em_complete, &co);
f9f05dc5 4934 } else {
a652d160
SH
4935 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
4936 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
4937 }
4938
59370aaa 4939 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
4940 if (!acb) {
4941 return -EIO;
4942 }
4943 qemu_coroutine_yield();
4944
4945 return co.ret;
4946}
4947
4948static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
4949 int64_t sector_num, int nb_sectors,
4950 QEMUIOVector *iov)
4951{
4952 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
4953}
4954
4955static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
4956 int64_t sector_num, int nb_sectors,
4957 QEMUIOVector *iov)
4958{
4959 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
4960}
4961
07f07615 4962static void coroutine_fn bdrv_flush_co_entry(void *opaque)
e7a8a783 4963{
07f07615
PB
4964 RwCo *rwco = opaque;
4965
4966 rwco->ret = bdrv_co_flush(rwco->bs);
4967}
4968
4969int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
4970{
eb489bb1
KW
4971 int ret;
4972
29cdb251 4973 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
07f07615 4974 return 0;
eb489bb1
KW
4975 }
4976
ca716364 4977 /* Write back cached data to the OS even with cache=unsafe */
bf736fe3 4978 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
eb489bb1
KW
4979 if (bs->drv->bdrv_co_flush_to_os) {
4980 ret = bs->drv->bdrv_co_flush_to_os(bs);
4981 if (ret < 0) {
4982 return ret;
4983 }
4984 }
4985
ca716364
KW
4986 /* But don't actually force it to the disk with cache=unsafe */
4987 if (bs->open_flags & BDRV_O_NO_FLUSH) {
d4c82329 4988 goto flush_parent;
ca716364
KW
4989 }
4990
bf736fe3 4991 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
eb489bb1 4992 if (bs->drv->bdrv_co_flush_to_disk) {
29cdb251 4993 ret = bs->drv->bdrv_co_flush_to_disk(bs);
07f07615
PB
4994 } else if (bs->drv->bdrv_aio_flush) {
4995 BlockDriverAIOCB *acb;
4996 CoroutineIOCompletion co = {
4997 .coroutine = qemu_coroutine_self(),
4998 };
4999
5000 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
5001 if (acb == NULL) {
29cdb251 5002 ret = -EIO;
07f07615
PB
5003 } else {
5004 qemu_coroutine_yield();
29cdb251 5005 ret = co.ret;
07f07615 5006 }
07f07615
PB
5007 } else {
5008 /*
5009 * Some block drivers always operate in either writethrough or unsafe
5010 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
5011 * know how the server works (because the behaviour is hardcoded or
5012 * depends on server-side configuration), so we can't ensure that
5013 * everything is safe on disk. Returning an error doesn't work because
5014 * that would break guests even if the server operates in writethrough
5015 * mode.
5016 *
5017 * Let's hope the user knows what he's doing.
5018 */
29cdb251 5019 ret = 0;
07f07615 5020 }
29cdb251
PB
5021 if (ret < 0) {
5022 return ret;
5023 }
5024
5025 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
5026 * in the case of cache=unsafe, so there are no useless flushes.
5027 */
d4c82329 5028flush_parent:
29cdb251 5029 return bdrv_co_flush(bs->file);
07f07615
PB
5030}
5031
5a8a30db 5032void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
0f15423c 5033{
5a8a30db
KW
5034 Error *local_err = NULL;
5035 int ret;
5036
3456a8d1
KW
5037 if (!bs->drv) {
5038 return;
5039 }
5040
5041 if (bs->drv->bdrv_invalidate_cache) {
5a8a30db 5042 bs->drv->bdrv_invalidate_cache(bs, &local_err);
3456a8d1 5043 } else if (bs->file) {
5a8a30db
KW
5044 bdrv_invalidate_cache(bs->file, &local_err);
5045 }
5046 if (local_err) {
5047 error_propagate(errp, local_err);
5048 return;
0f15423c 5049 }
3456a8d1 5050
5a8a30db
KW
5051 ret = refresh_total_sectors(bs, bs->total_sectors);
5052 if (ret < 0) {
5053 error_setg_errno(errp, -ret, "Could not refresh total sector count");
5054 return;
5055 }
0f15423c
AL
5056}
5057
5a8a30db 5058void bdrv_invalidate_cache_all(Error **errp)
0f15423c
AL
5059{
5060 BlockDriverState *bs;
5a8a30db 5061 Error *local_err = NULL;
0f15423c 5062
dc364f4c 5063 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
5064 AioContext *aio_context = bdrv_get_aio_context(bs);
5065
5066 aio_context_acquire(aio_context);
5a8a30db 5067 bdrv_invalidate_cache(bs, &local_err);
ed78cda3 5068 aio_context_release(aio_context);
5a8a30db
KW
5069 if (local_err) {
5070 error_propagate(errp, local_err);
5071 return;
5072 }
0f15423c
AL
5073 }
5074}
5075
07789269
BC
5076void bdrv_clear_incoming_migration_all(void)
5077{
5078 BlockDriverState *bs;
5079
dc364f4c 5080 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
5081 AioContext *aio_context = bdrv_get_aio_context(bs);
5082
5083 aio_context_acquire(aio_context);
07789269 5084 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
ed78cda3 5085 aio_context_release(aio_context);
07789269
BC
5086 }
5087}
5088
07f07615
PB
5089int bdrv_flush(BlockDriverState *bs)
5090{
5091 Coroutine *co;
5092 RwCo rwco = {
5093 .bs = bs,
5094 .ret = NOT_DONE,
e7a8a783 5095 };
e7a8a783 5096
07f07615
PB
5097 if (qemu_in_coroutine()) {
5098 /* Fast-path if already in coroutine context */
5099 bdrv_flush_co_entry(&rwco);
5100 } else {
2572b37a
SH
5101 AioContext *aio_context = bdrv_get_aio_context(bs);
5102
07f07615
PB
5103 co = qemu_coroutine_create(bdrv_flush_co_entry);
5104 qemu_coroutine_enter(co, &rwco);
5105 while (rwco.ret == NOT_DONE) {
2572b37a 5106 aio_poll(aio_context, true);
07f07615 5107 }
e7a8a783 5108 }
07f07615
PB
5109
5110 return rwco.ret;
e7a8a783
KW
5111}
5112
775aa8b6
KW
5113typedef struct DiscardCo {
5114 BlockDriverState *bs;
5115 int64_t sector_num;
5116 int nb_sectors;
5117 int ret;
5118} DiscardCo;
4265d620
PB
5119static void coroutine_fn bdrv_discard_co_entry(void *opaque)
5120{
775aa8b6 5121 DiscardCo *rwco = opaque;
4265d620
PB
5122
5123 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
5124}
5125
6f14da52
PL
5126/* if no limit is specified in the BlockLimits use a default
5127 * of 32768 512-byte sectors (16 MiB) per request.
5128 */
5129#define MAX_DISCARD_DEFAULT 32768
5130
4265d620
PB
5131int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5132 int nb_sectors)
5133{
d51e9fe5
PB
5134 int max_discard;
5135
4265d620
PB
5136 if (!bs->drv) {
5137 return -ENOMEDIUM;
5138 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
5139 return -EIO;
5140 } else if (bs->read_only) {
5141 return -EROFS;
df702c9b
PB
5142 }
5143
e4654d2d 5144 bdrv_reset_dirty(bs, sector_num, nb_sectors);
df702c9b 5145
9e8f1835
PB
5146 /* Do nothing if disabled. */
5147 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5148 return 0;
5149 }
5150
d51e9fe5
PB
5151 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
5152 return 0;
5153 }
6f14da52 5154
d51e9fe5
PB
5155 max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
5156 while (nb_sectors > 0) {
5157 int ret;
5158 int num = nb_sectors;
6f14da52 5159
d51e9fe5
PB
5160 /* align request */
5161 if (bs->bl.discard_alignment &&
5162 num >= bs->bl.discard_alignment &&
5163 sector_num % bs->bl.discard_alignment) {
5164 if (num > bs->bl.discard_alignment) {
5165 num = bs->bl.discard_alignment;
6f14da52 5166 }
d51e9fe5
PB
5167 num -= sector_num % bs->bl.discard_alignment;
5168 }
6f14da52 5169
d51e9fe5
PB
5170 /* limit request size */
5171 if (num > max_discard) {
5172 num = max_discard;
5173 }
6f14da52 5174
d51e9fe5 5175 if (bs->drv->bdrv_co_discard) {
6f14da52 5176 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
d51e9fe5
PB
5177 } else {
5178 BlockDriverAIOCB *acb;
5179 CoroutineIOCompletion co = {
5180 .coroutine = qemu_coroutine_self(),
5181 };
5182
5183 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
5184 bdrv_co_io_em_complete, &co);
5185 if (acb == NULL) {
5186 return -EIO;
5187 } else {
5188 qemu_coroutine_yield();
5189 ret = co.ret;
6f14da52 5190 }
6f14da52 5191 }
7ce21016 5192 if (ret && ret != -ENOTSUP) {
d51e9fe5 5193 return ret;
4265d620 5194 }
d51e9fe5
PB
5195
5196 sector_num += num;
5197 nb_sectors -= num;
4265d620 5198 }
d51e9fe5 5199 return 0;
4265d620
PB
5200}
5201
5202int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
5203{
5204 Coroutine *co;
775aa8b6 5205 DiscardCo rwco = {
4265d620
PB
5206 .bs = bs,
5207 .sector_num = sector_num,
5208 .nb_sectors = nb_sectors,
5209 .ret = NOT_DONE,
5210 };
5211
5212 if (qemu_in_coroutine()) {
5213 /* Fast-path if already in coroutine context */
5214 bdrv_discard_co_entry(&rwco);
5215 } else {
2572b37a
SH
5216 AioContext *aio_context = bdrv_get_aio_context(bs);
5217
4265d620
PB
5218 co = qemu_coroutine_create(bdrv_discard_co_entry);
5219 qemu_coroutine_enter(co, &rwco);
5220 while (rwco.ret == NOT_DONE) {
2572b37a 5221 aio_poll(aio_context, true);
4265d620
PB
5222 }
5223 }
5224
5225 return rwco.ret;
5226}
5227
19cb3738
FB
5228/**************************************************************/
5229/* removable device support */
5230
5231/**
5232 * Return TRUE if the media is present
5233 */
5234int bdrv_is_inserted(BlockDriverState *bs)
5235{
5236 BlockDriver *drv = bs->drv;
a1aff5bf 5237
19cb3738
FB
5238 if (!drv)
5239 return 0;
5240 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
5241 return 1;
5242 return drv->bdrv_is_inserted(bs);
19cb3738
FB
5243}
5244
5245/**
8e49ca46
MA
5246 * Return whether the media changed since the last call to this
5247 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
5248 */
5249int bdrv_media_changed(BlockDriverState *bs)
5250{
5251 BlockDriver *drv = bs->drv;
19cb3738 5252
8e49ca46
MA
5253 if (drv && drv->bdrv_media_changed) {
5254 return drv->bdrv_media_changed(bs);
5255 }
5256 return -ENOTSUP;
19cb3738
FB
5257}
5258
5259/**
5260 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5261 */
f36f3949 5262void bdrv_eject(BlockDriverState *bs, bool eject_flag)
19cb3738
FB
5263{
5264 BlockDriver *drv = bs->drv;
19cb3738 5265
822e1cd1
MA
5266 if (drv && drv->bdrv_eject) {
5267 drv->bdrv_eject(bs, eject_flag);
19cb3738 5268 }
6f382ed2
LC
5269
5270 if (bs->device_name[0] != '\0') {
a5ee7bd4
WX
5271 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
5272 eject_flag, &error_abort);
6f382ed2 5273 }
19cb3738
FB
5274}
5275
19cb3738
FB
5276/**
5277 * Lock or unlock the media (if it is locked, the user won't be able
5278 * to eject it manually).
5279 */
025e849a 5280void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
5281{
5282 BlockDriver *drv = bs->drv;
5283
025e849a 5284 trace_bdrv_lock_medium(bs, locked);
b8c6d095 5285
025e849a
MA
5286 if (drv && drv->bdrv_lock_medium) {
5287 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
5288 }
5289}
985a03b0
TS
5290
5291/* needed for generic scsi interface */
5292
5293int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5294{
5295 BlockDriver *drv = bs->drv;
5296
5297 if (drv && drv->bdrv_ioctl)
5298 return drv->bdrv_ioctl(bs, req, buf);
5299 return -ENOTSUP;
5300}
7d780669 5301
221f715d
AL
5302BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
5303 unsigned long int req, void *buf,
5304 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 5305{
221f715d 5306 BlockDriver *drv = bs->drv;
7d780669 5307
221f715d
AL
5308 if (drv && drv->bdrv_aio_ioctl)
5309 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5310 return NULL;
7d780669 5311}
e268ca52 5312
1b7fd729 5313void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
7b6f9300 5314{
1b7fd729 5315 bs->guest_block_size = align;
7b6f9300 5316}
7cd1e32a 5317
e268ca52
AL
5318void *qemu_blockalign(BlockDriverState *bs, size_t size)
5319{
339064d5 5320 return qemu_memalign(bdrv_opt_mem_align(bs), size);
e268ca52 5321}
7cd1e32a 5322
7d2a35cc
KW
5323void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
5324{
5325 size_t align = bdrv_opt_mem_align(bs);
5326
5327 /* Ensure that NULL is never returned on success */
5328 assert(align > 0);
5329 if (size == 0) {
5330 size = align;
5331 }
5332
5333 return qemu_try_memalign(align, size);
5334}
5335
c53b1c51
SH
5336/*
5337 * Check if all memory in this vector is sector aligned.
5338 */
5339bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5340{
5341 int i;
339064d5 5342 size_t alignment = bdrv_opt_mem_align(bs);
c53b1c51
SH
5343
5344 for (i = 0; i < qiov->niov; i++) {
339064d5 5345 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
c53b1c51 5346 return false;
1ff735bd 5347 }
339064d5 5348 if (qiov->iov[i].iov_len % alignment) {
1ff735bd 5349 return false;
c53b1c51
SH
5350 }
5351 }
5352
5353 return true;
5354}
5355
b8afb520
FZ
5356BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
5357 Error **errp)
7cd1e32a
LS
5358{
5359 int64_t bitmap_size;
e4654d2d 5360 BdrvDirtyBitmap *bitmap;
a55eb92c 5361
50717e94
PB
5362 assert((granularity & (granularity - 1)) == 0);
5363
e4654d2d
FZ
5364 granularity >>= BDRV_SECTOR_BITS;
5365 assert(granularity);
57322b78 5366 bitmap_size = bdrv_nb_sectors(bs);
b8afb520
FZ
5367 if (bitmap_size < 0) {
5368 error_setg_errno(errp, -bitmap_size, "could not get length of device");
5369 errno = -bitmap_size;
5370 return NULL;
5371 }
5839e53b 5372 bitmap = g_new0(BdrvDirtyBitmap, 1);
e4654d2d
FZ
5373 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
5374 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5375 return bitmap;
5376}
5377
5378void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5379{
5380 BdrvDirtyBitmap *bm, *next;
5381 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5382 if (bm == bitmap) {
5383 QLIST_REMOVE(bitmap, list);
5384 hbitmap_free(bitmap->bitmap);
5385 g_free(bitmap);
5386 return;
a55eb92c 5387 }
7cd1e32a
LS
5388 }
5389}
5390
21b56835
FZ
5391BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5392{
5393 BdrvDirtyBitmap *bm;
5394 BlockDirtyInfoList *list = NULL;
5395 BlockDirtyInfoList **plist = &list;
5396
5397 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
5839e53b
MA
5398 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
5399 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
21b56835
FZ
5400 info->count = bdrv_get_dirty_count(bs, bm);
5401 info->granularity =
5402 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
5403 entry->value = info;
5404 *plist = entry;
5405 plist = &entry->next;
5406 }
5407
5408 return list;
5409}
5410
e4654d2d 5411int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
7cd1e32a 5412{
e4654d2d
FZ
5413 if (bitmap) {
5414 return hbitmap_get(bitmap->bitmap, sector);
7cd1e32a
LS
5415 } else {
5416 return 0;
5417 }
5418}
5419
e4654d2d
FZ
5420void bdrv_dirty_iter_init(BlockDriverState *bs,
5421 BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
1755da16 5422{
e4654d2d 5423 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
1755da16
PB
5424}
5425
5426void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5427 int nr_sectors)
5428{
e4654d2d
FZ
5429 BdrvDirtyBitmap *bitmap;
5430 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5431 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
5432 }
1755da16
PB
5433}
5434
e4654d2d 5435void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
7cd1e32a 5436{
e4654d2d
FZ
5437 BdrvDirtyBitmap *bitmap;
5438 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5439 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5440 }
7cd1e32a 5441}
aaa0eb75 5442
e4654d2d 5443int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
aaa0eb75 5444{
e4654d2d 5445 return hbitmap_count(bitmap->bitmap);
aaa0eb75 5446}
f88e1a42 5447
9fcb0251
FZ
5448/* Get a reference to bs */
5449void bdrv_ref(BlockDriverState *bs)
5450{
5451 bs->refcnt++;
5452}
5453
5454/* Release a previously grabbed reference to bs.
5455 * If after releasing, reference count is zero, the BlockDriverState is
5456 * deleted. */
5457void bdrv_unref(BlockDriverState *bs)
5458{
9a4d5ca6
JC
5459 if (!bs) {
5460 return;
5461 }
9fcb0251
FZ
5462 assert(bs->refcnt > 0);
5463 if (--bs->refcnt == 0) {
5464 bdrv_delete(bs);
5465 }
5466}
5467
fbe40ff7
FZ
5468struct BdrvOpBlocker {
5469 Error *reason;
5470 QLIST_ENTRY(BdrvOpBlocker) list;
5471};
5472
5473bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
5474{
5475 BdrvOpBlocker *blocker;
5476 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5477 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
5478 blocker = QLIST_FIRST(&bs->op_blockers[op]);
5479 if (errp) {
5480 error_setg(errp, "Device '%s' is busy: %s",
5481 bs->device_name, error_get_pretty(blocker->reason));
5482 }
5483 return true;
5484 }
5485 return false;
5486}
5487
5488void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5489{
5490 BdrvOpBlocker *blocker;
5491 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5492
5839e53b 5493 blocker = g_new0(BdrvOpBlocker, 1);
fbe40ff7
FZ
5494 blocker->reason = reason;
5495 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5496}
5497
5498void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5499{
5500 BdrvOpBlocker *blocker, *next;
5501 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5502 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5503 if (blocker->reason == reason) {
5504 QLIST_REMOVE(blocker, list);
5505 g_free(blocker);
5506 }
5507 }
5508}
5509
5510void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5511{
5512 int i;
5513 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5514 bdrv_op_block(bs, i, reason);
5515 }
5516}
5517
5518void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5519{
5520 int i;
5521 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5522 bdrv_op_unblock(bs, i, reason);
5523 }
5524}
5525
5526bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5527{
5528 int i;
5529
5530 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5531 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5532 return false;
5533 }
5534 }
5535 return true;
5536}
5537
28a7282a
LC
5538void bdrv_iostatus_enable(BlockDriverState *bs)
5539{
d6bf279e 5540 bs->iostatus_enabled = true;
58e21ef5 5541 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
28a7282a
LC
5542}
5543
5544/* The I/O status is only enabled if the drive explicitly
5545 * enables it _and_ the VM is configured to stop on errors */
5546bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5547{
d6bf279e 5548 return (bs->iostatus_enabled &&
92aa5c6d
PB
5549 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5550 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5551 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
28a7282a
LC
5552}
5553
5554void bdrv_iostatus_disable(BlockDriverState *bs)
5555{
d6bf279e 5556 bs->iostatus_enabled = false;
28a7282a
LC
5557}
5558
5559void bdrv_iostatus_reset(BlockDriverState *bs)
5560{
5561 if (bdrv_iostatus_is_enabled(bs)) {
58e21ef5 5562 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3bd293c3
PB
5563 if (bs->job) {
5564 block_job_iostatus_reset(bs->job);
5565 }
28a7282a
LC
5566 }
5567}
5568
28a7282a
LC
5569void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5570{
3e1caa5f
PB
5571 assert(bdrv_iostatus_is_enabled(bs));
5572 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
58e21ef5
LC
5573 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5574 BLOCK_DEVICE_IO_STATUS_FAILED;
28a7282a
LC
5575 }
5576}
5577
d92ada22
LC
5578void bdrv_img_create(const char *filename, const char *fmt,
5579 const char *base_filename, const char *base_fmt,
f382d43a
MR
5580 char *options, uint64_t img_size, int flags,
5581 Error **errp, bool quiet)
f88e1a42 5582{
83d0521a
CL
5583 QemuOptsList *create_opts = NULL;
5584 QemuOpts *opts = NULL;
5585 const char *backing_fmt, *backing_file;
5586 int64_t size;
f88e1a42 5587 BlockDriver *drv, *proto_drv;
96df67d1 5588 BlockDriver *backing_drv = NULL;
cc84d90f 5589 Error *local_err = NULL;
f88e1a42
JS
5590 int ret = 0;
5591
5592 /* Find driver and parse its options */
5593 drv = bdrv_find_format(fmt);
5594 if (!drv) {
71c79813 5595 error_setg(errp, "Unknown file format '%s'", fmt);
d92ada22 5596 return;
f88e1a42
JS
5597 }
5598
98289620 5599 proto_drv = bdrv_find_protocol(filename, true);
f88e1a42 5600 if (!proto_drv) {
71c79813 5601 error_setg(errp, "Unknown protocol '%s'", filename);
d92ada22 5602 return;
f88e1a42
JS
5603 }
5604
c282e1fd
CL
5605 create_opts = qemu_opts_append(create_opts, drv->create_opts);
5606 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
f88e1a42
JS
5607
5608 /* Create parameter list with default values */
83d0521a
CL
5609 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5610 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size);
f88e1a42
JS
5611
5612 /* Parse -o options */
5613 if (options) {
83d0521a
CL
5614 if (qemu_opts_do_parse(opts, options, NULL) != 0) {
5615 error_setg(errp, "Invalid options for file format '%s'", fmt);
f88e1a42
JS
5616 goto out;
5617 }
5618 }
5619
5620 if (base_filename) {
83d0521a 5621 if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename)) {
71c79813
LC
5622 error_setg(errp, "Backing file not supported for file format '%s'",
5623 fmt);
f88e1a42
JS
5624 goto out;
5625 }
5626 }
5627
5628 if (base_fmt) {
83d0521a 5629 if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt)) {
71c79813
LC
5630 error_setg(errp, "Backing file format not supported for file "
5631 "format '%s'", fmt);
f88e1a42
JS
5632 goto out;
5633 }
5634 }
5635
83d0521a
CL
5636 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5637 if (backing_file) {
5638 if (!strcmp(filename, backing_file)) {
71c79813
LC
5639 error_setg(errp, "Error: Trying to create an image with the "
5640 "same filename as the backing file");
792da93a
JS
5641 goto out;
5642 }
5643 }
5644
83d0521a
CL
5645 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5646 if (backing_fmt) {
5647 backing_drv = bdrv_find_format(backing_fmt);
96df67d1 5648 if (!backing_drv) {
71c79813 5649 error_setg(errp, "Unknown backing file format '%s'",
83d0521a 5650 backing_fmt);
f88e1a42
JS
5651 goto out;
5652 }
5653 }
5654
5655 // The size for the image must always be specified, with one exception:
5656 // If we are using a backing file, we can obtain the size from there
83d0521a
CL
5657 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5658 if (size == -1) {
5659 if (backing_file) {
66f6b814 5660 BlockDriverState *bs;
52bf1e72 5661 int64_t size;
63090dac
PB
5662 int back_flags;
5663
5664 /* backing files always opened read-only */
5665 back_flags =
5666 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
f88e1a42 5667
f67503e5 5668 bs = NULL;
83d0521a 5669 ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags,
cc84d90f 5670 backing_drv, &local_err);
f88e1a42 5671 if (ret < 0) {
cc84d90f 5672 error_setg_errno(errp, -ret, "Could not open '%s': %s",
83d0521a 5673 backing_file,
cc84d90f
HR
5674 error_get_pretty(local_err));
5675 error_free(local_err);
5676 local_err = NULL;
f88e1a42
JS
5677 goto out;
5678 }
52bf1e72
MA
5679 size = bdrv_getlength(bs);
5680 if (size < 0) {
5681 error_setg_errno(errp, -size, "Could not get size of '%s'",
5682 backing_file);
5683 bdrv_unref(bs);
5684 goto out;
5685 }
f88e1a42 5686
83d0521a 5687 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size);
66f6b814
HR
5688
5689 bdrv_unref(bs);
f88e1a42 5690 } else {
71c79813 5691 error_setg(errp, "Image creation needs a size parameter");
f88e1a42
JS
5692 goto out;
5693 }
5694 }
5695
f382d43a
MR
5696 if (!quiet) {
5697 printf("Formatting '%s', fmt=%s ", filename, fmt);
83d0521a 5698 qemu_opts_print(opts);
f382d43a
MR
5699 puts("");
5700 }
83d0521a 5701
c282e1fd 5702 ret = bdrv_create(drv, filename, opts, &local_err);
83d0521a 5703
cc84d90f
HR
5704 if (ret == -EFBIG) {
5705 /* This is generally a better message than whatever the driver would
5706 * deliver (especially because of the cluster_size_hint), since that
5707 * is most probably not much different from "image too large". */
5708 const char *cluster_size_hint = "";
83d0521a 5709 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
cc84d90f 5710 cluster_size_hint = " (try using a larger cluster size)";
f88e1a42 5711 }
cc84d90f
HR
5712 error_setg(errp, "The image size is too large for file format '%s'"
5713 "%s", fmt, cluster_size_hint);
5714 error_free(local_err);
5715 local_err = NULL;
f88e1a42
JS
5716 }
5717
5718out:
83d0521a
CL
5719 qemu_opts_del(opts);
5720 qemu_opts_free(create_opts);
84d18f06 5721 if (local_err) {
cc84d90f
HR
5722 error_propagate(errp, local_err);
5723 }
f88e1a42 5724}
85d126f3
SH
5725
5726AioContext *bdrv_get_aio_context(BlockDriverState *bs)
5727{
dcd04228
SH
5728 return bs->aio_context;
5729}
5730
5731void bdrv_detach_aio_context(BlockDriverState *bs)
5732{
33384421
HR
5733 BdrvAioNotifier *baf;
5734
dcd04228
SH
5735 if (!bs->drv) {
5736 return;
5737 }
5738
33384421
HR
5739 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
5740 baf->detach_aio_context(baf->opaque);
5741 }
5742
13af91eb
SH
5743 if (bs->io_limits_enabled) {
5744 throttle_detach_aio_context(&bs->throttle_state);
5745 }
dcd04228
SH
5746 if (bs->drv->bdrv_detach_aio_context) {
5747 bs->drv->bdrv_detach_aio_context(bs);
5748 }
5749 if (bs->file) {
5750 bdrv_detach_aio_context(bs->file);
5751 }
5752 if (bs->backing_hd) {
5753 bdrv_detach_aio_context(bs->backing_hd);
5754 }
5755
5756 bs->aio_context = NULL;
5757}
5758
5759void bdrv_attach_aio_context(BlockDriverState *bs,
5760 AioContext *new_context)
5761{
33384421
HR
5762 BdrvAioNotifier *ban;
5763
dcd04228
SH
5764 if (!bs->drv) {
5765 return;
5766 }
5767
5768 bs->aio_context = new_context;
5769
5770 if (bs->backing_hd) {
5771 bdrv_attach_aio_context(bs->backing_hd, new_context);
5772 }
5773 if (bs->file) {
5774 bdrv_attach_aio_context(bs->file, new_context);
5775 }
5776 if (bs->drv->bdrv_attach_aio_context) {
5777 bs->drv->bdrv_attach_aio_context(bs, new_context);
5778 }
13af91eb
SH
5779 if (bs->io_limits_enabled) {
5780 throttle_attach_aio_context(&bs->throttle_state, new_context);
5781 }
33384421
HR
5782
5783 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
5784 ban->attached_aio_context(new_context, ban->opaque);
5785 }
dcd04228
SH
5786}
5787
5788void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
5789{
5790 bdrv_drain_all(); /* ensure there are no in-flight requests */
5791
5792 bdrv_detach_aio_context(bs);
5793
5794 /* This function executes in the old AioContext so acquire the new one in
5795 * case it runs in a different thread.
5796 */
5797 aio_context_acquire(new_context);
5798 bdrv_attach_aio_context(bs, new_context);
5799 aio_context_release(new_context);
85d126f3 5800}
d616b224 5801
33384421
HR
5802void bdrv_add_aio_context_notifier(BlockDriverState *bs,
5803 void (*attached_aio_context)(AioContext *new_context, void *opaque),
5804 void (*detach_aio_context)(void *opaque), void *opaque)
5805{
5806 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
5807 *ban = (BdrvAioNotifier){
5808 .attached_aio_context = attached_aio_context,
5809 .detach_aio_context = detach_aio_context,
5810 .opaque = opaque
5811 };
5812
5813 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
5814}
5815
5816void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
5817 void (*attached_aio_context)(AioContext *,
5818 void *),
5819 void (*detach_aio_context)(void *),
5820 void *opaque)
5821{
5822 BdrvAioNotifier *ban, *ban_next;
5823
5824 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
5825 if (ban->attached_aio_context == attached_aio_context &&
5826 ban->detach_aio_context == detach_aio_context &&
5827 ban->opaque == opaque)
5828 {
5829 QLIST_REMOVE(ban, list);
5830 g_free(ban);
5831
5832 return;
5833 }
5834 }
5835
5836 abort();
5837}
5838
d616b224
SH
5839void bdrv_add_before_write_notifier(BlockDriverState *bs,
5840 NotifierWithReturn *notifier)
5841{
5842 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
5843}
6f176b48 5844
c282e1fd 5845int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts)
6f176b48 5846{
c282e1fd 5847 if (!bs->drv->bdrv_amend_options) {
6f176b48
HR
5848 return -ENOTSUP;
5849 }
c282e1fd 5850 return bs->drv->bdrv_amend_options(bs, opts);
6f176b48 5851}
f6186f49 5852
b5042a36
BC
5853/* This function will be called by the bdrv_recurse_is_first_non_filter method
5854 * of block filter and by bdrv_is_first_non_filter.
5855 * It is used to test if the given bs is the candidate or recurse more in the
5856 * node graph.
212a5a8f 5857 */
b5042a36 5858bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
212a5a8f 5859 BlockDriverState *candidate)
f6186f49 5860{
b5042a36
BC
5861 /* return false if basic checks fails */
5862 if (!bs || !bs->drv) {
212a5a8f 5863 return false;
f6186f49
BC
5864 }
5865
b5042a36
BC
5866 /* the code reached a non block filter driver -> check if the bs is
5867 * the same as the candidate. It's the recursion termination condition.
5868 */
5869 if (!bs->drv->is_filter) {
5870 return bs == candidate;
212a5a8f 5871 }
b5042a36 5872 /* Down this path the driver is a block filter driver */
212a5a8f 5873
b5042a36
BC
5874 /* If the block filter recursion method is defined use it to recurse down
5875 * the node graph.
5876 */
5877 if (bs->drv->bdrv_recurse_is_first_non_filter) {
212a5a8f 5878 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
f6186f49
BC
5879 }
5880
b5042a36
BC
5881 /* the driver is a block filter but don't allow to recurse -> return false
5882 */
5883 return false;
f6186f49
BC
5884}
5885
212a5a8f
BC
5886/* This function checks if the candidate is the first non filter bs down it's
5887 * bs chain. Since we don't have pointers to parents it explore all bs chains
5888 * from the top. Some filters can choose not to pass down the recursion.
5889 */
5890bool bdrv_is_first_non_filter(BlockDriverState *candidate)
f6186f49 5891{
212a5a8f
BC
5892 BlockDriverState *bs;
5893
5894 /* walk down the bs forest recursively */
5895 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5896 bool perm;
5897
b5042a36 5898 /* try to recurse in this top level bs */
e6dc8a1f 5899 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
212a5a8f
BC
5900
5901 /* candidate is the first non filter */
5902 if (perm) {
5903 return true;
5904 }
5905 }
5906
5907 return false;
f6186f49 5908}
09158f00
BC
5909
5910BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
5911{
5912 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
5913 if (!to_replace_bs) {
5914 error_setg(errp, "Node name '%s' not found", node_name);
5915 return NULL;
5916 }
5917
5918 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
5919 return NULL;
5920 }
5921
5922 /* We don't want arbitrary node of the BDS chain to be replaced only the top
5923 * most non filter in order to prevent data corruption.
5924 * Another benefit is that this tests exclude backing files which are
5925 * blocked by the backing blockers.
5926 */
5927 if (!bdrv_is_first_non_filter(to_replace_bs)) {
5928 error_setg(errp, "Only top most non filter can be replaced");
5929 return NULL;
5930 }
5931
5932 return to_replace_bs;
5933}
448ad91d
ML
5934
5935void bdrv_io_plug(BlockDriverState *bs)
5936{
5937 BlockDriver *drv = bs->drv;
5938 if (drv && drv->bdrv_io_plug) {
5939 drv->bdrv_io_plug(bs);
5940 } else if (bs->file) {
5941 bdrv_io_plug(bs->file);
5942 }
5943}
5944
5945void bdrv_io_unplug(BlockDriverState *bs)
5946{
5947 BlockDriver *drv = bs->drv;
5948 if (drv && drv->bdrv_io_unplug) {
5949 drv->bdrv_io_unplug(bs);
5950 } else if (bs->file) {
5951 bdrv_io_unplug(bs->file);
5952 }
5953}
5954
5955void bdrv_flush_io_queue(BlockDriverState *bs)
5956{
5957 BlockDriver *drv = bs->drv;
5958 if (drv && drv->bdrv_flush_io_queue) {
5959 drv->bdrv_flush_io_queue(bs);
5960 } else if (bs->file) {
5961 bdrv_flush_io_queue(bs->file);
5962 }
5963}
91af7014
HR
5964
5965static bool append_open_options(QDict *d, BlockDriverState *bs)
5966{
5967 const QDictEntry *entry;
5968 bool found_any = false;
5969
5970 for (entry = qdict_first(bs->options); entry;
5971 entry = qdict_next(bs->options, entry))
5972 {
5973 /* Only take options for this level and exclude all non-driver-specific
5974 * options */
5975 if (!strchr(qdict_entry_key(entry), '.') &&
5976 strcmp(qdict_entry_key(entry), "node-name"))
5977 {
5978 qobject_incref(qdict_entry_value(entry));
5979 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
5980 found_any = true;
5981 }
5982 }
5983
5984 return found_any;
5985}
5986
5987/* Updates the following BDS fields:
5988 * - exact_filename: A filename which may be used for opening a block device
5989 * which (mostly) equals the given BDS (even without any
5990 * other options; so reading and writing must return the same
5991 * results, but caching etc. may be different)
5992 * - full_open_options: Options which, when given when opening a block device
5993 * (without a filename), result in a BDS (mostly)
5994 * equalling the given one
5995 * - filename: If exact_filename is set, it is copied here. Otherwise,
5996 * full_open_options is converted to a JSON object, prefixed with
5997 * "json:" (for use through the JSON pseudo protocol) and put here.
5998 */
5999void bdrv_refresh_filename(BlockDriverState *bs)
6000{
6001 BlockDriver *drv = bs->drv;
6002 QDict *opts;
6003
6004 if (!drv) {
6005 return;
6006 }
6007
6008 /* This BDS's file name will most probably depend on its file's name, so
6009 * refresh that first */
6010 if (bs->file) {
6011 bdrv_refresh_filename(bs->file);
6012 }
6013
6014 if (drv->bdrv_refresh_filename) {
6015 /* Obsolete information is of no use here, so drop the old file name
6016 * information before refreshing it */
6017 bs->exact_filename[0] = '\0';
6018 if (bs->full_open_options) {
6019 QDECREF(bs->full_open_options);
6020 bs->full_open_options = NULL;
6021 }
6022
6023 drv->bdrv_refresh_filename(bs);
6024 } else if (bs->file) {
6025 /* Try to reconstruct valid information from the underlying file */
6026 bool has_open_options;
6027
6028 bs->exact_filename[0] = '\0';
6029 if (bs->full_open_options) {
6030 QDECREF(bs->full_open_options);
6031 bs->full_open_options = NULL;
6032 }
6033
6034 opts = qdict_new();
6035 has_open_options = append_open_options(opts, bs);
6036
6037 /* If no specific options have been given for this BDS, the filename of
6038 * the underlying file should suffice for this one as well */
6039 if (bs->file->exact_filename[0] && !has_open_options) {
6040 strcpy(bs->exact_filename, bs->file->exact_filename);
6041 }
6042 /* Reconstructing the full options QDict is simple for most format block
6043 * drivers, as long as the full options are known for the underlying
6044 * file BDS. The full options QDict of that file BDS should somehow
6045 * contain a representation of the filename, therefore the following
6046 * suffices without querying the (exact_)filename of this BDS. */
6047 if (bs->file->full_open_options) {
6048 qdict_put_obj(opts, "driver",
6049 QOBJECT(qstring_from_str(drv->format_name)));
6050 QINCREF(bs->file->full_open_options);
6051 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
6052
6053 bs->full_open_options = opts;
6054 } else {
6055 QDECREF(opts);
6056 }
6057 } else if (!bs->full_open_options && qdict_size(bs->options)) {
6058 /* There is no underlying file BDS (at least referenced by BDS.file),
6059 * so the full options QDict should be equal to the options given
6060 * specifically for this block device when it was opened (plus the
6061 * driver specification).
6062 * Because those options don't change, there is no need to update
6063 * full_open_options when it's already set. */
6064
6065 opts = qdict_new();
6066 append_open_options(opts, bs);
6067 qdict_put_obj(opts, "driver",
6068 QOBJECT(qstring_from_str(drv->format_name)));
6069
6070 if (bs->exact_filename[0]) {
6071 /* This may not work for all block protocol drivers (some may
6072 * require this filename to be parsed), but we have to find some
6073 * default solution here, so just include it. If some block driver
6074 * does not support pure options without any filename at all or
6075 * needs some special format of the options QDict, it needs to
6076 * implement the driver-specific bdrv_refresh_filename() function.
6077 */
6078 qdict_put_obj(opts, "filename",
6079 QOBJECT(qstring_from_str(bs->exact_filename)));
6080 }
6081
6082 bs->full_open_options = opts;
6083 }
6084
6085 if (bs->exact_filename[0]) {
6086 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
6087 } else if (bs->full_open_options) {
6088 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
6089 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
6090 qstring_get_str(json));
6091 QDECREF(json);
6092 }
6093}
5366d0c8
BC
6094
6095/* This accessor function purpose is to allow the device models to access the
6096 * BlockAcctStats structure embedded inside a BlockDriverState without being
6097 * aware of the BlockDriverState structure layout.
6098 * It will go away when the BlockAcctStats structure will be moved inside
6099 * the device models.
6100 */
6101BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
6102{
6103 return &bs->stats;
6104}
This page took 1.764172 seconds and 4 git commands to generate.