]>
Commit | Line | Data |
---|---|---|
ea2384d3 FB |
1 | /* |
2 | * QEMU System Emulator block driver | |
5fafdf24 | 3 | * |
ea2384d3 | 4 | * Copyright (c) 2003 Fabrice Bellard |
5fafdf24 | 5 | * |
ea2384d3 FB |
6 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | * of this software and associated documentation files (the "Software"), to deal | |
8 | * in the Software without restriction, including without limitation the rights | |
9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
10 | * copies of the Software, and to permit persons to whom the Software is | |
11 | * furnished to do so, subject to the following conditions: | |
12 | * | |
13 | * The above copyright notice and this permission notice shall be included in | |
14 | * all copies or substantial portions of the Software. | |
15 | * | |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
22 | * THE SOFTWARE. | |
23 | */ | |
24 | #ifndef BLOCK_INT_H | |
25 | #define BLOCK_INT_H | |
26 | ||
5e5a94b6 | 27 | #include "block/accounting.h" |
737e150e | 28 | #include "block/block.h" |
7719f3c9 | 29 | #include "block/aio-wait.h" |
1de7afc9 | 30 | #include "qemu/queue.h" |
10817bf0 | 31 | #include "qemu/coroutine.h" |
f7946da2 | 32 | #include "qemu/stats64.h" |
1de7afc9 | 33 | #include "qemu/timer.h" |
8f0720ec | 34 | #include "qemu/hbitmap.h" |
f364ec65 | 35 | #include "block/snapshot.h" |
6a1751b7 | 36 | #include "qemu/main-loop.h" |
cc0681c4 | 37 | #include "qemu/throttle.h" |
faf07963 | 38 | |
bfe8043e | 39 | #define BLOCK_FLAG_LAZY_REFCOUNTS 8 |
ec36ba14 | 40 | |
bfe8043e SH |
41 | #define BLOCK_OPT_SIZE "size" |
42 | #define BLOCK_OPT_ENCRYPT "encryption" | |
0cb8d47b | 43 | #define BLOCK_OPT_ENCRYPT_FORMAT "encrypt.format" |
bfe8043e | 44 | #define BLOCK_OPT_COMPAT6 "compat6" |
f249924e | 45 | #define BLOCK_OPT_HWVERSION "hwversion" |
bfe8043e SH |
46 | #define BLOCK_OPT_BACKING_FILE "backing_file" |
47 | #define BLOCK_OPT_BACKING_FMT "backing_fmt" | |
48 | #define BLOCK_OPT_CLUSTER_SIZE "cluster_size" | |
49 | #define BLOCK_OPT_TABLE_SIZE "table_size" | |
50 | #define BLOCK_OPT_PREALLOC "preallocation" | |
51 | #define BLOCK_OPT_SUBFMT "subformat" | |
52 | #define BLOCK_OPT_COMPAT_LEVEL "compat" | |
53 | #define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts" | |
7f2039f6 | 54 | #define BLOCK_OPT_ADAPTER_TYPE "adapter_type" |
b3af018f | 55 | #define BLOCK_OPT_REDUNDANCY "redundancy" |
4ab15590 | 56 | #define BLOCK_OPT_NOCOW "nocow" |
876eb1b0 | 57 | #define BLOCK_OPT_OBJECT_SIZE "object_size" |
06d05fa7 | 58 | #define BLOCK_OPT_REFCOUNT_BITS "refcount_bits" |
0e7e1989 | 59 | |
7cddd372 KW |
60 | #define BLOCK_PROBE_BUF_SIZE 512 |
61 | ||
ebde595c FZ |
62 | enum BdrvTrackedRequestType { |
63 | BDRV_TRACKED_READ, | |
64 | BDRV_TRACKED_WRITE, | |
ebde595c | 65 | BDRV_TRACKED_DISCARD, |
1bc5f09f | 66 | BDRV_TRACKED_TRUNCATE, |
ebde595c FZ |
67 | }; |
68 | ||
d616b224 SH |
69 | typedef struct BdrvTrackedRequest { |
70 | BlockDriverState *bs; | |
793ed47a | 71 | int64_t offset; |
22931a15 | 72 | uint64_t bytes; |
ebde595c | 73 | enum BdrvTrackedRequestType type; |
7327145f | 74 | |
2dbafdc0 | 75 | bool serialising; |
7327145f | 76 | int64_t overlap_offset; |
22931a15 | 77 | uint64_t overlap_bytes; |
7327145f | 78 | |
d616b224 SH |
79 | QLIST_ENTRY(BdrvTrackedRequest) list; |
80 | Coroutine *co; /* owner, used for deadlock detection */ | |
81 | CoQueue wait_queue; /* coroutines blocked on this request */ | |
6460440f KW |
82 | |
83 | struct BdrvTrackedRequest *waiting_for; | |
d616b224 SH |
84 | } BdrvTrackedRequest; |
85 | ||
ea2384d3 FB |
86 | struct BlockDriver { |
87 | const char *format_name; | |
88 | int instance_size; | |
f6186f49 | 89 | |
5a612c00 MP |
90 | /* set to true if the BlockDriver is a block filter. Block filters pass |
91 | * certain callbacks that refer to data (see block.c) to their bs->file if | |
92 | * the driver doesn't implement them. Drivers that do not wish to forward | |
93 | * must implement them and return -ENOTSUP. | |
94 | */ | |
b5042a36 BC |
95 | bool is_filter; |
96 | /* For snapshots, block filters like Quorum can implement the | |
97 | * following recursive callback. | |
212a5a8f BC |
98 | * Its purpose is to recurse on the filter children while calling |
99 | * bdrv_recurse_is_first_non_filter on them. | |
100 | * For a sample implementation look in the future Quorum block filter. | |
f6186f49 | 101 | */ |
212a5a8f BC |
102 | bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs, |
103 | BlockDriverState *candidate); | |
f6186f49 | 104 | |
ea2384d3 | 105 | int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); |
508c7cb3 | 106 | int (*bdrv_probe_device)(const char *filename); |
c2ad1b0c KW |
107 | |
108 | /* Any driver implementing this callback is expected to be able to handle | |
109 | * NULL file names in its .bdrv_open() implementation */ | |
6963a30d | 110 | void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp); |
030be321 BC |
111 | /* Drivers implementing neither bdrv_parse_filename nor bdrv_open should have |
112 | * this field set to true, except ones that are defined only by their | |
113 | * child's bs. | |
114 | * An example of the last type will be the quorum block driver. | |
115 | */ | |
116 | bool bdrv_needs_filename; | |
e971aa12 | 117 | |
8ee79e70 KW |
118 | /* Set if a driver can support backing files */ |
119 | bool supports_backing; | |
120 | ||
e971aa12 JC |
121 | /* For handling image reopen for split or non-split files */ |
122 | int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, | |
123 | BlockReopenQueue *queue, Error **errp); | |
124 | void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); | |
125 | void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); | |
5365f44d | 126 | void (*bdrv_join_options)(QDict *options, QDict *old_options); |
e971aa12 | 127 | |
015a1036 HR |
128 | int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags, |
129 | Error **errp); | |
1e486cf3 FR |
130 | |
131 | /* Protocol drivers should implement this instead of bdrv_open */ | |
015a1036 HR |
132 | int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags, |
133 | Error **errp); | |
e2731add | 134 | void (*bdrv_close)(BlockDriverState *bs); |
b0292b85 | 135 | int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts, |
efc75e2a | 136 | Error **errp); |
b0292b85 KW |
137 | int coroutine_fn (*bdrv_co_create_opts)(const char *filename, |
138 | QemuOpts *opts, | |
139 | Error **errp); | |
95389c86 | 140 | int (*bdrv_make_empty)(BlockDriverState *bs); |
91af7014 | 141 | |
4cdd01d3 | 142 | void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options); |
91af7014 | 143 | |
83f64091 | 144 | /* aio */ |
e31f6864 EB |
145 | BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs, |
146 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags, | |
147 | BlockCompletionFunc *cb, void *opaque); | |
e31f6864 EB |
148 | BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs, |
149 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags, | |
150 | BlockCompletionFunc *cb, void *opaque); | |
7c84b1b8 | 151 | BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, |
097310b5 | 152 | BlockCompletionFunc *cb, void *opaque); |
4da444a0 | 153 | BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs, |
f5a5ca79 | 154 | int64_t offset, int bytes, |
097310b5 | 155 | BlockCompletionFunc *cb, void *opaque); |
83f64091 | 156 | |
da1fa91d KW |
157 | int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs, |
158 | int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); | |
64182a6b DB |
159 | |
160 | /** | |
161 | * @offset: position in bytes to read at | |
162 | * @bytes: number of bytes to read | |
163 | * @qiov: the buffers to fill with read data | |
164 | * @flags: currently unused, always 0 | |
165 | * | |
166 | * @offset and @bytes will be a multiple of 'request_alignment', | |
167 | * but the length of individual @qiov elements does not have to | |
168 | * be a multiple. | |
169 | * | |
170 | * @bytes will always equal the total size of @qiov, and will be | |
171 | * no larger than 'max_transfer'. | |
172 | * | |
173 | * The buffer in @qiov may point directly to guest memory. | |
174 | */ | |
3fb06697 KW |
175 | int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs, |
176 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); | |
da1fa91d | 177 | int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, |
93f5e6d8 | 178 | int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags); |
64182a6b DB |
179 | /** |
180 | * @offset: position in bytes to write at | |
181 | * @bytes: number of bytes to write | |
182 | * @qiov: the buffers containing data to write | |
183 | * @flags: zero or more bits allowed by 'supported_write_flags' | |
184 | * | |
185 | * @offset and @bytes will be a multiple of 'request_alignment', | |
186 | * but the length of individual @qiov elements does not have to | |
187 | * be a multiple. | |
188 | * | |
189 | * @bytes will always equal the total size of @qiov, and will be | |
190 | * no larger than 'max_transfer'. | |
191 | * | |
192 | * The buffer in @qiov may point directly to guest memory. | |
193 | */ | |
3fb06697 KW |
194 | int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs, |
195 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); | |
93f5e6d8 | 196 | |
f08f2dda SH |
197 | /* |
198 | * Efficiently zero a region of the disk image. Typically an image format | |
199 | * would use a compact metadata representation to implement this. This | |
465fe887 EB |
200 | * function pointer may be NULL or return -ENOTSUP and .bdrv_co_writev() |
201 | * will be called instead. | |
f08f2dda | 202 | */ |
d05aa8bb | 203 | int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs, |
f5a5ca79 | 204 | int64_t offset, int bytes, BdrvRequestFlags flags); |
47a5486d | 205 | int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs, |
f5a5ca79 | 206 | int64_t offset, int bytes); |
4c41cb49 | 207 | |
fcc67678 FZ |
208 | /* Map [offset, offset + bytes) range onto a child of @bs to copy from, |
209 | * and invoke bdrv_co_copy_range_from(child, ...), or invoke | |
210 | * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from. | |
211 | * | |
212 | * See the comment of bdrv_co_copy_range for the parameter and return value | |
213 | * semantics. | |
214 | */ | |
215 | int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs, | |
216 | BdrvChild *src, | |
217 | uint64_t offset, | |
218 | BdrvChild *dst, | |
219 | uint64_t dst_offset, | |
220 | uint64_t bytes, | |
67b51fb9 VSO |
221 | BdrvRequestFlags read_flags, |
222 | BdrvRequestFlags write_flags); | |
fcc67678 FZ |
223 | |
224 | /* Map [offset, offset + bytes) range onto a child of @bs to copy data to, | |
225 | * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy | |
226 | * operation if @bs is the leaf and @src has the same BlockDriver. Return | |
227 | * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver. | |
228 | * | |
229 | * See the comment of bdrv_co_copy_range for the parameter and return value | |
230 | * semantics. | |
231 | */ | |
232 | int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs, | |
233 | BdrvChild *src, | |
234 | uint64_t src_offset, | |
235 | BdrvChild *dst, | |
236 | uint64_t dst_offset, | |
237 | uint64_t bytes, | |
67b51fb9 VSO |
238 | BdrvRequestFlags read_flags, |
239 | BdrvRequestFlags write_flags); | |
fcc67678 | 240 | |
4c41cb49 | 241 | /* |
298a1665 EB |
242 | * Building block for bdrv_block_status[_above] and |
243 | * bdrv_is_allocated[_above]. The driver should answer only | |
86a3d5c6 EB |
244 | * according to the current layer, and should only need to set |
245 | * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID, | |
246 | * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing | |
247 | * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See | |
248 | * block.h for the overall meaning of the bits. As a hint, the | |
249 | * flag want_zero is true if the caller cares more about precise | |
250 | * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for | |
251 | * overall allocation (favor larger *pnum, perhaps by reporting | |
252 | * _DATA instead of _ZERO). The block layer guarantees input | |
253 | * clamped to bdrv_getlength() and aligned to request_alignment, | |
254 | * as well as non-NULL pnum, map, and file; in turn, the driver | |
255 | * must return an error or set pnum to an aligned non-zero value. | |
4c41cb49 | 256 | */ |
86a3d5c6 EB |
257 | int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs, |
258 | bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, | |
259 | int64_t *map, BlockDriverState **file); | |
da1fa91d | 260 | |
0f15423c AL |
261 | /* |
262 | * Invalidate any cached meta-data. | |
263 | */ | |
2b148f39 PB |
264 | void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs, |
265 | Error **errp); | |
76b1c7fe | 266 | int (*bdrv_inactivate)(BlockDriverState *bs); |
0f15423c | 267 | |
c32b82af PD |
268 | /* |
269 | * Flushes all data for all layers by calling bdrv_co_flush for underlying | |
270 | * layers, if needed. This function is needed for deterministic | |
271 | * synchronization of the flush finishing callback. | |
272 | */ | |
273 | int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs); | |
274 | ||
c68b89ac KW |
275 | /* |
276 | * Flushes all data that was already written to the OS all the way down to | |
c1bb86cd | 277 | * the disk (for example file-posix.c calls fsync()). |
c68b89ac KW |
278 | */ |
279 | int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs); | |
280 | ||
eb489bb1 KW |
281 | /* |
282 | * Flushes all internal caches to the OS. The data may still sit in a | |
283 | * writeback cache of the host OS, but it will survive a crash of the qemu | |
284 | * process. | |
285 | */ | |
286 | int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs); | |
287 | ||
1e486cf3 FR |
288 | /* |
289 | * Drivers setting this field must be able to work with just a plain | |
290 | * filename with '<protocol_name>:' as a prefix, and no other options. | |
291 | * Options may be extracted from the filename by implementing | |
292 | * bdrv_parse_filename. | |
293 | */ | |
83f64091 | 294 | const char *protocol_name; |
061ca8a3 KW |
295 | int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset, |
296 | PreallocMode prealloc, Error **errp); | |
b94a2610 | 297 | |
83f64091 | 298 | int64_t (*bdrv_getlength)(BlockDriverState *bs); |
b94a2610 | 299 | bool has_variable_length; |
4a1d5e1f | 300 | int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs); |
90880ff1 SH |
301 | BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs, |
302 | Error **errp); | |
b94a2610 | 303 | |
29a298af PB |
304 | int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs, |
305 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov); | |
306 | ||
5fafdf24 | 307 | int (*bdrv_snapshot_create)(BlockDriverState *bs, |
faea38e7 | 308 | QEMUSnapshotInfo *sn_info); |
5fafdf24 | 309 | int (*bdrv_snapshot_goto)(BlockDriverState *bs, |
faea38e7 | 310 | const char *snapshot_id); |
a89d89d3 WX |
311 | int (*bdrv_snapshot_delete)(BlockDriverState *bs, |
312 | const char *snapshot_id, | |
313 | const char *name, | |
314 | Error **errp); | |
5fafdf24 | 315 | int (*bdrv_snapshot_list)(BlockDriverState *bs, |
faea38e7 | 316 | QEMUSnapshotInfo **psn_info); |
51ef6727 | 317 | int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs, |
7b4c4781 WX |
318 | const char *snapshot_id, |
319 | const char *name, | |
320 | Error **errp); | |
faea38e7 | 321 | int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi); |
eae041fe | 322 | ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs); |
83f64091 | 323 | |
1a8ae822 KW |
324 | int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs, |
325 | QEMUIOVector *qiov, | |
326 | int64_t pos); | |
327 | int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs, | |
328 | QEMUIOVector *qiov, | |
329 | int64_t pos); | |
178e08a5 | 330 | |
756e6736 KW |
331 | int (*bdrv_change_backing_file)(BlockDriverState *bs, |
332 | const char *backing_file, const char *backing_fmt); | |
333 | ||
19cb3738 | 334 | /* removable device specific */ |
e031f750 | 335 | bool (*bdrv_is_inserted)(BlockDriverState *bs); |
f36f3949 | 336 | void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag); |
025e849a | 337 | void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked); |
3b46e624 | 338 | |
985a03b0 | 339 | /* to control generic scsi devices */ |
7c84b1b8 | 340 | BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs, |
221f715d | 341 | unsigned long int req, void *buf, |
097310b5 | 342 | BlockCompletionFunc *cb, void *opaque); |
16a389dc KW |
343 | int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs, |
344 | unsigned long int req, void *buf); | |
985a03b0 | 345 | |
0e7e1989 | 346 | /* List of options for creating images, terminated by name == NULL */ |
83d0521a | 347 | QemuOptsList *create_opts; |
5eb45639 | 348 | |
9ac228e0 KW |
349 | /* |
350 | * Returns 0 for completed check, -errno for internal errors. | |
351 | * The check results are stored in result. | |
352 | */ | |
2fd61638 PB |
353 | int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs, |
354 | BdrvCheckResult *result, | |
355 | BdrvCheckMode fix); | |
e97fc193 | 356 | |
77485434 | 357 | int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts, |
8b13976d | 358 | BlockDriverAmendStatusCB *status_cb, |
d1402b50 HR |
359 | void *cb_opaque, |
360 | Error **errp); | |
6f176b48 | 361 | |
a31939e6 | 362 | void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event); |
8b9b0cc2 | 363 | |
41c695c7 KW |
364 | /* TODO Better pass an option string/QDict/QemuOpts to add any rule? */ |
365 | int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event, | |
366 | const char *tag); | |
4cc70e93 FZ |
367 | int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs, |
368 | const char *tag); | |
41c695c7 KW |
369 | int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); |
370 | bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); | |
371 | ||
3baca891 | 372 | void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp); |
d34682cd | 373 | |
336c1c12 KW |
374 | /* |
375 | * Returns 1 if newly created images are guaranteed to contain only | |
376 | * zeros, 0 otherwise. | |
377 | */ | |
378 | int (*bdrv_has_zero_init)(BlockDriverState *bs); | |
12c09b8c | 379 | |
dcd04228 SH |
380 | /* Remove fd handlers, timers, and other event loop callbacks so the event |
381 | * loop is no longer in use. Called with no in-flight requests and in | |
382 | * depth-first traversal order with parents before child nodes. | |
383 | */ | |
384 | void (*bdrv_detach_aio_context)(BlockDriverState *bs); | |
385 | ||
386 | /* Add fd handlers, timers, and other event loop callbacks so I/O requests | |
387 | * can be processed again. Called with no in-flight requests and in | |
388 | * depth-first traversal order with child nodes before parent nodes. | |
389 | */ | |
390 | void (*bdrv_attach_aio_context)(BlockDriverState *bs, | |
391 | AioContext *new_context); | |
392 | ||
448ad91d ML |
393 | /* io queue for linux-aio */ |
394 | void (*bdrv_io_plug)(BlockDriverState *bs); | |
395 | void (*bdrv_io_unplug)(BlockDriverState *bs); | |
448ad91d | 396 | |
892b7de8 ET |
397 | /** |
398 | * Try to get @bs's logical and physical block size. | |
399 | * On success, store them in @bsz and return zero. | |
400 | * On failure, return negative errno. | |
401 | */ | |
402 | int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz); | |
403 | /** | |
404 | * Try to get @bs's geometry (cyls, heads, sectors) | |
405 | * On success, store them in @geo and return 0. | |
406 | * On failure return -errno. | |
407 | * Only drivers that want to override guest geometry implement this | |
408 | * callback; see hd_geometry_guess(). | |
409 | */ | |
410 | int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo); | |
411 | ||
67da1dc5 | 412 | /** |
f8ea8dac | 413 | * bdrv_co_drain_begin is called if implemented in the beginning of a |
481cad48 MP |
414 | * drain operation to drain and stop any internal sources of requests in |
415 | * the driver. | |
416 | * bdrv_co_drain_end is called if implemented at the end of the drain. | |
417 | * | |
418 | * They should be used by the driver to e.g. manage scheduled I/O | |
419 | * requests, or toggle an internal state. After the end of the drain new | |
420 | * requests will continue normally. | |
67da1dc5 | 421 | */ |
f8ea8dac | 422 | void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs); |
481cad48 | 423 | void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); |
67da1dc5 | 424 | |
e06018ad WC |
425 | void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child, |
426 | Error **errp); | |
427 | void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child, | |
428 | Error **errp); | |
429 | ||
33a610c3 KW |
430 | /** |
431 | * Informs the block driver that a permission change is intended. The | |
432 | * driver checks whether the change is permissible and may take other | |
433 | * preparations for the change (e.g. get file system locks). This operation | |
434 | * is always followed by a call to either .bdrv_set_perm or | |
435 | * .bdrv_abort_perm_update. | |
436 | * | |
437 | * Checks whether the requested set of cumulative permissions in @perm | |
438 | * can be granted for accessing @bs and whether no other users are using | |
439 | * permissions other than those given in @shared (both arguments take | |
440 | * BLK_PERM_* bitmasks). | |
441 | * | |
442 | * If both conditions are met, 0 is returned. Otherwise, -errno is returned | |
443 | * and errp is set to an error describing the conflict. | |
444 | */ | |
445 | int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm, | |
446 | uint64_t shared, Error **errp); | |
447 | ||
448 | /** | |
449 | * Called to inform the driver that the cumulative set of used | |
450 | * permissions for @bs has changed to @perm, and the set of sharable | |
451 | * permissions to @shared. The driver can use this to propagate changes to | |
452 | * its children (i.e. request permissions only if a parent actually needs | |
453 | * them). | |
454 | * | |
455 | * This function is only invoked after bdrv_check_perm(), so block drivers | |
456 | * may rely on preparations made in their .bdrv_check_perm implementation. | |
457 | */ | |
458 | void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared); | |
459 | ||
460 | /* | |
461 | * Called to inform the driver that after a previous bdrv_check_perm() | |
462 | * call, the permission update is not performed and any preparations made | |
463 | * for it (e.g. taken file locks) need to be undone. | |
464 | * | |
465 | * This function can be called even for nodes that never saw a | |
466 | * bdrv_check_perm() call. It is a no-op then. | |
467 | */ | |
468 | void (*bdrv_abort_perm_update)(BlockDriverState *bs); | |
469 | ||
470 | /** | |
471 | * Returns in @nperm and @nshared the permissions that the driver for @bs | |
472 | * needs on its child @c, based on the cumulative permissions requested by | |
473 | * the parents in @parent_perm and @parent_shared. | |
474 | * | |
475 | * If @c is NULL, return the permissions for attaching a new child for the | |
476 | * given @role. | |
e0995dc3 KW |
477 | * |
478 | * If @reopen_queue is non-NULL, don't return the currently needed | |
479 | * permissions, but those that will be needed after applying the | |
480 | * @reopen_queue. | |
33a610c3 KW |
481 | */ |
482 | void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c, | |
483 | const BdrvChildRole *role, | |
e0995dc3 | 484 | BlockReopenQueue *reopen_queue, |
33a610c3 KW |
485 | uint64_t parent_perm, uint64_t parent_shared, |
486 | uint64_t *nperm, uint64_t *nshared); | |
487 | ||
cb9ff6c2 VSO |
488 | /** |
489 | * Bitmaps should be marked as 'IN_USE' in the image on reopening image | |
490 | * as rw. This handler should implement that. It should also unset the | |
491 | * readonly field of the BlockDirtyBitmaps in case of success. | |
492 | */ | |
493 | int (*bdrv_reopen_bitmaps_rw)(BlockDriverState *bs, Error **errp); | |
67b792f5 VSO |
494 | bool (*bdrv_can_store_new_dirty_bitmap)(BlockDriverState *bs, |
495 | const char *name, | |
496 | uint32_t granularity, | |
497 | Error **errp); | |
56f364e6 VSO |
498 | void (*bdrv_remove_persistent_dirty_bitmap)(BlockDriverState *bs, |
499 | const char *name, | |
500 | Error **errp); | |
cb9ff6c2 | 501 | |
23d0ba93 FZ |
502 | /** |
503 | * Register/unregister a buffer for I/O. For example, when the driver is | |
504 | * interested to know the memory areas that will later be used in iovs, so | |
505 | * that it can do IOMMU mapping with VFIO etc., in order to get better | |
506 | * performance. In the case of VFIO drivers, this callback is used to do | |
507 | * DMA mapping for hot buffers. | |
508 | */ | |
509 | void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size); | |
510 | void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host); | |
8a22f02a | 511 | QLIST_ENTRY(BlockDriver) list; |
ea2384d3 FB |
512 | }; |
513 | ||
fe81c2cc | 514 | typedef struct BlockLimits { |
a5b8dd2c EB |
515 | /* Alignment requirement, in bytes, for offset/length of I/O |
516 | * requests. Must be a power of 2 less than INT_MAX; defaults to | |
517 | * 1 for drivers with modern byte interfaces, and to 512 | |
518 | * otherwise. */ | |
519 | uint32_t request_alignment; | |
520 | ||
b8d0a980 EB |
521 | /* Maximum number of bytes that can be discarded at once (since it |
522 | * is signed, it must be < 2G, if set). Must be multiple of | |
b9f7855a EB |
523 | * pdiscard_alignment, but need not be power of 2. May be 0 if no |
524 | * inherent 32-bit limit */ | |
525 | int32_t max_pdiscard; | |
526 | ||
b8d0a980 EB |
527 | /* Optimal alignment for discard requests in bytes. A power of 2 |
528 | * is best but not mandatory. Must be a multiple of | |
529 | * bl.request_alignment, and must be less than max_pdiscard if | |
530 | * that is set. May be 0 if bl.request_alignment is good enough */ | |
b9f7855a | 531 | uint32_t pdiscard_alignment; |
fe81c2cc | 532 | |
b8d0a980 EB |
533 | /* Maximum number of bytes that can be zeroized at once (since it is |
534 | * signed, it must be < 2G, if set). Must be multiple of | |
29cc6a68 | 535 | * pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */ |
cf081fca | 536 | int32_t max_pwrite_zeroes; |
fe81c2cc | 537 | |
b8d0a980 EB |
538 | /* Optimal alignment for write zeroes requests in bytes. A power |
539 | * of 2 is best but not mandatory. Must be a multiple of | |
540 | * bl.request_alignment, and must be less than max_pwrite_zeroes | |
541 | * if that is set. May be 0 if bl.request_alignment is good | |
542 | * enough */ | |
cf081fca | 543 | uint32_t pwrite_zeroes_alignment; |
7337acaf | 544 | |
b8d0a980 EB |
545 | /* Optimal transfer length in bytes. A power of 2 is best but not |
546 | * mandatory. Must be a multiple of bl.request_alignment, or 0 if | |
547 | * no preferred size */ | |
5def6b80 EB |
548 | uint32_t opt_transfer; |
549 | ||
b8d0a980 EB |
550 | /* Maximal transfer length in bytes. Need not be power of 2, but |
551 | * must be multiple of opt_transfer and bl.request_alignment, or 0 | |
552 | * for no 32-bit limit. For now, anything larger than INT_MAX is | |
553 | * clamped down. */ | |
5def6b80 | 554 | uint32_t max_transfer; |
2647fab5 | 555 | |
a5b8dd2c | 556 | /* memory alignment, in bytes so that no bounce buffer is needed */ |
4196d2f0 DL |
557 | size_t min_mem_alignment; |
558 | ||
a5b8dd2c | 559 | /* memory alignment, in bytes, for bounce buffer */ |
339064d5 | 560 | size_t opt_mem_alignment; |
bd44feb7 SH |
561 | |
562 | /* maximum number of iovec elements */ | |
563 | int max_iov; | |
fe81c2cc PL |
564 | } BlockLimits; |
565 | ||
fbe40ff7 FZ |
566 | typedef struct BdrvOpBlocker BdrvOpBlocker; |
567 | ||
33384421 HR |
568 | typedef struct BdrvAioNotifier { |
569 | void (*attached_aio_context)(AioContext *new_context, void *opaque); | |
570 | void (*detach_aio_context)(void *opaque); | |
571 | ||
572 | void *opaque; | |
e8a095da | 573 | bool deleted; |
33384421 HR |
574 | |
575 | QLIST_ENTRY(BdrvAioNotifier) list; | |
576 | } BdrvAioNotifier; | |
577 | ||
f3930ed0 | 578 | struct BdrvChildRole { |
5fe31c25 KW |
579 | /* If true, bdrv_replace_node() doesn't change the node this BdrvChild |
580 | * points to. */ | |
26de9438 KW |
581 | bool stay_at_node; |
582 | ||
6cd5c9d7 KW |
583 | /* If true, the parent is a BlockDriverState and bdrv_next_all_states() |
584 | * will return it. This information is used for drain_all, where every node | |
585 | * will be drained separately, so the drain only needs to be propagated to | |
586 | * non-BDS parents. */ | |
587 | bool parent_is_bds; | |
588 | ||
8e2160e2 KW |
589 | void (*inherit_options)(int *child_flags, QDict *child_options, |
590 | int parent_flags, QDict *parent_options); | |
c2066af0 | 591 | |
5c8cab48 KW |
592 | void (*change_media)(BdrvChild *child, bool load); |
593 | void (*resize)(BdrvChild *child); | |
594 | ||
4c265bf9 KW |
595 | /* Returns a name that is supposedly more useful for human users than the |
596 | * node name for identifying the node in question (in particular, a BB | |
597 | * name), or NULL if the parent can't provide a better name. */ | |
d4a7f45e | 598 | const char *(*get_name)(BdrvChild *child); |
4c265bf9 | 599 | |
b5411555 KW |
600 | /* Returns a malloced string that describes the parent of the child for a |
601 | * human reader. This could be a node-name, BlockBackend name, qdev ID or | |
602 | * QOM path of the device owning the BlockBackend, job type and ID etc. The | |
603 | * caller is responsible for freeing the memory. */ | |
d4a7f45e | 604 | char *(*get_parent_desc)(BdrvChild *child); |
b5411555 | 605 | |
c2066af0 KW |
606 | /* |
607 | * If this pair of functions is implemented, the parent doesn't issue new | |
608 | * requests after returning from .drained_begin() until .drained_end() is | |
609 | * called. | |
610 | * | |
4be6a6d1 KW |
611 | * These functions must not change the graph (and therefore also must not |
612 | * call aio_poll(), which could change the graph indirectly). | |
613 | * | |
c2066af0 KW |
614 | * Note that this can be nested. If drained_begin() was called twice, new |
615 | * I/O is allowed only after drained_end() was called twice, too. | |
616 | */ | |
617 | void (*drained_begin)(BdrvChild *child); | |
618 | void (*drained_end)(BdrvChild *child); | |
db95dbba | 619 | |
89bd0305 KW |
620 | /* |
621 | * Returns whether the parent has pending requests for the child. This | |
622 | * callback is polled after .drained_begin() has been called until all | |
623 | * activity on the child has stopped. | |
624 | */ | |
625 | bool (*drained_poll)(BdrvChild *child); | |
626 | ||
cfa1a572 KW |
627 | /* Notifies the parent that the child has been activated/inactivated (e.g. |
628 | * when migration is completing) and it can start/stop requesting | |
629 | * permissions and doing I/O on it. */ | |
4417ab7a | 630 | void (*activate)(BdrvChild *child, Error **errp); |
cfa1a572 | 631 | int (*inactivate)(BdrvChild *child); |
4417ab7a | 632 | |
db95dbba KW |
633 | void (*attach)(BdrvChild *child); |
634 | void (*detach)(BdrvChild *child); | |
6858eba0 KW |
635 | |
636 | /* Notifies the parent that the filename of its child has changed (e.g. | |
637 | * because the direct child was removed from the backing chain), so that it | |
638 | * can update its reference. */ | |
639 | int (*update_filename)(BdrvChild *child, BlockDriverState *new_base, | |
640 | const char *filename, Error **errp); | |
f3930ed0 KW |
641 | }; |
642 | ||
643 | extern const BdrvChildRole child_file; | |
644 | extern const BdrvChildRole child_format; | |
91ef3825 | 645 | extern const BdrvChildRole child_backing; |
f3930ed0 | 646 | |
b4b059f6 | 647 | struct BdrvChild { |
6e93e7c4 | 648 | BlockDriverState *bs; |
260fecf1 | 649 | char *name; |
6e93e7c4 | 650 | const BdrvChildRole *role; |
22aa8b24 | 651 | void *opaque; |
d5e6f437 KW |
652 | |
653 | /** | |
654 | * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask) | |
655 | */ | |
656 | uint64_t perm; | |
657 | ||
658 | /** | |
659 | * Permissions that can still be granted to other users of @bs while this | |
660 | * BdrvChild is still attached to it. (BLK_PERM_* bitmask) | |
661 | */ | |
662 | uint64_t shared_perm; | |
663 | ||
6e93e7c4 | 664 | QLIST_ENTRY(BdrvChild) next; |
d42a8a93 | 665 | QLIST_ENTRY(BdrvChild) next_parent; |
b4b059f6 | 666 | }; |
6e93e7c4 | 667 | |
8802d1fd JC |
668 | /* |
669 | * Note: the function bdrv_append() copies and swaps contents of | |
670 | * BlockDriverStates, so if you add new fields to this struct, please | |
671 | * inspect bdrv_append() to determine if the new fields need to be | |
672 | * copied as well. | |
673 | */ | |
ea2384d3 | 674 | struct BlockDriverState { |
91bcea48 PB |
675 | /* Protected by big QEMU lock or read-only after opening. No special |
676 | * locking needed during I/O... | |
677 | */ | |
4dca4b63 | 678 | int open_flags; /* flags used to open the file, re-used for re-open */ |
54115412 EB |
679 | bool read_only; /* if true, the media is read only */ |
680 | bool encrypted; /* if true, the media is encrypted */ | |
54115412 EB |
681 | bool sg; /* if true, the device is a /dev/sg* */ |
682 | bool probed; /* if true, format was probed rather than specified */ | |
5a9347c6 | 683 | bool force_share; /* if true, always allow all shared permissions */ |
d3c8c674 | 684 | bool implicit; /* if true, this filter node was automatically inserted */ |
54115412 | 685 | |
19cb3738 | 686 | BlockDriver *drv; /* NULL means no media */ |
ea2384d3 FB |
687 | void *opaque; |
688 | ||
dcd04228 | 689 | AioContext *aio_context; /* event loop used for fd handlers, timers, etc */ |
33384421 HR |
690 | /* long-running tasks intended to always use the same AioContext as this |
691 | * BDS may register themselves in this list to be notified of changes | |
692 | * regarding this BDS's context */ | |
693 | QLIST_HEAD(, BdrvAioNotifier) aio_notifiers; | |
e8a095da | 694 | bool walking_aio_notifiers; /* to make removal during iteration safe */ |
dcd04228 | 695 | |
9a29e18f JC |
696 | char filename[PATH_MAX]; |
697 | char backing_file[PATH_MAX]; /* if non zero, the image is a diff of | |
698 | this file image */ | |
5eb45639 | 699 | char backing_format[16]; /* if non-zero and backing_file exists */ |
19cb3738 | 700 | |
91af7014 | 701 | QDict *full_open_options; |
9a29e18f | 702 | char exact_filename[PATH_MAX]; |
91af7014 | 703 | |
760e0063 | 704 | BdrvChild *backing; |
9a4f4c31 | 705 | BdrvChild *file; |
66f82cee | 706 | |
fe81c2cc PL |
707 | /* I/O Limits */ |
708 | BlockLimits bl; | |
709 | ||
c1e3489d HR |
710 | /* Flags honored during pwrite (so far: BDRV_REQ_FUA, |
711 | * BDRV_REQ_WRITE_UNCHANGED). | |
712 | * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those | |
713 | * writes will be issued as normal writes without the flag set. | |
714 | * This is important to note for drivers that do not explicitly | |
715 | * request a WRITE permission for their children and instead take | |
716 | * the same permissions as their parent did (this is commonly what | |
717 | * block filters do). Such drivers have to be aware that the | |
718 | * parent may have taken a WRITE_UNCHANGED permission only and is | |
719 | * issuing such requests. Drivers either must make sure that | |
720 | * these requests do not result in plain WRITE accesses (usually | |
721 | * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding | |
722 | * every incoming write request as-is, including potentially that | |
723 | * flag), or they have to explicitly take the WRITE permission for | |
724 | * their children. */ | |
4df863f3 | 725 | unsigned int supported_write_flags; |
d05aa8bb | 726 | /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA, |
c1e3489d | 727 | * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */ |
465fe887 | 728 | unsigned int supported_zero_flags; |
c25f53b0 | 729 | |
dc364f4c BC |
730 | /* the following member gives a name to every node on the bs graph. */ |
731 | char node_name[32]; | |
732 | /* element of the list of named nodes building the graph */ | |
733 | QTAILQ_ENTRY(BlockDriverState) node_list; | |
2c1d04e0 HR |
734 | /* element of the list of all BlockDriverStates (all_bdrv_states) */ |
735 | QTAILQ_ENTRY(BlockDriverState) bs_list; | |
9c4218e9 HR |
736 | /* element of the list of monitor-owned BDS */ |
737 | QTAILQ_ENTRY(BlockDriverState) monitor_list; | |
9fcb0251 | 738 | int refcnt; |
dbffbdcf | 739 | |
fbe40ff7 FZ |
740 | /* operation blockers */ |
741 | QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX]; | |
742 | ||
eeec61f2 SH |
743 | /* long-running background operation */ |
744 | BlockJob *job; | |
e971aa12 | 745 | |
bddcec37 KW |
746 | /* The node that this node inherited default options from (and a reopen on |
747 | * which can affect this node by changing these defaults). This is always a | |
748 | * parent node of this node. */ | |
749 | BlockDriverState *inherits_from; | |
6e93e7c4 | 750 | QLIST_HEAD(, BdrvChild) children; |
d42a8a93 | 751 | QLIST_HEAD(, BdrvChild) parents; |
6e93e7c4 | 752 | |
de9c0cec | 753 | QDict *options; |
145f598e | 754 | QDict *explicit_options; |
465bee1d | 755 | BlockdevDetectZeroesOptions detect_zeroes; |
826b6ca0 FZ |
756 | |
757 | /* The error object in use for blocking operations on backing_hd */ | |
758 | Error *backing_blocker; | |
e2462113 | 759 | |
91bcea48 PB |
760 | /* Protected by AioContext lock */ |
761 | ||
91bcea48 | 762 | /* If we are reading a disk image, give its size in sectors. |
5e22479a JQ |
763 | * Generally read-only; it is written to by load_snapshot and |
764 | * save_snapshot, but the block layer is quiescent during those. | |
91bcea48 PB |
765 | */ |
766 | int64_t total_sectors; | |
767 | ||
768 | /* Callback before write request is processed */ | |
769 | NotifierWithReturnList before_write_notifiers; | |
770 | ||
e2462113 FR |
771 | /* threshold limit for writes, in bytes. "High water mark". */ |
772 | uint64_t write_threshold_offset; | |
773 | NotifierWithReturn write_threshold_notifier; | |
51288d79 | 774 | |
2119882c PB |
775 | /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex. |
776 | * Reading from the list can be done with either the BQL or the | |
b64bd51e PB |
777 | * dirty_bitmap_mutex. Modifying a bitmap only requires |
778 | * dirty_bitmap_mutex. */ | |
2119882c | 779 | QemuMutex dirty_bitmap_mutex; |
91bcea48 PB |
780 | QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; |
781 | ||
f7946da2 PB |
782 | /* Offset after the highest byte written to */ |
783 | Stat64 wr_highest_offset; | |
784 | ||
d3faa13e PB |
785 | /* If true, copy read backing sectors into image. Can be >1 if more |
786 | * than one client has requested copy-on-read. Accessed with atomic | |
787 | * ops. | |
788 | */ | |
789 | int copy_on_read; | |
790 | ||
20fc71b2 PB |
791 | /* number of in-flight requests; overall and serialising. |
792 | * Accessed with atomic ops. | |
793 | */ | |
794 | unsigned int in_flight; | |
795 | unsigned int serialising_in_flight; | |
796 | ||
7719f3c9 SH |
797 | /* Kicked to signal main loop when a request completes. */ |
798 | AioWait wait; | |
e2a6ae7f | 799 | |
850d54a2 PB |
800 | /* counter for nested bdrv_io_plug. |
801 | * Accessed with atomic ops. | |
802 | */ | |
803 | unsigned io_plugged; | |
804 | ||
91bcea48 PB |
805 | /* do we need to tell the guest if we have a volatile write cache? */ | |
806 | int enable_write_cache; | |
807 | ||
414c2ec3 | 808 | /* Accessed with atomic ops. */ |
51288d79 | 809 | int quiesce_counter; |
d736f119 KW |
810 | int recursive_quiesce_counter; |
811 | ||
47fec599 | 812 | unsigned int write_gen; /* Current data generation */ |
3783fa3d PB |
813 | |
814 | /* Protected by reqs_lock. */ | |
815 | CoMutex reqs_lock; | |
816 | QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; | |
817 | CoQueue flush_queue; /* Serializing flush queue */ | |
818 | bool active_flush_req; /* Flush request in flight? */ | |
819 | ||
820 | /* Only read/written by whoever has set active_flush_req to true. */ | |
821 | unsigned int flushed_gen; /* Flushed write generation */ | |
ea2384d3 FB |
822 | }; |
823 | ||
281d22d8 HR |
824 | struct BlockBackendRootState { |
825 | int open_flags; | |
826 | bool read_only; | |
827 | BlockdevDetectZeroesOptions detect_zeroes; | |
281d22d8 HR |
828 | }; |
829 | ||
274fccee HR |
830 | typedef enum BlockMirrorBackingMode { |
831 | /* Reuse the existing backing chain from the source for the target. | |
832 | * - sync=full: Set backing BDS to NULL. | |
833 | * - sync=top: Use source's backing BDS. | |
834 | * - sync=none: Use source as the backing BDS. */ | |
835 | MIRROR_SOURCE_BACKING_CHAIN, | |
836 | ||
837 | /* Open the target's backing chain completely anew */ | |
838 | MIRROR_OPEN_BACKING_CHAIN, | |
839 | ||
840 | /* Do not change the target's backing BDS after job completion */ | |
841 | MIRROR_LEAVE_BACKING_CHAIN, | |
842 | } BlockMirrorBackingMode; | |
843 | ||
760e0063 KW |
844 | static inline BlockDriverState *backing_bs(BlockDriverState *bs) |
845 | { | |
846 | return bs->backing ? bs->backing->bs : NULL; | |
847 | } | |
848 | ||
5f535a94 HR |
849 | |
850 | /* Essential block drivers which must always be statically linked into qemu, and | |
851 | * which therefore can be accessed without using bdrv_find_format() */ | |
852 | extern BlockDriver bdrv_file; | |
853 | extern BlockDriver bdrv_raw; | |
854 | extern BlockDriver bdrv_qcow2; | |
855 | ||
a03ef88f | 856 | int coroutine_fn bdrv_co_preadv(BdrvChild *child, |
1bf1cbc9 KW |
857 | int64_t offset, unsigned int bytes, QEMUIOVector *qiov, |
858 | BdrvRequestFlags flags); | |
a03ef88f | 859 | int coroutine_fn bdrv_co_pwritev(BdrvChild *child, |
a8823a3b KW |
860 | int64_t offset, unsigned int bytes, QEMUIOVector *qiov, |
861 | BdrvRequestFlags flags); | |
1bf1cbc9 | 862 | |
0f12264e | 863 | extern unsigned int bdrv_drain_all_count; |
d736f119 KW |
864 | void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); |
865 | void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); | |
866 | ||
eba25057 | 867 | int get_tmp_filename(char *filename, int size); |
38f3ef57 KW |
868 | BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, |
869 | const char *filename); | |
95389c86 | 870 | |
03c320d8 HR |
871 | void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, |
872 | QDict *options); | |
873 | ||
0563e191 | 874 | |
d616b224 SH |
875 | /** |
876 | * bdrv_add_before_write_notifier: | |
877 | * | |
878 | * Register a callback that is invoked before write requests are processed but | |
879 | * after any throttling or waiting for overlapping requests. | |
880 | */ | |
881 | void bdrv_add_before_write_notifier(BlockDriverState *bs, | |
882 | NotifierWithReturn *notifier); | |
883 | ||
dcd04228 SH |
884 | /** |
885 | * bdrv_detach_aio_context: | |
886 | * | |
887 | * May be called from .bdrv_detach_aio_context() to detach children from the | |
888 | * current #AioContext. This is only needed by block drivers that manage their | |
760e0063 | 889 | * own children. Both ->file and ->backing are automatically handled and |
dcd04228 SH |
890 | * block drivers should not call this function on them explicitly. |
891 | */ | |
892 | void bdrv_detach_aio_context(BlockDriverState *bs); | |
893 | ||
894 | /** | |
895 | * bdrv_attach_aio_context: | |
896 | * | |
897 | * May be called from .bdrv_attach_aio_context() to attach children to the new | |
898 | * #AioContext. This is only needed by block drivers that manage their own | |
760e0063 | 899 | * children. Both ->file and ->backing are automatically handled and block |
dcd04228 SH |
900 | * drivers should not call this function on them explicitly. |
901 | */ | |
902 | void bdrv_attach_aio_context(BlockDriverState *bs, | |
903 | AioContext *new_context); | |
904 | ||
33384421 HR |
905 | /** |
906 | * bdrv_add_aio_context_notifier: | |
907 | * | |
908 | * If a long-running job intends to be always run in the same AioContext as a | |
909 | * certain BDS, it may use this function to be notified of changes regarding the | |
910 | * association of the BDS to an AioContext. | |
911 | * | |
912 | * attached_aio_context() is called after the target BDS has been attached to a | |
913 | * new AioContext; detach_aio_context() is called before the target BDS is being | |
914 | * detached from its old AioContext. | |
915 | */ | |
916 | void bdrv_add_aio_context_notifier(BlockDriverState *bs, | |
917 | void (*attached_aio_context)(AioContext *new_context, void *opaque), | |
918 | void (*detach_aio_context)(void *opaque), void *opaque); | |
919 | ||
920 | /** | |
921 | * bdrv_remove_aio_context_notifier: | |
922 | * | |
923 | * Unsubscribe from change notifications regarding the BDS's AioContext. The | |
924 | * parameters given here have to be the same as those given to | |
925 | * bdrv_add_aio_context_notifier(). | |
926 | */ | |
927 | void bdrv_remove_aio_context_notifier(BlockDriverState *bs, | |
928 | void (*aio_context_attached)(AioContext *, | |
929 | void *), | |
930 | void (*aio_context_detached)(void *), | |
931 | void *opaque); | |
932 | ||
c9d1a561 PB |
933 | /** |
934 | * bdrv_wakeup: | |
935 | * @bs: The BlockDriverState for which an I/O operation has been completed. | |
936 | * | |
937 | * Wake up the main thread if it is waiting on BDRV_POLL_WHILE. During | |
938 | * synchronous I/O on a BlockDriverState that is attached to another | |
939 | * I/O thread, the main thread lets the I/O thread's event loop run, | |
940 | * waiting for the I/O operation to complete. A bdrv_wakeup will wake | |
941 | * up the main thread if necessary. | |
942 | * | |
943 | * Manual calls to bdrv_wakeup are rarely necessary, because | |
944 | * bdrv_dec_in_flight already calls it. | |
945 | */ | |
946 | void bdrv_wakeup(BlockDriverState *bs); | |
947 | ||
508c7cb3 CH |
948 | #ifdef _WIN32 |
949 | int is_windows_drive(const char *filename); | |
950 | #endif | |
951 | ||
dc534f8f PB |
952 | /** |
953 | * stream_start: | |
2323322e AG |
954 | * @job_id: The id of the newly-created job, or %NULL to use the |
955 | * device name of @bs. | |
dc534f8f PB |
956 | * @bs: Block device to operate on. |
957 | * @base: Block device that will become the new base, or %NULL to | |
958 | * flatten the whole backing file chain onto @bs. | |
29338003 AG |
959 | * @backing_file_str: The file name that will be written to @bs as the |
960 | * new backing file if the job completes. Ignored if @base is %NULL. | |
c83c66c3 | 961 | * @speed: The maximum speed, in bytes per second, or 0 for unlimited. |
1d809098 | 962 | * @on_error: The action to take upon error. |
fd7f8c65 | 963 | * @errp: Error object. |
dc534f8f PB |
964 | * |
965 | * Start a streaming operation on @bs. Clusters that are unallocated | |
966 | * in @bs, but allocated in any image between @base and @bs (both | |
967 | * exclusive) will be written to @bs. At the end of a successful | |
968 | * streaming job, the backing file of @bs will be changed to | |
29338003 AG |
969 | * @backing_file_str in the written image and to @base in the live |
970 | * BlockDriverState. | |
dc534f8f | 971 | */ |
2323322e AG |
972 | void stream_start(const char *job_id, BlockDriverState *bs, |
973 | BlockDriverState *base, const char *backing_file_str, | |
8254b6d9 | 974 | int64_t speed, BlockdevOnError on_error, Error **errp); |
4f1043b4 | 975 | |
747ff602 JC |
976 | /** |
977 | * commit_start: | |
fd62c609 AG |
978 | * @job_id: The id of the newly-created job, or %NULL to use the |
979 | * device name of @bs. | |
03544a6e FZ |
980 | * @bs: Active block device. |
981 | * @top: Top block device to be committed. | |
982 | * @base: Block device that will be written into, and become the new top. | |
5360782d JS |
983 | * @creation_flags: Flags that control the behavior of the Job lifetime. |
984 | * See @BlockJobCreateFlags | |
747ff602 JC |
985 | * @speed: The maximum speed, in bytes per second, or 0 for unlimited. |
986 | * @on_error: The action to take upon error. | |
54e26900 | 987 | * @backing_file_str: String to use as the backing file in @top's overlay |
0db832f4 KW |
988 | * @filter_node_name: The node name that should be assigned to the filter |
989 | * driver that the commit job inserts into the graph above @top. NULL means | |
990 | * that a node name should be autogenerated. | |
747ff602 JC |
991 | * @errp: Error object. |
992 | * | |
993 | */ | |
fd62c609 | 994 | void commit_start(const char *job_id, BlockDriverState *bs, |
5360782d JS |
995 | BlockDriverState *base, BlockDriverState *top, |
996 | int creation_flags, int64_t speed, | |
8254b6d9 | 997 | BlockdevOnError on_error, const char *backing_file_str, |
0db832f4 | 998 | const char *filter_node_name, Error **errp); |
03544a6e FZ |
999 | /** |
1000 | * commit_active_start: | |
fd62c609 AG |
1001 | * @job_id: The id of the newly-created job, or %NULL to use the |
1002 | * device name of @bs. | |
03544a6e FZ |
1003 | * @bs: Active block device to be committed. |
1004 | * @base: Block device that will be written into, and become the new top. | |
47970dfb JS |
1005 | * @creation_flags: Flags that control the behavior of the Job lifetime. |
1006 | * See @BlockJobCreateFlags | |
03544a6e FZ |
1007 | * @speed: The maximum speed, in bytes per second, or 0 for unlimited. |
1008 | * @on_error: The action to take upon error. | |
0db832f4 KW |
1009 | * @filter_node_name: The node name that should be assigned to the filter |
1010 | * driver that the commit job inserts into the graph above @bs. NULL means that | |
1011 | * a node name should be autogenerated. | |
03544a6e FZ |
1012 | * @cb: Completion function for the job. |
1013 | * @opaque: Opaque pointer value passed to @cb. | |
b49f7ead | 1014 | * @auto_complete: Auto complete the job. |
78bbd910 | 1015 | * @errp: Error object. |
03544a6e FZ |
1016 | * |
1017 | */ | |
fd62c609 | 1018 | void commit_active_start(const char *job_id, BlockDriverState *bs, |
47970dfb JS |
1019 | BlockDriverState *base, int creation_flags, |
1020 | int64_t speed, BlockdevOnError on_error, | |
0db832f4 | 1021 | const char *filter_node_name, |
78bbd910 FZ |
1022 | BlockCompletionFunc *cb, void *opaque, |
1023 | bool auto_complete, Error **errp); | |
893f7eba PB |
1024 | /* |
1025 | * mirror_start: | |
71aa9867 AG |
1026 | * @job_id: The id of the newly-created job, or %NULL to use the |
1027 | * device name of @bs. | |
893f7eba PB |
1028 | * @bs: Block device to operate on. |
1029 | * @target: Block device to write to. | |
09158f00 BC |
1030 | * @replaces: Block graph node name to replace once the mirror is done. Can |
1031 | * only be used when full mirroring is selected. | |
a1999b33 JS |
1032 | * @creation_flags: Flags that control the behavior of the Job lifetime. |
1033 | * See @BlockJobCreateFlags | |
893f7eba | 1034 | * @speed: The maximum speed, in bytes per second, or 0 for unlimited. |
eee13dfe | 1035 | * @granularity: The chosen granularity for the dirty bitmap. |
08e4ed6c | 1036 | * @buf_size: The amount of data that can be in flight at one time. |
893f7eba | 1037 | * @mode: Whether to collapse all images in the chain to the target. |
274fccee | 1038 | * @backing_mode: How to establish the target's backing chain after completion. |
b952b558 PB |
1039 | * @on_source_error: The action to take upon error reading from the source. |
1040 | * @on_target_error: The action to take upon error writing to the target. | |
0fc9f8ea | 1041 | * @unmap: Whether to unmap target where source sectors only contain zeroes. |
6cdbceb1 KW |
1042 | * @filter_node_name: The node name that should be assigned to the filter |
1043 | * driver that the mirror job inserts into the graph above @bs. NULL means that | |
1044 | * a node name should be autogenerated. | |
481debaa | 1045 | * @copy_mode: When to trigger writes to the target. |
893f7eba PB |
1046 | * @errp: Error object. |
1047 | * | |
1048 | * Start a mirroring operation on @bs. Clusters that are allocated | |
e7e4f9f9 | 1049 | * in @bs will be written to @target until the job is cancelled or |
893f7eba PB |
1050 | * manually completed. At the end of a successful mirroring job, |
1051 | * @bs will be switched to read from @target. | |
1052 | */ | |
71aa9867 AG |
1053 | void mirror_start(const char *job_id, BlockDriverState *bs, |
1054 | BlockDriverState *target, const char *replaces, | |
a1999b33 JS |
1055 | int creation_flags, int64_t speed, |
1056 | uint32_t granularity, int64_t buf_size, | |
274fccee HR |
1057 | MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, |
1058 | BlockdevOnError on_source_error, | |
b952b558 | 1059 | BlockdevOnError on_target_error, |
481debaa HR |
1060 | bool unmap, const char *filter_node_name, |
1061 | MirrorCopyMode copy_mode, Error **errp); | |
893f7eba | 1062 | |
98d2c6f2 | 1063 | /* |
111049a4 | 1064 | * backup_job_create: |
70559d49 AG |
1065 | * @job_id: The id of the newly-created job, or %NULL to use the |
1066 | * device name of @bs. | |
98d2c6f2 DM |
1067 | * @bs: Block device to operate on. |
1068 | * @target: Block device to write to. | |
1069 | * @speed: The maximum speed, in bytes per second, or 0 for unlimited. | |
fc5d3f84 | 1070 | * @sync_mode: What parts of the disk image should be copied to the destination. |
4b80ab2b | 1071 | * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_INCREMENTAL. |
98d2c6f2 DM |
1072 | * @on_source_error: The action to take upon error reading from the source. |
1073 | * @on_target_error: The action to take upon error writing to the target. | |
47970dfb JS |
1074 | * @creation_flags: Flags that control the behavior of the Job lifetime. |
1075 | * See @BlockJobCreateFlags | |
98d2c6f2 DM |
1076 | * @cb: Completion function for the job. |
1077 | * @opaque: Opaque pointer value passed to @cb. | |
78f51fde | 1078 | * @txn: Transaction that this job is part of (may be NULL). |
98d2c6f2 | 1079 | * |
111049a4 | 1080 | * Create a backup operation on @bs. Clusters in @bs are written to @target |
98d2c6f2 DM |
1081 | * until the job is cancelled or manually completed. |
1082 | */ | |
111049a4 JS |
1083 | BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, |
1084 | BlockDriverState *target, int64_t speed, | |
1085 | MirrorSyncMode sync_mode, | |
1086 | BdrvDirtyBitmap *sync_bitmap, | |
1087 | bool compress, | |
1088 | BlockdevOnError on_source_error, | |
1089 | BlockdevOnError on_target_error, | |
1090 | int creation_flags, | |
1091 | BlockCompletionFunc *cb, void *opaque, | |
62c9e416 | 1092 | JobTxn *txn, Error **errp); |
98d2c6f2 | 1093 | |
abb21ac3 KW |
1094 | void hmp_drive_add_node(Monitor *mon, const char *optstr); |
1095 | ||
f21d96d0 KW |
1096 | BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, |
1097 | const char *child_name, | |
36fe1331 | 1098 | const BdrvChildRole *child_role, |
d5e6f437 KW |
1099 | uint64_t perm, uint64_t shared_perm, |
1100 | void *opaque, Error **errp); | |
f21d96d0 KW |
1101 | void bdrv_root_unref_child(BdrvChild *child); |
1102 | ||
33a610c3 KW |
1103 | int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, |
1104 | Error **errp); | |
1105 | ||
6a1b9ee1 KW |
1106 | /* Default implementation for BlockDriver.bdrv_child_perm() that can be used by |
1107 | * block filters: Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED and RESIZE to | |
1108 | * all children */ | |
1109 | void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, | |
1110 | const BdrvChildRole *role, | |
e0995dc3 | 1111 | BlockReopenQueue *reopen_queue, |
6a1b9ee1 KW |
1112 | uint64_t perm, uint64_t shared, |
1113 | uint64_t *nperm, uint64_t *nshared); | |
1114 | ||
6b1a044a KW |
1115 | /* Default implementation for BlockDriver.bdrv_child_perm() that can be used by |
1116 | * (non-raw) image formats: Like above for bs->backing, but for bs->file it | |
1117 | * requires WRITE | RESIZE for read-write images, always requires | |
1118 | * CONSISTENT_READ and doesn't share WRITE. */ | |
1119 | void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, | |
1120 | const BdrvChildRole *role, | |
e0995dc3 | 1121 | BlockReopenQueue *reopen_queue, |
6b1a044a KW |
1122 | uint64_t perm, uint64_t shared, |
1123 | uint64_t *nperm, uint64_t *nshared); | |
33a610c3 | 1124 | |
f7cc69b3 | 1125 | /* |
3e4d0e72 | 1126 | * Default implementation for drivers to pass bdrv_co_block_status() to |
f7cc69b3 MP |
1127 | * their file. |
1128 | */ | |
3e4d0e72 EB |
1129 | int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs, |
1130 | bool want_zero, | |
1131 | int64_t offset, | |
1132 | int64_t bytes, | |
1133 | int64_t *pnum, | |
1134 | int64_t *map, | |
1135 | BlockDriverState **file); | |
f7cc69b3 | 1136 | /* |
3e4d0e72 | 1137 | * Default implementation for drivers to pass bdrv_co_block_status() to |
f7cc69b3 MP |
1138 | * their backing file. |
1139 | */ | |
3e4d0e72 EB |
1140 | int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs, |
1141 | bool want_zero, | |
1142 | int64_t offset, | |
1143 | int64_t bytes, | |
1144 | int64_t *pnum, | |
1145 | int64_t *map, | |
1146 | BlockDriverState **file); | |
1f0c461b | 1147 | const char *bdrv_get_parent_name(const BlockDriverState *bs); |
39829a01 | 1148 | void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp); |
a7f53e26 | 1149 | bool blk_dev_has_removable_media(BlockBackend *blk); |
8f3a73bc | 1150 | bool blk_dev_has_tray(BlockBackend *blk); |
a7f53e26 MA |
1151 | void blk_dev_eject_request(BlockBackend *blk, bool force); |
1152 | bool blk_dev_is_tray_open(BlockBackend *blk); | |
1153 | bool blk_dev_is_medium_locked(BlockBackend *blk); | |
a7f53e26 | 1154 | |
0fdf1a4f | 1155 | void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes); |
e0c47b6c | 1156 | |
df9a681d FZ |
1157 | void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); |
1158 | void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in); | |
1159 | ||
99723548 PB |
1160 | void bdrv_inc_in_flight(BlockDriverState *bs); |
1161 | void bdrv_dec_in_flight(BlockDriverState *bs); | |
1162 | ||
9c4218e9 HR |
1163 | void blockdev_close_all_bdrv_states(void); |
1164 | ||
fcc67678 FZ |
1165 | int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset, |
1166 | BdrvChild *dst, uint64_t dst_offset, | |
67b51fb9 VSO |
1167 | uint64_t bytes, |
1168 | BdrvRequestFlags read_flags, | |
1169 | BdrvRequestFlags write_flags); | |
fcc67678 FZ |
1170 | int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset, |
1171 | BdrvChild *dst, uint64_t dst_offset, | |
67b51fb9 VSO |
1172 | uint64_t bytes, |
1173 | BdrvRequestFlags read_flags, | |
1174 | BdrvRequestFlags write_flags); | |
fcc67678 | 1175 | |
3d9f2d2a KW |
1176 | int refresh_total_sectors(BlockDriverState *bs, int64_t hint); |
1177 | ||
ea2384d3 | 1178 | #endif /* BLOCK_INT_H */ |