/*
 * block_copy API
 *
 * Copyright (C) 2013 Proxmox Server Solutions
 * Copyright (c) 2019 Virtuozzo International GmbH.
 *
 * Authors:
 *  Dietmar Maurer ([email protected])
 *  Vladimir Sementsov-Ogievskiy <[email protected]>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"

#include "trace.h"
#include "qapi/error.h"
#include "block/block-copy.h"
#include "sysemu/block-backend.h"
#include "qemu/units.h"
#include "qemu/coroutine.h"
#include "block/aio_task.h"

#define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
#define BLOCK_COPY_MAX_MEM (128 * MiB)
#define BLOCK_COPY_MAX_WORKERS 64

static coroutine_fn int block_copy_task_entry(AioTask *task);

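/*
 * State shared by all tasks spawned from one block_copy_dirty_clusters()
 * call: remembers whether any task has failed and, if so, whether the
 * failure happened on the read side.
 */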
typedef struct BlockCopyCallState {
    bool failed;
    bool error_is_read;
} BlockCopyCallState;

typedef struct BlockCopyTask {
    AioTask task;

    BlockCopyState *s;
    BlockCopyCallState *call_state;
    int64_t offset;
    int64_t bytes;
    bool zeroes;
    QLIST_ENTRY(BlockCopyTask) list;
    CoQueue wait_queue; /* coroutines blocked on this task */
} BlockCopyTask;

static int64_t task_end(BlockCopyTask *task)
{
    return task->offset + task->bytes;
}

typedef struct BlockCopyState {
    /*
     * BdrvChild objects are not owned or managed by block-copy. They are
     * provided by the block-copy user, who is responsible for appropriate
     * permissions on these children.
     */
    BdrvChild *source;
    BdrvChild *target;
    BdrvDirtyBitmap *copy_bitmap;
    int64_t in_flight_bytes;
    int64_t cluster_size;
    bool use_copy_range;
    int64_t copy_size;
    uint64_t len;
    QLIST_HEAD(, BlockCopyTask) tasks;

    BdrvRequestFlags write_flags;

    /*
     * skip_unallocated:
     *
     * Used by sync=top jobs, which first scan the source node for unallocated
     * areas and clear them in the copy_bitmap. During this process, the bitmap
     * is thus not fully initialized: it may still have bits set for areas that
     * are unallocated and should actually not be copied.
     *
     * This is indicated by skip_unallocated.
     *
     * In this case, block_copy() will query the source's allocation status,
     * skip unallocated regions, clear them in the copy_bitmap, and invoke
     * block_copy_reset_unallocated() every time it does.
     */
    bool skip_unallocated;

    ProgressMeter *progress;
    /* progress_bytes_callback: called when some copying progress is done. */
    ProgressBytesCallbackFunc progress_bytes_callback;
    void *progress_opaque;

    SharedResource *mem;
} BlockCopyState;

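/*
 * Find an existing task whose range overlaps the given @offset/@bytes range,
 * or return NULL if there is none.
 */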
static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
                                            int64_t offset, int64_t bytes)
{
    BlockCopyTask *t;

    QLIST_FOREACH(t, &s->tasks, list) {
        if (offset + bytes > t->offset && offset < t->offset + t->bytes) {
            return t;
        }
    }

    return NULL;
}

/*
 * If there are no intersecting tasks, return false. Otherwise, wait for the
 * first intersecting task found to finish and return true.
 */
static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset,
                                             int64_t bytes)
{
    BlockCopyTask *task = find_conflicting_task(s, offset, bytes);

    if (!task) {
        return false;
    }

    qemu_co_queue_wait(&task->wait_queue, NULL);

    return true;
}

/*
 * Search for the first dirty area in the offset/bytes range and create a task
 * at the beginning of it.
 */
static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
                                             BlockCopyCallState *call_state,
                                             int64_t offset, int64_t bytes)
{
    BlockCopyTask *task;

    if (!bdrv_dirty_bitmap_next_dirty_area(s->copy_bitmap,
                                           offset, offset + bytes,
                                           s->copy_size, &offset, &bytes))
    {
        return NULL;
    }

    /* The region is dirty, so no existing tasks can be in it. */
    assert(!find_conflicting_task(s, offset, bytes));

    bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
    s->in_flight_bytes += bytes;

    task = g_new(BlockCopyTask, 1);
    *task = (BlockCopyTask) {
        .task.func = block_copy_task_entry,
        .s = s,
        .call_state = call_state,
        .offset = offset,
        .bytes = bytes,
    };
    qemu_co_queue_init(&task->wait_queue);
    QLIST_INSERT_HEAD(&s->tasks, task, list);

    return task;
}

/*
 * block_copy_task_shrink
 *
 * Drop the tail of the task, to be handled later. Set the dirty bits back and
 * wake up all tasks waiting for us (maybe some of them no longer intersect
 * the shrunk task).
 */
static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task,
                                                int64_t new_bytes)
{
    if (new_bytes == task->bytes) {
        return;
    }

    assert(new_bytes > 0 && new_bytes < task->bytes);

    task->s->in_flight_bytes -= task->bytes - new_bytes;
    bdrv_set_dirty_bitmap(task->s->copy_bitmap,
                          task->offset + new_bytes, task->bytes - new_bytes);

    task->bytes = new_bytes;
    qemu_co_queue_restart_all(&task->wait_queue);
}

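/*
 * Finish a task: account its in-flight bytes, re-dirty the range on failure,
 * remove the task from the list and wake up everybody waiting on it.
 */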
static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret)
{
    task->s->in_flight_bytes -= task->bytes;
    if (ret < 0) {
        bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->offset, task->bytes);
    }
    QLIST_REMOVE(task, list);
    qemu_co_queue_restart_all(&task->wait_queue);
}

void block_copy_state_free(BlockCopyState *s)
{
    if (!s) {
        return;
    }

    bdrv_release_dirty_bitmap(s->copy_bitmap);
    shres_destroy(s->mem);
    g_free(s);
}

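/*
 * Largest single request size that both the source and the target accept
 * (their max_transfer limits combined; unset limits are ignored), capped at
 * INT_MAX.
 */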
static uint32_t block_copy_max_transfer(BdrvChild *source, BdrvChild *target)
{
    return MIN_NON_ZERO(INT_MAX,
                        MIN_NON_ZERO(source->bs->bl.max_transfer,
                                     target->bs->bl.max_transfer));
}

BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
                                     int64_t cluster_size,
                                     BdrvRequestFlags write_flags, Error **errp)
{
    BlockCopyState *s;
    BdrvDirtyBitmap *copy_bitmap;

    copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
                                           errp);
    if (!copy_bitmap) {
        return NULL;
    }
    bdrv_disable_dirty_bitmap(copy_bitmap);

    s = g_new(BlockCopyState, 1);
    *s = (BlockCopyState) {
        .source = source,
        .target = target,
        .copy_bitmap = copy_bitmap,
        .cluster_size = cluster_size,
        .len = bdrv_dirty_bitmap_size(copy_bitmap),
        .write_flags = write_flags,
        .mem = shres_create(BLOCK_COPY_MAX_MEM),
    };

    if (block_copy_max_transfer(source, target) < cluster_size) {
        /*
         * copy_range does not respect max_transfer. We don't want to bother
         * with requests smaller than the block-copy cluster size, so fall
         * back to buffered copying (read and write respect max_transfer on
         * their own).
         */
        s->use_copy_range = false;
        s->copy_size = cluster_size;
    } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
        /* Compression supports only cluster-size writes and no copy-range. */
        s->use_copy_range = false;
        s->copy_size = cluster_size;
    } else {
        /*
         * Enable copy-range, but keep a small copy_size until the first
         * successful copy_range (see block_copy_do_copy).
         */
        s->use_copy_range = true;
        s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
    }

    QLIST_INIT(&s->tasks);

    return s;
}

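/*
 * Typical usage (sketch only; the exact call sequence depends on the block-copy
 * user, e.g. the backup job, and all arguments below are caller-provided):
 * create the state, wire up progress reporting, call block_copy() from a
 * coroutine, and free the state when done:
 *
 *     s = block_copy_state_new(source, target, cluster_size, write_flags,
 *                              errp);
 *     block_copy_set_progress_meter(s, pm);
 *     ...
 *     ret = block_copy(s, offset, bytes, &error_is_read);
 *     ...
 *     block_copy_state_free(s);
 */
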
void block_copy_set_progress_callback(
        BlockCopyState *s,
        ProgressBytesCallbackFunc progress_bytes_callback,
        void *progress_opaque)
{
    s->progress_bytes_callback = progress_bytes_callback;
    s->progress_opaque = progress_opaque;
}

void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
{
    s->progress = pm;
}

/*
 * Takes ownership of @task
 *
 * If pool is NULL, directly run the task; otherwise schedule it into the pool.
 *
 * Returns: task.func return code if pool is NULL
 *          otherwise -ECANCELED if pool status is bad
 *          otherwise 0 (successfully scheduled)
 */
static coroutine_fn int block_copy_task_run(AioTaskPool *pool,
                                            BlockCopyTask *task)
{
    if (!pool) {
        int ret = task->task.func(&task->task);

        g_free(task);
        return ret;
    }

    aio_task_pool_wait_slot(pool);
    if (aio_task_pool_status(pool) < 0) {
        co_put_to_shres(task->s->mem, task->bytes);
        block_copy_task_end(task, -ECANCELED);
        g_free(task);
        return -ECANCELED;
    }

    aio_task_pool_start_task(pool, &task->task);

    return 0;
}

/*
 * block_copy_do_copy
 *
 * Copy a cluster-aligned chunk. The requested region is allowed to exceed
 * s->len only to cover the last cluster when s->len is not aligned to
 * clusters.
 *
 * No synchronization here: neither bitmap nor intersecting-request handling,
 * only the copy itself.
 *
 * Returns 0 on success.
 */
static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
                                           int64_t offset, int64_t bytes,
                                           bool zeroes, bool *error_is_read)
{
    int ret;
    int64_t nbytes = MIN(offset + bytes, s->len) - offset;
    void *bounce_buffer = NULL;

    assert(offset >= 0 && bytes > 0 && INT64_MAX - offset >= bytes);
    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
    assert(offset < s->len);
    assert(offset + bytes <= s->len ||
           offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
    assert(nbytes < INT_MAX);

    if (zeroes) {
        ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags &
                                    ~BDRV_REQ_WRITE_COMPRESSED);
        if (ret < 0) {
            trace_block_copy_write_zeroes_fail(s, offset, ret);
            if (error_is_read) {
                *error_is_read = false;
            }
        }
        return ret;
    }

    if (s->use_copy_range) {
        ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes,
                                 0, s->write_flags);
        if (ret < 0) {
            trace_block_copy_copy_range_fail(s, offset, ret);
            s->use_copy_range = false;
            s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
            /* Fall back to read+write with an allocated buffer */
        } else {
            if (s->use_copy_range) {
                /*
                 * Successful copy-range. Now increase copy_size. copy_range
                 * does not respect max_transfer (it's a TODO), so we factor
                 * that in here.
                 *
                 * Note: we double-check s->use_copy_range for the case when a
                 * parallel block-copy request unsets it during the previous
                 * bdrv_co_copy_range call.
                 */
                s->copy_size =
                        MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
                            QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source,
                                                                    s->target),
                                            s->cluster_size));
            }
            goto out;
        }
    }

    /*
     * In case of a failed copy_range request above, we may proceed with a
     * buffered request larger than BLOCK_COPY_MAX_BUFFER. Still, further
     * requests will be properly limited, so don't care too much. Moreover,
     * the most likely case (copy_range is unsupported for the configuration,
     * so the very first copy_range request fails) is handled by setting a
     * large copy_size only after the first successful copy_range.
     */

    bounce_buffer = qemu_blockalign(s->source->bs, nbytes);

    ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0);
    if (ret < 0) {
        trace_block_copy_read_fail(s, offset, ret);
        if (error_is_read) {
            *error_is_read = true;
        }
        goto out;
    }

    ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer,
                         s->write_flags);
    if (ret < 0) {
        trace_block_copy_write_fail(s, offset, ret);
        if (error_is_read) {
            *error_is_read = false;
        }
        goto out;
    }

out:
    qemu_vfree(bounce_buffer);

    return ret;
}

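/*
 * AioTask entry point: copy one task's range, propagate a failure into the
 * shared call state or account the progress, then release the shared-resource
 * reservation and finish the task.
 */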
static coroutine_fn int block_copy_task_entry(AioTask *task)
{
    BlockCopyTask *t = container_of(task, BlockCopyTask, task);
    bool error_is_read;
    int ret;

    ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes,
                             &error_is_read);
    if (ret < 0 && !t->call_state->failed) {
        t->call_state->failed = true;
        t->call_state->error_is_read = error_is_read;
    } else {
        progress_work_done(t->s->progress, t->bytes);
        t->s->progress_bytes_callback(t->bytes, t->s->progress_opaque);
    }
    co_put_to_shres(t->s->mem, t->bytes);
    block_copy_task_end(t, ret);

    return ret;
}

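/*
 * Query the block status of the source at @offset and store a cluster-aligned
 * byte count sharing that status in *pnum. When skip_unallocated is set, the
 * status is queried above the backing file. On error, pretend there is one
 * cluster of allocated data so the caller simply copies it.
 */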
static int block_copy_block_status(BlockCopyState *s, int64_t offset,
                                   int64_t bytes, int64_t *pnum)
{
    int64_t num;
    BlockDriverState *base;
    int ret;

    if (s->skip_unallocated && s->source->bs->backing) {
        base = s->source->bs->backing->bs;
    } else {
        base = NULL;
    }

    ret = bdrv_block_status_above(s->source->bs, base, offset, bytes, &num,
                                  NULL, NULL);
    if (ret < 0 || num < s->cluster_size) {
        /*
         * On error, or if we failed to obtain a large enough chunk, just fall
         * back to copying one cluster.
         */
        num = s->cluster_size;
        ret = BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_DATA;
    } else if (offset + num == s->len) {
        num = QEMU_ALIGN_UP(num, s->cluster_size);
    } else {
        num = QEMU_ALIGN_DOWN(num, s->cluster_size);
    }

    *pnum = num;
    return ret;
}

/*
 * Check if the cluster starting at @offset is allocated or not.
 * Return via @pnum the number of contiguous clusters sharing this allocation.
 */
static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
                                           int64_t *pnum)
{
    BlockDriverState *bs = s->source->bs;
    int64_t count, total_count = 0;
    int64_t bytes = s->len - offset;
    int ret;

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));

    while (true) {
        ret = bdrv_is_allocated(bs, offset, bytes, &count);
        if (ret < 0) {
            return ret;
        }

        total_count += count;

        if (ret || count == 0) {
            /*
             * ret: partial segment(s) are considered allocated.
             * otherwise: unallocated tail is treated as an entire segment.
             */
            *pnum = DIV_ROUND_UP(total_count, s->cluster_size);
            return ret;
        }

        /* Unallocated segment(s) with uncertain following segment(s) */
        if (total_count >= s->cluster_size) {
            *pnum = total_count / s->cluster_size;
            return 0;
        }

        offset += count;
        bytes -= count;
    }
}

/*
 * Reset bits in copy_bitmap starting at offset if they represent unallocated
 * data in the image. May reset subsequent contiguous bits.
 * @return 0 when the cluster at @offset was unallocated,
 *         1 otherwise, and -ret on error.
 */
int64_t block_copy_reset_unallocated(BlockCopyState *s,
                                     int64_t offset, int64_t *count)
{
    int ret;
    int64_t clusters, bytes;

    ret = block_copy_is_cluster_allocated(s, offset, &clusters);
    if (ret < 0) {
        return ret;
    }

    bytes = clusters * s->cluster_size;

    if (!ret) {
        bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
        progress_set_remaining(s->progress,
                               bdrv_get_dirty_count(s->copy_bitmap) +
                               s->in_flight_bytes);
    }

    *count = bytes;
    return ret;
}

/*
 * block_copy_dirty_clusters
 *
 * Copy dirty clusters in the @offset/@bytes range.
 * Returns 1 if dirty clusters were found and successfully copied, 0 if no
 * dirty clusters were found, and -errno on failure.
 */
static int coroutine_fn block_copy_dirty_clusters(BlockCopyState *s,
                                                  int64_t offset, int64_t bytes,
                                                  bool *error_is_read)
{
    int ret = 0;
    bool found_dirty = false;
    int64_t end = offset + bytes;
    AioTaskPool *aio = NULL;
    BlockCopyCallState call_state = {false, false};

    /*
     * The block_copy() user is responsible for keeping the source and the
     * target in the same aio context.
     */
    assert(bdrv_get_aio_context(s->source->bs) ==
           bdrv_get_aio_context(s->target->bs));

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));

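    /*
     * Repeatedly pick the next dirty area, create a task for it and run it
     * (possibly through the AioTaskPool) until the range is exhausted or the
     * pool reports an error.
     */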
    while (bytes && aio_task_pool_status(aio) == 0) {
        BlockCopyTask *task;
        int64_t status_bytes;

        task = block_copy_task_create(s, &call_state, offset, bytes);
        if (!task) {
            /* No more dirty bits in the bitmap */
            trace_block_copy_skip_range(s, offset, bytes);
            break;
        }
        if (task->offset > offset) {
            trace_block_copy_skip_range(s, offset, task->offset - offset);
        }

        found_dirty = true;

        ret = block_copy_block_status(s, task->offset, task->bytes,
                                      &status_bytes);
        assert(ret >= 0); /* never fail */
        if (status_bytes < task->bytes) {
            block_copy_task_shrink(task, status_bytes);
        }
        if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
            block_copy_task_end(task, 0);
            progress_set_remaining(s->progress,
                                   bdrv_get_dirty_count(s->copy_bitmap) +
                                   s->in_flight_bytes);
            trace_block_copy_skip_range(s, task->offset, task->bytes);
            offset = task_end(task);
            bytes = end - offset;
            /* Only free the task after its fields are no longer needed */
            g_free(task);
            continue;
        }
        task->zeroes = ret & BDRV_BLOCK_ZERO;

        trace_block_copy_process(s, task->offset);

        co_get_from_shres(s->mem, task->bytes);

        offset = task_end(task);
        bytes = end - offset;

        if (!aio && bytes) {
            aio = aio_task_pool_new(BLOCK_COPY_MAX_WORKERS);
        }

        ret = block_copy_task_run(aio, task);
        if (ret < 0) {
            goto out;
        }
    }

out:
    if (aio) {
        aio_task_pool_wait_all(aio);

        /*
         * We are not really interested in -ECANCELED returned from
         * block_copy_task_run. If it fails, it means some task has already
         * failed for a real reason; let's return the first failure.
         * Still, assert that we don't overwrite a failure with success.
         */
        assert(ret == 0 || aio_task_pool_status(aio) < 0);
        ret = aio_task_pool_status(aio);

        aio_task_pool_free(aio);
    }
    if (error_is_read && ret < 0) {
        *error_is_read = call_state.error_is_read;
    }

    return ret < 0 ? ret : found_dirty;
}

/*
 * block_copy
 *
 * Copy the requested region according to the dirty bitmap.
 * Cooperate with parallel block_copy requests: if they succeed, that helps
 * us; if they fail, we retry the regions that were not copied. So, if we
 * return an error, it means that some I/O operation failed in the context of
 * _this_ block_copy call, not in some parallel operation.
 */
int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
                            bool *error_is_read)
{
    int ret;

    do {
        ret = block_copy_dirty_clusters(s, offset, bytes, error_is_read);

        if (ret == 0) {
            ret = block_copy_wait_one(s, offset, bytes);
        }

        /*
         * We retry in two cases:
         * 1. Some progress was made.
         *    Something was copied, which means that there were yield points
         *    and some new dirty bits may have appeared (due to failed
         *    parallel block-copy requests).
         * 2. We have waited for some intersecting block-copy request.
         *    It may have failed and produced new dirty bits.
         */
    } while (ret > 0);

    return ret;
}

BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s)
{
    return s->copy_bitmap;
}

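/*
 * Enable or disable skipping of unallocated clusters; see the comment on the
 * skip_unallocated field in BlockCopyState above.
 */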
void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
{
    s->skip_unallocated = skip;
}