]>
Commit | Line | Data |
---|---|---|
747ff602 JC |
1 | /* |
2 | * Live block commit | |
3 | * | |
4 | * Copyright Red Hat, Inc. 2012 | |
5 | * | |
6 | * Authors: | |
7 | * Jeff Cody <[email protected]> | |
8 | * Based on stream.c by Stefan Hajnoczi | |
9 | * | |
10 | * This work is licensed under the terms of the GNU LGPL, version 2 or later. | |
11 | * See the COPYING.LIB file in the top-level directory. | |
12 | * | |
13 | */ | |
14 | ||
80c71a24 | 15 | #include "qemu/osdep.h" |
747ff602 | 16 | #include "trace.h" |
737e150e PB |
17 | #include "block/block_int.h" |
18 | #include "block/blockjob.h" | |
da34e65c | 19 | #include "qapi/error.h" |
cc7a8ea7 | 20 | #include "qapi/qmp/qerror.h" |
747ff602 | 21 | #include "qemu/ratelimit.h" |
373340b2 | 22 | #include "sysemu/block-backend.h" |
747ff602 JC |
23 | |
enum {
    /*
     * Number of bytes handled per iteration of the commit loop.  Keep this
     * big enough to span several clusters in one call so that committing
     * contiguous regions of the image stays efficient.
     */
    COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
};

/* Slice length handed to ratelimit_set_speed() by commit_set_speed(). */
#define SLICE_TIME 100000000ULL /* ns */
34 | ||
35 | typedef struct CommitBlockJob { | |
36 | BlockJob common; | |
37 | RateLimit limit; | |
38 | BlockDriverState *active; | |
4653456a KW |
39 | BlockBackend *top; |
40 | BlockBackend *base; | |
92aa5c6d | 41 | BlockdevOnError on_error; |
747ff602 JC |
42 | int base_flags; |
43 | int orig_overlay_flags; | |
54e26900 | 44 | char *backing_file_str; |
747ff602 JC |
45 | } CommitBlockJob; |
46 | ||
4653456a | 47 | static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base, |
747ff602 JC |
48 | int64_t sector_num, int nb_sectors, |
49 | void *buf) | |
50 | { | |
51 | int ret = 0; | |
4653456a KW |
52 | QEMUIOVector qiov; |
53 | struct iovec iov = { | |
54 | .iov_base = buf, | |
55 | .iov_len = nb_sectors * BDRV_SECTOR_SIZE, | |
56 | }; | |
747ff602 | 57 | |
4653456a KW |
58 | qemu_iovec_init_external(&qiov, &iov, 1); |
59 | ||
60 | ret = blk_co_preadv(bs, sector_num * BDRV_SECTOR_SIZE, | |
61 | qiov.size, &qiov, 0); | |
62 | if (ret < 0) { | |
747ff602 JC |
63 | return ret; |
64 | } | |
65 | ||
4653456a KW |
66 | ret = blk_co_pwritev(base, sector_num * BDRV_SECTOR_SIZE, |
67 | qiov.size, &qiov, 0); | |
68 | if (ret < 0) { | |
747ff602 JC |
69 | return ret; |
70 | } | |
71 | ||
72 | return 0; | |
73 | } | |
74 | ||
9e85cd5c SH |
/* Result handed from commit_run() to commit_complete(). */
typedef struct {
    int ret;    /* 0 or a negative error code produced by commit_run() */
} CommitCompleteData;
78 | ||
79 | static void commit_complete(BlockJob *job, void *opaque) | |
747ff602 | 80 | { |
9e85cd5c SH |
81 | CommitBlockJob *s = container_of(job, CommitBlockJob, common); |
82 | CommitCompleteData *data = opaque; | |
747ff602 | 83 | BlockDriverState *active = s->active; |
4653456a KW |
84 | BlockDriverState *top = blk_bs(s->top); |
85 | BlockDriverState *base = blk_bs(s->base); | |
6d759117 | 86 | BlockDriverState *overlay_bs; |
9e85cd5c SH |
87 | int ret = data->ret; |
88 | ||
89 | if (!block_job_is_cancelled(&s->common) && ret == 0) { | |
90 | /* success */ | |
91 | ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str); | |
92 | } | |
93 | ||
94 | /* restore base open flags here if appropriate (e.g., change the base back | |
95 | * to r/o). These reopens do not need to be atomic, since we won't abort | |
96 | * even on failure here */ | |
97 | if (s->base_flags != bdrv_get_flags(base)) { | |
98 | bdrv_reopen(base, s->base_flags, NULL); | |
99 | } | |
100 | overlay_bs = bdrv_find_overlay(active, top); | |
101 | if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) { | |
102 | bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); | |
103 | } | |
104 | g_free(s->backing_file_str); | |
4653456a KW |
105 | blk_unref(s->top); |
106 | blk_unref(s->base); | |
9e85cd5c SH |
107 | block_job_completed(&s->common, ret); |
108 | g_free(data); | |
109 | } | |
110 | ||
111 | static void coroutine_fn commit_run(void *opaque) | |
112 | { | |
113 | CommitBlockJob *s = opaque; | |
114 | CommitCompleteData *data; | |
747ff602 | 115 | int64_t sector_num, end; |
f14a39cc | 116 | uint64_t delay_ns = 0; |
747ff602 JC |
117 | int ret = 0; |
118 | int n = 0; | |
9e85cd5c | 119 | void *buf = NULL; |
747ff602 JC |
120 | int bytes_written = 0; |
121 | int64_t base_len; | |
122 | ||
4653456a | 123 | ret = s->common.len = blk_getlength(s->top); |
747ff602 JC |
124 | |
125 | ||
126 | if (s->common.len < 0) { | |
9e85cd5c | 127 | goto out; |
747ff602 JC |
128 | } |
129 | ||
4653456a | 130 | ret = base_len = blk_getlength(s->base); |
747ff602 | 131 | if (base_len < 0) { |
9e85cd5c | 132 | goto out; |
747ff602 JC |
133 | } |
134 | ||
135 | if (base_len < s->common.len) { | |
4653456a | 136 | ret = blk_truncate(s->base, s->common.len); |
747ff602 | 137 | if (ret) { |
9e85cd5c | 138 | goto out; |
747ff602 JC |
139 | } |
140 | } | |
141 | ||
747ff602 | 142 | end = s->common.len >> BDRV_SECTOR_BITS; |
4653456a | 143 | buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE); |
747ff602 JC |
144 | |
145 | for (sector_num = 0; sector_num < end; sector_num += n) { | |
747ff602 JC |
146 | bool copy; |
147 | ||
747ff602 | 148 | /* Note that even when no rate limit is applied we need to yield |
c57b6656 | 149 | * with no pending I/O here so that bdrv_drain_all() returns. |
747ff602 | 150 | */ |
7483d1e5 | 151 | block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns); |
747ff602 JC |
152 | if (block_job_is_cancelled(&s->common)) { |
153 | break; | |
154 | } | |
155 | /* Copy if allocated above the base */ | |
4653456a KW |
156 | ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base), |
157 | sector_num, | |
4f578637 PB |
158 | COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE, |
159 | &n); | |
747ff602 JC |
160 | copy = (ret == 1); |
161 | trace_commit_one_iteration(s, sector_num, n, ret); | |
162 | if (copy) { | |
4653456a | 163 | ret = commit_populate(s->top, s->base, sector_num, n, buf); |
747ff602 JC |
164 | bytes_written += n * BDRV_SECTOR_SIZE; |
165 | } | |
166 | if (ret < 0) { | |
1e8fb7f1 KW |
167 | BlockErrorAction action = |
168 | block_job_error_action(&s->common, false, s->on_error, -ret); | |
169 | if (action == BLOCK_ERROR_ACTION_REPORT) { | |
9e85cd5c | 170 | goto out; |
747ff602 JC |
171 | } else { |
172 | n = 0; | |
173 | continue; | |
174 | } | |
175 | } | |
176 | /* Publish progress */ | |
177 | s->common.offset += n * BDRV_SECTOR_SIZE; | |
f14a39cc SS |
178 | |
179 | if (copy && s->common.speed) { | |
180 | delay_ns = ratelimit_calculate_delay(&s->limit, n); | |
181 | } | |
747ff602 JC |
182 | } |
183 | ||
184 | ret = 0; | |
185 | ||
9e85cd5c | 186 | out: |
747ff602 JC |
187 | qemu_vfree(buf); |
188 | ||
9e85cd5c SH |
189 | data = g_malloc(sizeof(*data)); |
190 | data->ret = ret; | |
191 | block_job_defer_to_main_loop(&s->common, commit_complete, data); | |
747ff602 JC |
192 | } |
193 | ||
194 | static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp) | |
195 | { | |
196 | CommitBlockJob *s = container_of(job, CommitBlockJob, common); | |
197 | ||
198 | if (speed < 0) { | |
c6bd8c70 | 199 | error_setg(errp, QERR_INVALID_PARAMETER, "speed"); |
747ff602 JC |
200 | return; |
201 | } | |
202 | ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); | |
203 | } | |
204 | ||
3fc4b10a | 205 | static const BlockJobDriver commit_job_driver = { |
747ff602 | 206 | .instance_size = sizeof(CommitBlockJob), |
79e14bf7 | 207 | .job_type = BLOCK_JOB_TYPE_COMMIT, |
747ff602 JC |
208 | .set_speed = commit_set_speed, |
209 | }; | |
210 | ||
fd62c609 AG |
211 | void commit_start(const char *job_id, BlockDriverState *bs, |
212 | BlockDriverState *base, BlockDriverState *top, int64_t speed, | |
097310b5 | 213 | BlockdevOnError on_error, BlockCompletionFunc *cb, |
54e26900 | 214 | void *opaque, const char *backing_file_str, Error **errp) |
747ff602 JC |
215 | { |
216 | CommitBlockJob *s; | |
217 | BlockReopenQueue *reopen_queue = NULL; | |
218 | int orig_overlay_flags; | |
219 | int orig_base_flags; | |
220 | BlockDriverState *overlay_bs; | |
221 | Error *local_err = NULL; | |
222 | ||
18da7f94 | 223 | assert(top != bs); |
747ff602 JC |
224 | if (top == base) { |
225 | error_setg(errp, "Invalid files for merge: top and base are the same"); | |
226 | return; | |
227 | } | |
228 | ||
747ff602 JC |
229 | overlay_bs = bdrv_find_overlay(bs, top); |
230 | ||
231 | if (overlay_bs == NULL) { | |
232 | error_setg(errp, "Could not find overlay image for %s:", top->filename); | |
233 | return; | |
234 | } | |
235 | ||
fd62c609 AG |
236 | s = block_job_create(job_id, &commit_job_driver, bs, speed, |
237 | cb, opaque, errp); | |
834fe28d AG |
238 | if (!s) { |
239 | return; | |
240 | } | |
241 | ||
747ff602 JC |
242 | orig_base_flags = bdrv_get_flags(base); |
243 | orig_overlay_flags = bdrv_get_flags(overlay_bs); | |
244 | ||
245 | /* convert base & overlay_bs to r/w, if necessary */ | |
747ff602 | 246 | if (!(orig_overlay_flags & BDRV_O_RDWR)) { |
4d2cb092 | 247 | reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL, |
747ff602 JC |
248 | orig_overlay_flags | BDRV_O_RDWR); |
249 | } | |
3db2bd55 AG |
250 | if (!(orig_base_flags & BDRV_O_RDWR)) { |
251 | reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL, | |
252 | orig_base_flags | BDRV_O_RDWR); | |
253 | } | |
747ff602 JC |
254 | if (reopen_queue) { |
255 | bdrv_reopen_multiple(reopen_queue, &local_err); | |
256 | if (local_err != NULL) { | |
257 | error_propagate(errp, local_err); | |
834fe28d | 258 | block_job_unref(&s->common); |
747ff602 JC |
259 | return; |
260 | } | |
261 | } | |
262 | ||
263 | ||
4653456a KW |
264 | s->base = blk_new(); |
265 | blk_insert_bs(s->base, base); | |
266 | ||
267 | s->top = blk_new(); | |
268 | blk_insert_bs(s->top, top); | |
269 | ||
747ff602 JC |
270 | s->active = bs; |
271 | ||
272 | s->base_flags = orig_base_flags; | |
273 | s->orig_overlay_flags = orig_overlay_flags; | |
274 | ||
54e26900 JC |
275 | s->backing_file_str = g_strdup(backing_file_str); |
276 | ||
747ff602 | 277 | s->on_error = on_error; |
0b8b8753 | 278 | s->common.co = qemu_coroutine_create(commit_run, s); |
747ff602 JC |
279 | |
280 | trace_commit_start(bs, base, top, s, s->common.co, opaque); | |
0b8b8753 | 281 | qemu_coroutine_enter(s->common.co); |
747ff602 | 282 | } |
83fd6dd3 KW |
283 | |
284 | ||
285 | #define COMMIT_BUF_SECTORS 2048 | |
286 | ||
287 | /* commit COW file into the raw image */ | |
288 | int bdrv_commit(BlockDriverState *bs) | |
289 | { | |
f8e2bd53 | 290 | BlockBackend *src, *backing; |
83fd6dd3 KW |
291 | BlockDriver *drv = bs->drv; |
292 | int64_t sector, total_sectors, length, backing_length; | |
293 | int n, ro, open_flags; | |
294 | int ret = 0; | |
295 | uint8_t *buf = NULL; | |
296 | ||
297 | if (!drv) | |
298 | return -ENOMEDIUM; | |
299 | ||
300 | if (!bs->backing) { | |
301 | return -ENOTSUP; | |
302 | } | |
303 | ||
304 | if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || | |
305 | bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { | |
306 | return -EBUSY; | |
307 | } | |
308 | ||
309 | ro = bs->backing->bs->read_only; | |
310 | open_flags = bs->backing->bs->open_flags; | |
311 | ||
312 | if (ro) { | |
313 | if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) { | |
314 | return -EACCES; | |
315 | } | |
316 | } | |
317 | ||
f8e2bd53 KW |
318 | src = blk_new(); |
319 | blk_insert_bs(src, bs); | |
320 | ||
321 | backing = blk_new(); | |
322 | blk_insert_bs(backing, bs->backing->bs); | |
323 | ||
324 | length = blk_getlength(src); | |
83fd6dd3 KW |
325 | if (length < 0) { |
326 | ret = length; | |
327 | goto ro_cleanup; | |
328 | } | |
329 | ||
f8e2bd53 | 330 | backing_length = blk_getlength(backing); |
83fd6dd3 KW |
331 | if (backing_length < 0) { |
332 | ret = backing_length; | |
333 | goto ro_cleanup; | |
334 | } | |
335 | ||
336 | /* If our top snapshot is larger than the backing file image, | |
337 | * grow the backing file image if possible. If not possible, | |
338 | * we must return an error */ | |
339 | if (length > backing_length) { | |
f8e2bd53 | 340 | ret = blk_truncate(backing, length); |
83fd6dd3 KW |
341 | if (ret < 0) { |
342 | goto ro_cleanup; | |
343 | } | |
344 | } | |
345 | ||
346 | total_sectors = length >> BDRV_SECTOR_BITS; | |
347 | ||
f8e2bd53 KW |
348 | /* blk_try_blockalign() for src will choose an alignment that works for |
349 | * backing as well, so no need to compare the alignment manually. */ | |
350 | buf = blk_try_blockalign(src, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); | |
83fd6dd3 KW |
351 | if (buf == NULL) { |
352 | ret = -ENOMEM; | |
353 | goto ro_cleanup; | |
354 | } | |
355 | ||
356 | for (sector = 0; sector < total_sectors; sector += n) { | |
357 | ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n); | |
358 | if (ret < 0) { | |
359 | goto ro_cleanup; | |
360 | } | |
361 | if (ret) { | |
f8e2bd53 KW |
362 | ret = blk_pread(src, sector * BDRV_SECTOR_SIZE, buf, |
363 | n * BDRV_SECTOR_SIZE); | |
83fd6dd3 KW |
364 | if (ret < 0) { |
365 | goto ro_cleanup; | |
366 | } | |
367 | ||
f8e2bd53 KW |
368 | ret = blk_pwrite(backing, sector * BDRV_SECTOR_SIZE, buf, |
369 | n * BDRV_SECTOR_SIZE, 0); | |
83fd6dd3 KW |
370 | if (ret < 0) { |
371 | goto ro_cleanup; | |
372 | } | |
373 | } | |
374 | } | |
375 | ||
376 | if (drv->bdrv_make_empty) { | |
377 | ret = drv->bdrv_make_empty(bs); | |
378 | if (ret < 0) { | |
379 | goto ro_cleanup; | |
380 | } | |
f8e2bd53 | 381 | blk_flush(src); |
83fd6dd3 KW |
382 | } |
383 | ||
384 | /* | |
385 | * Make sure all data we wrote to the backing device is actually | |
386 | * stable on disk. | |
387 | */ | |
f8e2bd53 | 388 | blk_flush(backing); |
83fd6dd3 KW |
389 | |
390 | ret = 0; | |
391 | ro_cleanup: | |
392 | qemu_vfree(buf); | |
393 | ||
f8e2bd53 KW |
394 | blk_unref(src); |
395 | blk_unref(backing); | |
396 | ||
83fd6dd3 KW |
397 | if (ro) { |
398 | /* ignoring error return here */ | |
399 | bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL); | |
400 | } | |
401 | ||
402 | return ret; | |
403 | } |