]>
Commit | Line | Data |
---|---|---|
98d2c6f2 DM |
1 | /* |
2 | * QEMU backup | |
3 | * | |
4 | * Copyright (C) 2013 Proxmox Server Solutions | |
5 | * | |
6 | * Authors: | |
7 | * Dietmar Maurer ([email protected]) | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
10 | * See the COPYING file in the top-level directory. | |
11 | * | |
12 | */ | |
13 | ||
14 | #include <stdio.h> | |
15 | #include <errno.h> | |
16 | #include <unistd.h> | |
17 | ||
18 | #include "trace.h" | |
19 | #include "block/block.h" | |
20 | #include "block/block_int.h" | |
21 | #include "block/blockjob.h" | |
22 | #include "qemu/ratelimit.h" | |
23 | ||
/* The backup works on fixed-size clusters of 2^BACKUP_CLUSTER_BITS bytes. */
#define BACKUP_CLUSTER_BITS 16
#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
/* Number of BDRV_SECTOR_SIZE-sized sectors that make up one backup cluster. */
#define BACKUP_SECTORS_PER_CLUSTER (BACKUP_CLUSTER_SIZE / BDRV_SECTOR_SIZE)

/* Slice length handed to ratelimit_set_speed() by backup_set_speed(). */
#define SLICE_TIME 100000000ULL /* ns */
29 | ||
30 | typedef struct CowRequest { | |
31 | int64_t start; | |
32 | int64_t end; | |
33 | QLIST_ENTRY(CowRequest) list; | |
34 | CoQueue wait_queue; /* coroutines blocked on this request */ | |
35 | } CowRequest; | |
36 | ||
37 | typedef struct BackupBlockJob { | |
38 | BlockJob common; | |
39 | BlockDriverState *target; | |
40 | RateLimit limit; | |
41 | BlockdevOnError on_source_error; | |
42 | BlockdevOnError on_target_error; | |
43 | CoRwlock flush_rwlock; | |
44 | uint64_t sectors_read; | |
45 | HBitmap *bitmap; | |
46 | QLIST_HEAD(, CowRequest) inflight_reqs; | |
47 | } BackupBlockJob; | |
48 | ||
49 | /* See if in-flight requests overlap and wait for them to complete */ | |
50 | static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job, | |
51 | int64_t start, | |
52 | int64_t end) | |
53 | { | |
54 | CowRequest *req; | |
55 | bool retry; | |
56 | ||
57 | do { | |
58 | retry = false; | |
59 | QLIST_FOREACH(req, &job->inflight_reqs, list) { | |
60 | if (end > req->start && start < req->end) { | |
61 | qemu_co_queue_wait(&req->wait_queue); | |
62 | retry = true; | |
63 | break; | |
64 | } | |
65 | } | |
66 | } while (retry); | |
67 | } | |
68 | ||
69 | /* Keep track of an in-flight request */ | |
70 | static void cow_request_begin(CowRequest *req, BackupBlockJob *job, | |
71 | int64_t start, int64_t end) | |
72 | { | |
73 | req->start = start; | |
74 | req->end = end; | |
75 | qemu_co_queue_init(&req->wait_queue); | |
76 | QLIST_INSERT_HEAD(&job->inflight_reqs, req, list); | |
77 | } | |
78 | ||
79 | /* Forget about a completed request */ | |
80 | static void cow_request_end(CowRequest *req) | |
81 | { | |
82 | QLIST_REMOVE(req, list); | |
83 | qemu_co_queue_restart_all(&req->wait_queue); | |
84 | } | |
85 | ||
86 | static int coroutine_fn backup_do_cow(BlockDriverState *bs, | |
87 | int64_t sector_num, int nb_sectors, | |
88 | bool *error_is_read) | |
89 | { | |
90 | BackupBlockJob *job = (BackupBlockJob *)bs->job; | |
91 | CowRequest cow_request; | |
92 | struct iovec iov; | |
93 | QEMUIOVector bounce_qiov; | |
94 | void *bounce_buffer = NULL; | |
95 | int ret = 0; | |
96 | int64_t start, end; | |
97 | int n; | |
98 | ||
99 | qemu_co_rwlock_rdlock(&job->flush_rwlock); | |
100 | ||
101 | start = sector_num / BACKUP_SECTORS_PER_CLUSTER; | |
102 | end = DIV_ROUND_UP(sector_num + nb_sectors, BACKUP_SECTORS_PER_CLUSTER); | |
103 | ||
104 | trace_backup_do_cow_enter(job, start, sector_num, nb_sectors); | |
105 | ||
106 | wait_for_overlapping_requests(job, start, end); | |
107 | cow_request_begin(&cow_request, job, start, end); | |
108 | ||
109 | for (; start < end; start++) { | |
110 | if (hbitmap_get(job->bitmap, start)) { | |
111 | trace_backup_do_cow_skip(job, start); | |
112 | continue; /* already copied */ | |
113 | } | |
114 | ||
115 | trace_backup_do_cow_process(job, start); | |
116 | ||
117 | n = MIN(BACKUP_SECTORS_PER_CLUSTER, | |
118 | job->common.len / BDRV_SECTOR_SIZE - | |
119 | start * BACKUP_SECTORS_PER_CLUSTER); | |
120 | ||
121 | if (!bounce_buffer) { | |
122 | bounce_buffer = qemu_blockalign(bs, BACKUP_CLUSTER_SIZE); | |
123 | } | |
124 | iov.iov_base = bounce_buffer; | |
125 | iov.iov_len = n * BDRV_SECTOR_SIZE; | |
126 | qemu_iovec_init_external(&bounce_qiov, &iov, 1); | |
127 | ||
128 | ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n, | |
129 | &bounce_qiov); | |
130 | if (ret < 0) { | |
131 | trace_backup_do_cow_read_fail(job, start, ret); | |
132 | if (error_is_read) { | |
133 | *error_is_read = true; | |
134 | } | |
135 | goto out; | |
136 | } | |
137 | ||
138 | if (buffer_is_zero(iov.iov_base, iov.iov_len)) { | |
139 | ret = bdrv_co_write_zeroes(job->target, | |
140 | start * BACKUP_SECTORS_PER_CLUSTER, n); | |
141 | } else { | |
142 | ret = bdrv_co_writev(job->target, | |
143 | start * BACKUP_SECTORS_PER_CLUSTER, n, | |
144 | &bounce_qiov); | |
145 | } | |
146 | if (ret < 0) { | |
147 | trace_backup_do_cow_write_fail(job, start, ret); | |
148 | if (error_is_read) { | |
149 | *error_is_read = false; | |
150 | } | |
151 | goto out; | |
152 | } | |
153 | ||
154 | hbitmap_set(job->bitmap, start, 1); | |
155 | ||
156 | /* Publish progress, guest I/O counts as progress too. Note that the | |
157 | * offset field is an opaque progress value, it is not a disk offset. | |
158 | */ | |
159 | job->sectors_read += n; | |
160 | job->common.offset += n * BDRV_SECTOR_SIZE; | |
161 | } | |
162 | ||
163 | out: | |
164 | if (bounce_buffer) { | |
165 | qemu_vfree(bounce_buffer); | |
166 | } | |
167 | ||
168 | cow_request_end(&cow_request); | |
169 | ||
170 | trace_backup_do_cow_return(job, sector_num, nb_sectors, ret); | |
171 | ||
172 | qemu_co_rwlock_unlock(&job->flush_rwlock); | |
173 | ||
174 | return ret; | |
175 | } | |
176 | ||
177 | static int coroutine_fn backup_before_write_notify( | |
178 | NotifierWithReturn *notifier, | |
179 | void *opaque) | |
180 | { | |
181 | BdrvTrackedRequest *req = opaque; | |
182 | ||
183 | return backup_do_cow(req->bs, req->sector_num, req->nb_sectors, NULL); | |
184 | } | |
185 | ||
186 | static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp) | |
187 | { | |
188 | BackupBlockJob *s = container_of(job, BackupBlockJob, common); | |
189 | ||
190 | if (speed < 0) { | |
191 | error_set(errp, QERR_INVALID_PARAMETER, "speed"); | |
192 | return; | |
193 | } | |
194 | ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); | |
195 | } | |
196 | ||
197 | static void backup_iostatus_reset(BlockJob *job) | |
198 | { | |
199 | BackupBlockJob *s = container_of(job, BackupBlockJob, common); | |
200 | ||
201 | bdrv_iostatus_reset(s->target); | |
202 | } | |
203 | ||
204 | static const BlockJobType backup_job_type = { | |
205 | .instance_size = sizeof(BackupBlockJob), | |
206 | .job_type = "backup", | |
207 | .set_speed = backup_set_speed, | |
208 | .iostatus_reset = backup_iostatus_reset, | |
209 | }; | |
210 | ||
211 | static BlockErrorAction backup_error_action(BackupBlockJob *job, | |
212 | bool read, int error) | |
213 | { | |
214 | if (read) { | |
215 | return block_job_error_action(&job->common, job->common.bs, | |
216 | job->on_source_error, true, error); | |
217 | } else { | |
218 | return block_job_error_action(&job->common, job->target, | |
219 | job->on_target_error, false, error); | |
220 | } | |
221 | } | |
222 | ||
223 | static void coroutine_fn backup_run(void *opaque) | |
224 | { | |
225 | BackupBlockJob *job = opaque; | |
226 | BlockDriverState *bs = job->common.bs; | |
227 | BlockDriverState *target = job->target; | |
228 | BlockdevOnError on_target_error = job->on_target_error; | |
229 | NotifierWithReturn before_write = { | |
230 | .notify = backup_before_write_notify, | |
231 | }; | |
232 | int64_t start, end; | |
233 | int ret = 0; | |
234 | ||
235 | QLIST_INIT(&job->inflight_reqs); | |
236 | qemu_co_rwlock_init(&job->flush_rwlock); | |
237 | ||
238 | start = 0; | |
239 | end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE, | |
240 | BACKUP_SECTORS_PER_CLUSTER); | |
241 | ||
242 | job->bitmap = hbitmap_alloc(end, 0); | |
243 | ||
244 | bdrv_set_enable_write_cache(target, true); | |
245 | bdrv_set_on_error(target, on_target_error, on_target_error); | |
246 | bdrv_iostatus_enable(target); | |
247 | ||
248 | bdrv_add_before_write_notifier(bs, &before_write); | |
249 | ||
250 | for (; start < end; start++) { | |
251 | bool error_is_read; | |
252 | ||
253 | if (block_job_is_cancelled(&job->common)) { | |
254 | break; | |
255 | } | |
256 | ||
257 | /* we need to yield so that qemu_aio_flush() returns. | |
258 | * (without, VM does not reboot) | |
259 | */ | |
260 | if (job->common.speed) { | |
261 | uint64_t delay_ns = ratelimit_calculate_delay( | |
262 | &job->limit, job->sectors_read); | |
263 | job->sectors_read = 0; | |
264 | block_job_sleep_ns(&job->common, rt_clock, delay_ns); | |
265 | } else { | |
266 | block_job_sleep_ns(&job->common, rt_clock, 0); | |
267 | } | |
268 | ||
269 | if (block_job_is_cancelled(&job->common)) { | |
270 | break; | |
271 | } | |
272 | ||
273 | ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER, | |
274 | BACKUP_SECTORS_PER_CLUSTER, &error_is_read); | |
275 | if (ret < 0) { | |
276 | /* Depending on error action, fail now or retry cluster */ | |
277 | BlockErrorAction action = | |
278 | backup_error_action(job, error_is_read, -ret); | |
279 | if (action == BDRV_ACTION_REPORT) { | |
280 | break; | |
281 | } else { | |
282 | start--; | |
283 | continue; | |
284 | } | |
285 | } | |
286 | } | |
287 | ||
288 | notifier_with_return_remove(&before_write); | |
289 | ||
290 | /* wait until pending backup_do_cow() calls have completed */ | |
291 | qemu_co_rwlock_wrlock(&job->flush_rwlock); | |
292 | qemu_co_rwlock_unlock(&job->flush_rwlock); | |
293 | ||
294 | hbitmap_free(job->bitmap); | |
295 | ||
296 | bdrv_iostatus_disable(target); | |
297 | bdrv_delete(target); | |
298 | ||
299 | block_job_completed(&job->common, ret); | |
300 | } | |
301 | ||
302 | void backup_start(BlockDriverState *bs, BlockDriverState *target, | |
303 | int64_t speed, | |
304 | BlockdevOnError on_source_error, | |
305 | BlockdevOnError on_target_error, | |
306 | BlockDriverCompletionFunc *cb, void *opaque, | |
307 | Error **errp) | |
308 | { | |
309 | int64_t len; | |
310 | ||
311 | assert(bs); | |
312 | assert(target); | |
313 | assert(cb); | |
314 | ||
315 | if ((on_source_error == BLOCKDEV_ON_ERROR_STOP || | |
316 | on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) && | |
317 | !bdrv_iostatus_is_enabled(bs)) { | |
318 | error_set(errp, QERR_INVALID_PARAMETER, "on-source-error"); | |
319 | return; | |
320 | } | |
321 | ||
322 | len = bdrv_getlength(bs); | |
323 | if (len < 0) { | |
324 | error_setg_errno(errp, -len, "unable to get length for '%s'", | |
325 | bdrv_get_device_name(bs)); | |
326 | return; | |
327 | } | |
328 | ||
329 | BackupBlockJob *job = block_job_create(&backup_job_type, bs, speed, | |
330 | cb, opaque, errp); | |
331 | if (!job) { | |
332 | return; | |
333 | } | |
334 | ||
335 | job->on_source_error = on_source_error; | |
336 | job->on_target_error = on_target_error; | |
337 | job->target = target; | |
338 | job->common.len = len; | |
339 | job->common.co = qemu_coroutine_create(backup_run); | |
340 | qemu_coroutine_enter(job->common.co, job); | |
341 | } |