/*
 * Image mirroring
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Paolo Bonzini <[email protected]>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
13 | ||
#include "trace.h"
#include "blockjob.h"
#include "block_int.h"
#include "qemu/ratelimit.h"
18 | ||
enum {
    /*
     * Size of data buffer for populating the image file. This should be large
     * enough to process multiple clusters in a single call, so that populating
     * contiguous regions of the image is efficient.
     */
    BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */
};

/* Length of one rate-limit accounting slice (100 ms), in nanoseconds. */
#define SLICE_TIME 100000000ULL /* ns */
29 | ||
/* Per-job state for an active image-mirroring block job. */
typedef struct MirrorBlockJob {
    BlockJob common;            /* generic job state; this struct is recovered
                                 * from it via container_of() */
    RateLimit limit;            /* throttles copying when common.speed is set */
    BlockDriverState *target;   /* destination image the source is mirrored to */
    MirrorSyncMode mode;        /* which sectors to pre-dirty: full/top/none */
    bool synced;                /* true once source and target have been in
                                 * sync and block_job_ready() was emitted */
    bool should_complete;       /* set by mirror_complete() to ask the job
                                 * coroutine to finish and switch disks */
    int64_t sector_num;         /* cursor for the dirty-sector scan; -1 before
                                 * the first iteration */
    uint8_t *buf;               /* bounce buffer of BLOCK_SIZE bytes */
} MirrorBlockJob;
40 | ||
41 | static int coroutine_fn mirror_iteration(MirrorBlockJob *s) | |
42 | { | |
43 | BlockDriverState *source = s->common.bs; | |
44 | BlockDriverState *target = s->target; | |
45 | QEMUIOVector qiov; | |
46 | int ret, nb_sectors; | |
47 | int64_t end; | |
48 | struct iovec iov; | |
49 | ||
50 | end = s->common.len >> BDRV_SECTOR_BITS; | |
51 | s->sector_num = bdrv_get_next_dirty(source, s->sector_num); | |
52 | nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num); | |
53 | bdrv_reset_dirty(source, s->sector_num, nb_sectors); | |
54 | ||
55 | /* Copy the dirty cluster. */ | |
56 | iov.iov_base = s->buf; | |
57 | iov.iov_len = nb_sectors * 512; | |
58 | qemu_iovec_init_external(&qiov, &iov, 1); | |
59 | ||
60 | trace_mirror_one_iteration(s, s->sector_num, nb_sectors); | |
61 | ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov); | |
62 | if (ret < 0) { | |
63 | return ret; | |
64 | } | |
65 | return bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov); | |
66 | } | |
67 | ||
/* Coroutine body of the mirror job.
 *
 * Phase 1 (sync mode FULL or TOP): walk the device chunk by chunk and
 * mark allocated regions dirty so the main loop copies them; in FULL
 * mode allocation is checked against the whole chain (base == NULL),
 * in TOP mode only above the backing file.
 *
 * Phase 2: repeatedly copy dirty chunks to the target.  Once the dirty
 * count reaches zero the target is flushed, the job is reported ready,
 * and the loop keeps mirroring new guest writes until it is cancelled
 * or asked to complete.  On successful completion the target is
 * swapped in for the source before the job finishes.
 */
static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end;
    int ret = 0;
    int n;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->common.len = bdrv_getlength(bs);
    if (s->common.len < 0) {
        /* Negative length is the error code; report it and bail out
         * before any resources are allocated. */
        block_job_completed(&s->common, s->common.len);
        return;
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
    s->buf = qemu_blockalign(bs, BLOCK_SIZE);

    if (s->mode != MIRROR_SYNC_MODE_NONE) {
        /* First part, loop on the sectors and initialize the dirty bitmap.  */
        BlockDriverState *base;
        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
        for (sector_num = 0; sector_num < end; ) {
            /* Round up to the next dirty-chunk boundary. */
            int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
            ret = bdrv_co_is_allocated_above(bs, base,
                                             sector_num, next - sector_num, &n);

            if (ret < 0) {
                goto immediate_exit;
            }

            assert(n > 0);
            if (ret == 1) {
                /* Allocated: dirty the chunk and skip to its end. */
                bdrv_set_dirty(bs, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
            }
        }
    }

    /* -1 makes the first mirror_iteration() scan for dirty sectors from
     * the start of the device (bdrv_get_next_dirty semantics -- confirm). */
    s->sector_num = -1;
    for (;;) {
        uint64_t delay_ns;
        int64_t cnt;
        bool should_complete;

        cnt = bdrv_get_dirty_count(bs);
        if (cnt != 0) {
            ret = mirror_iteration(s);
            if (ret < 0) {
                goto immediate_exit;
            }
            /* Re-read: the iteration cleared bits and the guest may have
             * dirtied new ones meanwhile. */
            cnt = bdrv_get_dirty_count(bs);
        }

        should_complete = false;
        if (cnt == 0) {
            trace_mirror_before_flush(s);
            ret = bdrv_flush(s->target);
            if (ret < 0) {
                goto immediate_exit;
            }

            /* We're out of the streaming phase.  From now on, if the job
             * is cancelled we will actually complete all pending I/O and
             * report completion.  This way, block-job-cancel will leave
             * the target in a consistent state.
             */
            s->common.offset = end * BDRV_SECTOR_SIZE;
            if (!s->synced) {
                block_job_ready(&s->common);
                s->synced = true;
            }

            should_complete = s->should_complete ||
                block_job_is_cancelled(&s->common);
            cnt = bdrv_get_dirty_count(bs);
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_populate runs.
             */
            trace_mirror_before_drain(s, cnt);
            bdrv_drain_all();
            cnt = bdrv_get_dirty_count(bs);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced);
        if (!s->synced) {
            /* Publish progress */
            s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE;

            /* Throttle by one chunk per slice when a speed limit is set. */
            if (s->common.speed) {
                delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
            } else {
                delay_ns = 0;
            }

            /* Note that even when no rate limit is applied we need to yield
             * with no pending I/O here so that qemu_aio_flush() returns.
             */
            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
            }
        } else if (!should_complete) {
            /* Synced but not asked to finish: idle for a slice when there
             * is nothing to copy, otherwise loop again immediately. */
            delay_ns = (cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            /* Cancellation after the ready event still counts as success. */
            s->common.cancelled = false;
            break;
        }
    }

immediate_exit:
    g_free(s->buf);
    bdrv_set_dirty_tracking(bs, false);
    if (s->should_complete && ret == 0) {
        /* Match the source's open flags before swapping, so the guest sees
         * the same caching/read-write behavior on the new disk. */
        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
        }
        bdrv_swap(s->target, s->common.bs);
    }
    /* After a swap, s->target points at what used to be the source;
     * either way the extra BlockDriverState is torn down here. */
    bdrv_close(s->target);
    bdrv_delete(s->target);
    block_job_completed(&s->common, ret);
}
210 | ||
211 | static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) | |
212 | { | |
213 | MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); | |
214 | ||
215 | if (speed < 0) { | |
216 | error_set(errp, QERR_INVALID_PARAMETER, "speed"); | |
217 | return; | |
218 | } | |
219 | ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); | |
220 | } | |
221 | ||
/* BlockJobType.complete callback: handle block-job-complete for mirror.
 *
 * Opens the target's backing file (presumably so the target can serve
 * data from its backing chain after the switch -- confirm against the
 * callers), refuses with BLOCK_JOB_NOT_READY if the job has not yet
 * reached the synced state, and otherwise flags the job for completion
 * and wakes up the job coroutine.
 */
static void mirror_complete(BlockJob *job, Error **errp)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
    int ret;

    ret = bdrv_open_backing_file(s->target);
    if (ret < 0) {
        /* Resolve the backing filename only to build the error message. */
        char backing_filename[PATH_MAX];
        bdrv_get_full_backing_filename(s->target, backing_filename,
                                       sizeof(backing_filename));
        error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename);
        return;
    }
    if (!s->synced) {
        /* Completion only makes sense once source and target are in sync. */
        error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
        return;
    }

    s->should_complete = true;
    /* Wake the coroutine so mirror_run() can act on should_complete. */
    block_job_resume(job);
}
243 | ||
/* Job driver hooks for the "mirror" block job type. */
static BlockJobType mirror_job_type = {
    .instance_size = sizeof(MirrorBlockJob),
    .job_type      = "mirror",
    .set_speed     = mirror_set_speed,
    .complete      = mirror_complete,
};
250 | ||
251 | void mirror_start(BlockDriverState *bs, BlockDriverState *target, | |
252 | int64_t speed, MirrorSyncMode mode, | |
253 | BlockDriverCompletionFunc *cb, | |
254 | void *opaque, Error **errp) | |
255 | { | |
256 | MirrorBlockJob *s; | |
257 | ||
258 | s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp); | |
259 | if (!s) { | |
260 | return; | |
261 | } | |
262 | ||
263 | s->target = target; | |
264 | s->mode = mode; | |
265 | bdrv_set_dirty_tracking(bs, true); | |
266 | bdrv_set_enable_write_cache(s->target, true); | |
267 | s->common.co = qemu_coroutine_create(mirror_run); | |
268 | trace_mirror_start(bs, s, s->common.co, opaque); | |
269 | qemu_coroutine_enter(s->common.co, s); | |
270 | } |