]>
Commit | Line | Data |
---|---|---|
35a6ed4f HZ |
1 | /* |
2 | * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) | |
3 | * (a.k.a. Fault Tolerance or Continuous Replication) | |
4 | * | |
5 | * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. | |
6 | * Copyright (c) 2016 FUJITSU LIMITED | |
7 | * Copyright (c) 2016 Intel Corporation | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or | |
10 | * later. See the COPYING file in the top-level directory. | |
11 | */ | |
12 | ||
13 | #include "qemu/osdep.h" | |
18cc23d7 | 14 | #include "qemu/timer.h" |
0b827d5e | 15 | #include "sysemu/sysemu.h" |
35a6ed4f | 16 | #include "migration/colo.h" |
a91246c9 | 17 | #include "io/channel-buffer.h" |
0b827d5e | 18 | #include "trace.h" |
56ba83d2 | 19 | #include "qemu/error-report.h" |
4f97558e | 20 | #include "qapi/error.h" |
35a6ed4f | 21 | |
/* Base size in bytes (4 MiB) used when creating COLO channel buffers. */
#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
23 | ||
/*
 * Report whether COLO is supported.
 *
 * Returns: always false in this implementation.
 */
bool colo_supported(void)
{
    const bool supported = false;

    return supported;
}
0b827d5e HZ |
28 | |
29 | bool migration_in_colo_state(void) | |
30 | { | |
31 | MigrationState *s = migrate_get_current(); | |
32 | ||
33 | return (s->state == MIGRATION_STATUS_COLO); | |
34 | } | |
35 | ||
25d0c16f HZ |
36 | bool migration_incoming_in_colo_state(void) |
37 | { | |
38 | MigrationIncomingState *mis = migration_incoming_get_current(); | |
39 | ||
40 | return mis && (mis->state == MIGRATION_STATUS_COLO); | |
41 | } | |
42 | ||
4f97558e HZ |
43 | static void colo_send_message(QEMUFile *f, COLOMessage msg, |
44 | Error **errp) | |
45 | { | |
46 | int ret; | |
47 | ||
48 | if (msg >= COLO_MESSAGE__MAX) { | |
49 | error_setg(errp, "%s: Invalid message", __func__); | |
50 | return; | |
51 | } | |
52 | qemu_put_be32(f, msg); | |
53 | qemu_fflush(f); | |
54 | ||
55 | ret = qemu_file_get_error(f); | |
56 | if (ret < 0) { | |
57 | error_setg_errno(errp, -ret, "Can't send COLO message"); | |
58 | } | |
59 | trace_colo_send_message(COLOMessage_lookup[msg]); | |
60 | } | |
61 | ||
a91246c9 HZ |
62 | static void colo_send_message_value(QEMUFile *f, COLOMessage msg, |
63 | uint64_t value, Error **errp) | |
64 | { | |
65 | Error *local_err = NULL; | |
66 | int ret; | |
67 | ||
68 | colo_send_message(f, msg, &local_err); | |
69 | if (local_err) { | |
70 | error_propagate(errp, local_err); | |
71 | return; | |
72 | } | |
73 | qemu_put_be64(f, value); | |
74 | qemu_fflush(f); | |
75 | ||
76 | ret = qemu_file_get_error(f); | |
77 | if (ret < 0) { | |
78 | error_setg_errno(errp, -ret, "Failed to send value for message:%s", | |
79 | COLOMessage_lookup[msg]); | |
80 | } | |
81 | } | |
82 | ||
4f97558e HZ |
83 | static COLOMessage colo_receive_message(QEMUFile *f, Error **errp) |
84 | { | |
85 | COLOMessage msg; | |
86 | int ret; | |
87 | ||
88 | msg = qemu_get_be32(f); | |
89 | ret = qemu_file_get_error(f); | |
90 | if (ret < 0) { | |
91 | error_setg_errno(errp, -ret, "Can't receive COLO message"); | |
92 | return msg; | |
93 | } | |
94 | if (msg >= COLO_MESSAGE__MAX) { | |
95 | error_setg(errp, "%s: Invalid message", __func__); | |
96 | return msg; | |
97 | } | |
98 | trace_colo_receive_message(COLOMessage_lookup[msg]); | |
99 | return msg; | |
100 | } | |
101 | ||
102 | static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg, | |
103 | Error **errp) | |
104 | { | |
105 | COLOMessage msg; | |
106 | Error *local_err = NULL; | |
107 | ||
108 | msg = colo_receive_message(f, &local_err); | |
109 | if (local_err) { | |
110 | error_propagate(errp, local_err); | |
111 | return; | |
112 | } | |
113 | if (msg != expect_msg) { | |
114 | error_setg(errp, "Unexpected COLO message %d, expected %d", | |
115 | msg, expect_msg); | |
116 | } | |
117 | } | |
118 | ||
4291d372 HZ |
119 | static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg, |
120 | Error **errp) | |
121 | { | |
122 | Error *local_err = NULL; | |
123 | uint64_t value; | |
124 | int ret; | |
125 | ||
126 | colo_receive_check_message(f, expect_msg, &local_err); | |
127 | if (local_err) { | |
128 | error_propagate(errp, local_err); | |
129 | return 0; | |
130 | } | |
131 | ||
132 | value = qemu_get_be64(f); | |
133 | ret = qemu_file_get_error(f); | |
134 | if (ret < 0) { | |
135 | error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s", | |
136 | COLOMessage_lookup[expect_msg]); | |
137 | } | |
138 | return value; | |
139 | } | |
140 | ||
a91246c9 HZ |
141 | static int colo_do_checkpoint_transaction(MigrationState *s, |
142 | QIOChannelBuffer *bioc, | |
143 | QEMUFile *fb) | |
4f97558e HZ |
144 | { |
145 | Error *local_err = NULL; | |
a91246c9 | 146 | int ret = -1; |
4f97558e HZ |
147 | |
148 | colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST, | |
149 | &local_err); | |
150 | if (local_err) { | |
151 | goto out; | |
152 | } | |
153 | ||
154 | colo_receive_check_message(s->rp_state.from_dst_file, | |
155 | COLO_MESSAGE_CHECKPOINT_REPLY, &local_err); | |
156 | if (local_err) { | |
157 | goto out; | |
158 | } | |
a91246c9 HZ |
159 | /* Reset channel-buffer directly */ |
160 | qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); | |
161 | bioc->usage = 0; | |
4f97558e | 162 | |
a91246c9 HZ |
163 | qemu_mutex_lock_iothread(); |
164 | vm_stop_force_state(RUN_STATE_COLO); | |
165 | qemu_mutex_unlock_iothread(); | |
166 | trace_colo_vm_state_change("run", "stop"); | |
167 | ||
168 | /* Disable block migration */ | |
169 | s->params.blk = 0; | |
170 | s->params.shared = 0; | |
171 | qemu_savevm_state_header(fb); | |
172 | qemu_savevm_state_begin(fb, &s->params); | |
173 | qemu_mutex_lock_iothread(); | |
174 | qemu_savevm_state_complete_precopy(fb, false); | |
175 | qemu_mutex_unlock_iothread(); | |
176 | ||
177 | qemu_fflush(fb); | |
4f97558e HZ |
178 | |
179 | colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err); | |
180 | if (local_err) { | |
181 | goto out; | |
182 | } | |
a91246c9 HZ |
183 | /* |
184 | * We need the size of the VMstate data in Secondary side, | |
185 | * With which we can decide how much data should be read. | |
186 | */ | |
187 | colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE, | |
188 | bioc->usage, &local_err); | |
189 | if (local_err) { | |
190 | goto out; | |
191 | } | |
4f97558e | 192 | |
a91246c9 HZ |
193 | qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage); |
194 | qemu_fflush(s->to_dst_file); | |
195 | ret = qemu_file_get_error(s->to_dst_file); | |
196 | if (ret < 0) { | |
197 | goto out; | |
198 | } | |
4f97558e HZ |
199 | |
200 | colo_receive_check_message(s->rp_state.from_dst_file, | |
201 | COLO_MESSAGE_VMSTATE_RECEIVED, &local_err); | |
202 | if (local_err) { | |
203 | goto out; | |
204 | } | |
205 | ||
206 | colo_receive_check_message(s->rp_state.from_dst_file, | |
207 | COLO_MESSAGE_VMSTATE_LOADED, &local_err); | |
208 | if (local_err) { | |
209 | goto out; | |
210 | } | |
211 | ||
a91246c9 HZ |
212 | ret = 0; |
213 | ||
214 | qemu_mutex_lock_iothread(); | |
215 | vm_start(); | |
216 | qemu_mutex_unlock_iothread(); | |
217 | trace_colo_vm_state_change("stop", "run"); | |
4f97558e | 218 | |
4f97558e HZ |
219 | out: |
220 | if (local_err) { | |
221 | error_report_err(local_err); | |
222 | } | |
a91246c9 | 223 | return ret; |
4f97558e HZ |
224 | } |
225 | ||
0b827d5e HZ |
226 | static void colo_process_checkpoint(MigrationState *s) |
227 | { | |
a91246c9 HZ |
228 | QIOChannelBuffer *bioc; |
229 | QEMUFile *fb = NULL; | |
18cc23d7 | 230 | int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); |
4f97558e HZ |
231 | Error *local_err = NULL; |
232 | int ret; | |
233 | ||
56ba83d2 HZ |
234 | s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file); |
235 | if (!s->rp_state.from_dst_file) { | |
236 | error_report("Open QEMUFile from_dst_file failed"); | |
237 | goto out; | |
238 | } | |
239 | ||
4f97558e HZ |
240 | /* |
241 | * Wait for Secondary finish loading VM states and enter COLO | |
242 | * restore. | |
243 | */ | |
244 | colo_receive_check_message(s->rp_state.from_dst_file, | |
245 | COLO_MESSAGE_CHECKPOINT_READY, &local_err); | |
246 | if (local_err) { | |
247 | goto out; | |
248 | } | |
a91246c9 HZ |
249 | bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE); |
250 | fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc)); | |
251 | object_unref(OBJECT(bioc)); | |
4f97558e | 252 | |
0b827d5e HZ |
253 | qemu_mutex_lock_iothread(); |
254 | vm_start(); | |
255 | qemu_mutex_unlock_iothread(); | |
256 | trace_colo_vm_state_change("stop", "run"); | |
257 | ||
4f97558e | 258 | while (s->state == MIGRATION_STATUS_COLO) { |
18cc23d7 HZ |
259 | current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); |
260 | if (current_time - checkpoint_time < | |
261 | s->parameters.x_checkpoint_delay) { | |
262 | int64_t delay_ms; | |
263 | ||
264 | delay_ms = s->parameters.x_checkpoint_delay - | |
265 | (current_time - checkpoint_time); | |
266 | g_usleep(delay_ms * 1000); | |
267 | } | |
a91246c9 | 268 | ret = colo_do_checkpoint_transaction(s, bioc, fb); |
4f97558e HZ |
269 | if (ret < 0) { |
270 | goto out; | |
271 | } | |
18cc23d7 | 272 | checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); |
4f97558e | 273 | } |
0b827d5e | 274 | |
56ba83d2 | 275 | out: |
4f97558e HZ |
276 | /* Throw the unreported error message after exited from loop */ |
277 | if (local_err) { | |
278 | error_report_err(local_err); | |
279 | } | |
280 | ||
a91246c9 HZ |
281 | if (fb) { |
282 | qemu_fclose(fb); | |
283 | } | |
284 | ||
56ba83d2 HZ |
285 | if (s->rp_state.from_dst_file) { |
286 | qemu_fclose(s->rp_state.from_dst_file); | |
287 | } | |
0b827d5e HZ |
288 | } |
289 | ||
290 | void migrate_start_colo_process(MigrationState *s) | |
291 | { | |
292 | qemu_mutex_unlock_iothread(); | |
293 | migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, | |
294 | MIGRATION_STATUS_COLO); | |
295 | colo_process_checkpoint(s); | |
296 | qemu_mutex_lock_iothread(); | |
297 | } | |
25d0c16f | 298 | |
4f97558e HZ |
299 | static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request, |
300 | Error **errp) | |
301 | { | |
302 | COLOMessage msg; | |
303 | Error *local_err = NULL; | |
304 | ||
305 | msg = colo_receive_message(f, &local_err); | |
306 | if (local_err) { | |
307 | error_propagate(errp, local_err); | |
308 | return; | |
309 | } | |
310 | ||
311 | switch (msg) { | |
312 | case COLO_MESSAGE_CHECKPOINT_REQUEST: | |
313 | *checkpoint_request = 1; | |
314 | break; | |
315 | default: | |
316 | *checkpoint_request = 0; | |
317 | error_setg(errp, "Got unknown COLO message: %d", msg); | |
318 | break; | |
319 | } | |
320 | } | |
321 | ||
25d0c16f HZ |
322 | void *colo_process_incoming_thread(void *opaque) |
323 | { | |
324 | MigrationIncomingState *mis = opaque; | |
4291d372 HZ |
325 | QEMUFile *fb = NULL; |
326 | QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */ | |
327 | uint64_t total_size; | |
328 | uint64_t value; | |
4f97558e | 329 | Error *local_err = NULL; |
25d0c16f HZ |
330 | |
331 | migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, | |
332 | MIGRATION_STATUS_COLO); | |
333 | ||
56ba83d2 HZ |
334 | mis->to_src_file = qemu_file_get_return_path(mis->from_src_file); |
335 | if (!mis->to_src_file) { | |
336 | error_report("COLO incoming thread: Open QEMUFile to_src_file failed"); | |
337 | goto out; | |
338 | } | |
339 | /* | |
340 | * Note: the communication between Primary side and Secondary side | |
341 | * should be sequential, we set the fd to unblocked in migration incoming | |
342 | * coroutine, and here we are in the COLO incoming thread, so it is ok to | |
343 | * set the fd back to blocked. | |
344 | */ | |
345 | qemu_file_set_blocking(mis->from_src_file, true); | |
346 | ||
4291d372 HZ |
347 | bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE); |
348 | fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc)); | |
349 | object_unref(OBJECT(bioc)); | |
350 | ||
4f97558e HZ |
351 | colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY, |
352 | &local_err); | |
353 | if (local_err) { | |
354 | goto out; | |
355 | } | |
356 | ||
357 | while (mis->state == MIGRATION_STATUS_COLO) { | |
358 | int request; | |
359 | ||
360 | colo_wait_handle_message(mis->from_src_file, &request, &local_err); | |
361 | if (local_err) { | |
362 | goto out; | |
363 | } | |
364 | assert(request); | |
365 | /* FIXME: This is unnecessary for periodic checkpoint mode */ | |
366 | colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY, | |
367 | &local_err); | |
368 | if (local_err) { | |
369 | goto out; | |
370 | } | |
371 | ||
372 | colo_receive_check_message(mis->from_src_file, | |
373 | COLO_MESSAGE_VMSTATE_SEND, &local_err); | |
374 | if (local_err) { | |
375 | goto out; | |
376 | } | |
377 | ||
4291d372 HZ |
378 | value = colo_receive_message_value(mis->from_src_file, |
379 | COLO_MESSAGE_VMSTATE_SIZE, &local_err); | |
380 | if (local_err) { | |
381 | goto out; | |
382 | } | |
383 | ||
384 | /* | |
385 | * Read VM device state data into channel buffer, | |
386 | * It's better to re-use the memory allocated. | |
387 | * Here we need to handle the channel buffer directly. | |
388 | */ | |
389 | if (value > bioc->capacity) { | |
390 | bioc->capacity = value; | |
391 | bioc->data = g_realloc(bioc->data, bioc->capacity); | |
392 | } | |
393 | total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value); | |
394 | if (total_size != value) { | |
395 | error_report("Got %" PRIu64 " VMState data, less than expected" | |
396 | " %" PRIu64, total_size, value); | |
397 | goto out; | |
398 | } | |
399 | bioc->usage = total_size; | |
400 | qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); | |
4f97558e HZ |
401 | |
402 | colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED, | |
403 | &local_err); | |
404 | if (local_err) { | |
405 | goto out; | |
406 | } | |
407 | ||
4291d372 HZ |
408 | qemu_mutex_lock_iothread(); |
409 | qemu_system_reset(VMRESET_SILENT); | |
410 | if (qemu_loadvm_state(fb) < 0) { | |
411 | error_report("COLO: loadvm failed"); | |
412 | qemu_mutex_unlock_iothread(); | |
413 | goto out; | |
414 | } | |
415 | qemu_mutex_unlock_iothread(); | |
4f97558e HZ |
416 | |
417 | colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED, | |
418 | &local_err); | |
419 | if (local_err) { | |
420 | goto out; | |
421 | } | |
422 | } | |
25d0c16f | 423 | |
56ba83d2 | 424 | out: |
4f97558e HZ |
425 | /* Throw the unreported error message after exited from loop */ |
426 | if (local_err) { | |
427 | error_report_err(local_err); | |
428 | } | |
429 | ||
4291d372 HZ |
430 | if (fb) { |
431 | qemu_fclose(fb); | |
432 | } | |
433 | ||
56ba83d2 HZ |
434 | if (mis->to_src_file) { |
435 | qemu_fclose(mis->to_src_file); | |
436 | } | |
25d0c16f HZ |
437 | migration_incoming_exit_colo(); |
438 | ||
439 | return NULL; | |
440 | } |