]>
Commit | Line | Data |
---|---|---|
35a6ed4f HZ |
1 | /* |
2 | * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) | |
3 | * (a.k.a. Fault Tolerance or Continuous Replication) | |
4 | * | |
5 | * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. | |
6 | * Copyright (c) 2016 FUJITSU LIMITED | |
7 | * Copyright (c) 2016 Intel Corporation | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or | |
10 | * later. See the COPYING file in the top-level directory. | |
11 | */ | |
12 | ||
13 | #include "qemu/osdep.h" | |
0b827d5e | 14 | #include "sysemu/sysemu.h" |
e688df6b | 15 | #include "qapi/error.h" |
9af23989 | 16 | #include "qapi/qapi-commands-migration.h" |
40014d81 | 17 | #include "qemu-file-channel.h" |
6666c96a | 18 | #include "migration.h" |
08a0aee1 | 19 | #include "qemu-file.h" |
20a519a0 | 20 | #include "savevm.h" |
35a6ed4f | 21 | #include "migration/colo.h" |
2c9e6fec | 22 | #include "block.h" |
a91246c9 | 23 | #include "io/channel-buffer.h" |
0b827d5e | 24 | #include "trace.h" |
56ba83d2 | 25 | #include "qemu/error-report.h" |
d89e666e | 26 | #include "migration/failover.h" |
2c9639ec | 27 | #include "replication.h" |
131b2153 ZC |
28 | #include "net/colo-compare.h" |
29 | #include "net/colo.h" | |
8e48ac95 | 30 | #include "block/block.h" |
9ecff6d6 | 31 | #include "qapi/qapi-events-migration.h" |
f56c0065 | 32 | #include "qapi/qmp/qerror.h" |
3f6df99d | 33 | #include "sysemu/cpus.h" |
7b343530 | 34 | #include "net/filter.h" |
35a6ed4f | 35 | |
a8664ba5 | 36 | static bool vmstate_loading; |
131b2153 | 37 | static Notifier packets_compare_notifier; |
a8664ba5 | 38 | |
a91246c9 HZ |
39 | #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) |
40 | ||
0b827d5e HZ |
41 | bool migration_in_colo_state(void) |
42 | { | |
43 | MigrationState *s = migrate_get_current(); | |
44 | ||
45 | return (s->state == MIGRATION_STATUS_COLO); | |
46 | } | |
47 | ||
25d0c16f HZ |
48 | bool migration_incoming_in_colo_state(void) |
49 | { | |
50 | MigrationIncomingState *mis = migration_incoming_get_current(); | |
51 | ||
52 | return mis && (mis->state == MIGRATION_STATUS_COLO); | |
53 | } | |
54 | ||
b3f7f0c5 HZ |
55 | static bool colo_runstate_is_stopped(void) |
56 | { | |
57 | return runstate_check(RUN_STATE_COLO) || !runstate_is_running(); | |
58 | } | |
59 | ||
9d2db376 HZ |
60 | static void secondary_vm_do_failover(void) |
61 | { | |
62 | int old_state; | |
63 | MigrationIncomingState *mis = migration_incoming_get_current(); | |
8e48ac95 | 64 | Error *local_err = NULL; |
9d2db376 | 65 | |
a8664ba5 HZ |
66 | /* Can not do failover during the process of VM's loading VMstate, Or |
67 | * it will break the secondary VM. | |
68 | */ | |
69 | if (vmstate_loading) { | |
70 | old_state = failover_set_state(FAILOVER_STATUS_ACTIVE, | |
71 | FAILOVER_STATUS_RELAUNCH); | |
72 | if (old_state != FAILOVER_STATUS_ACTIVE) { | |
73 | error_report("Unknown error while do failover for secondary VM," | |
977c736f | 74 | "old_state: %s", FailoverStatus_str(old_state)); |
a8664ba5 HZ |
75 | } |
76 | return; | |
77 | } | |
78 | ||
9d2db376 HZ |
79 | migrate_set_state(&mis->state, MIGRATION_STATUS_COLO, |
80 | MIGRATION_STATUS_COMPLETED); | |
81 | ||
8e48ac95 ZC |
82 | replication_stop_all(true, &local_err); |
83 | if (local_err) { | |
84 | error_report_err(local_err); | |
85 | } | |
86 | ||
7b343530 HZ |
87 | /* Notify all filters of all NIC to do checkpoint */ |
88 | colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err); | |
89 | if (local_err) { | |
90 | error_report_err(local_err); | |
91 | } | |
92 | ||
9d2db376 HZ |
93 | if (!autostart) { |
94 | error_report("\"-S\" qemu option will be ignored in secondary side"); | |
95 | /* recover runstate to normal migration finish state */ | |
96 | autostart = true; | |
97 | } | |
c937b9a6 HZ |
98 | /* |
99 | * Make sure COLO incoming thread not block in recv or send, | |
100 | * If mis->from_src_file and mis->to_src_file use the same fd, | |
101 | * The second shutdown() will return -1, we ignore this value, | |
102 | * It is harmless. | |
103 | */ | |
104 | if (mis->from_src_file) { | |
105 | qemu_file_shutdown(mis->from_src_file); | |
106 | } | |
107 | if (mis->to_src_file) { | |
108 | qemu_file_shutdown(mis->to_src_file); | |
109 | } | |
9d2db376 HZ |
110 | |
111 | old_state = failover_set_state(FAILOVER_STATUS_ACTIVE, | |
112 | FAILOVER_STATUS_COMPLETED); | |
113 | if (old_state != FAILOVER_STATUS_ACTIVE) { | |
114 | error_report("Incorrect state (%s) while doing failover for " | |
977c736f | 115 | "secondary VM", FailoverStatus_str(old_state)); |
9d2db376 HZ |
116 | return; |
117 | } | |
c937b9a6 HZ |
118 | /* Notify COLO incoming thread that failover work is finished */ |
119 | qemu_sem_post(&mis->colo_incoming_sem); | |
9d2db376 HZ |
120 | /* For Secondary VM, jump to incoming co */ |
121 | if (mis->migration_incoming_co) { | |
122 | qemu_coroutine_enter(mis->migration_incoming_co); | |
123 | } | |
124 | } | |
125 | ||
b3f7f0c5 HZ |
126 | static void primary_vm_do_failover(void) |
127 | { | |
128 | MigrationState *s = migrate_get_current(); | |
129 | int old_state; | |
8e48ac95 | 130 | Error *local_err = NULL; |
b3f7f0c5 HZ |
131 | |
132 | migrate_set_state(&s->state, MIGRATION_STATUS_COLO, | |
133 | MIGRATION_STATUS_COMPLETED); | |
2518aec1 HZ |
134 | /* |
135 | * kick COLO thread which might wait at | |
136 | * qemu_sem_wait(&s->colo_checkpoint_sem). | |
137 | */ | |
138 | colo_checkpoint_notify(migrate_get_current()); | |
b3f7f0c5 | 139 | |
c937b9a6 HZ |
140 | /* |
141 | * Wake up COLO thread which may blocked in recv() or send(), | |
142 | * The s->rp_state.from_dst_file and s->to_dst_file may use the | |
143 | * same fd, but we still shutdown the fd for twice, it is harmless. | |
144 | */ | |
145 | if (s->to_dst_file) { | |
146 | qemu_file_shutdown(s->to_dst_file); | |
147 | } | |
148 | if (s->rp_state.from_dst_file) { | |
149 | qemu_file_shutdown(s->rp_state.from_dst_file); | |
150 | } | |
151 | ||
b3f7f0c5 HZ |
152 | old_state = failover_set_state(FAILOVER_STATUS_ACTIVE, |
153 | FAILOVER_STATUS_COMPLETED); | |
154 | if (old_state != FAILOVER_STATUS_ACTIVE) { | |
155 | error_report("Incorrect state (%s) while doing failover for Primary VM", | |
977c736f | 156 | FailoverStatus_str(old_state)); |
b3f7f0c5 HZ |
157 | return; |
158 | } | |
8e48ac95 ZC |
159 | |
160 | replication_stop_all(true, &local_err); | |
161 | if (local_err) { | |
162 | error_report_err(local_err); | |
163 | local_err = NULL; | |
164 | } | |
165 | ||
c937b9a6 HZ |
166 | /* Notify COLO thread that failover work is finished */ |
167 | qemu_sem_post(&s->colo_exit_sem); | |
b3f7f0c5 HZ |
168 | } |
169 | ||
aad555c2 ZC |
170 | COLOMode get_colo_mode(void) |
171 | { | |
172 | if (migration_in_colo_state()) { | |
173 | return COLO_MODE_PRIMARY; | |
174 | } else if (migration_incoming_in_colo_state()) { | |
175 | return COLO_MODE_SECONDARY; | |
176 | } else { | |
41b6b779 | 177 | return COLO_MODE_NONE; |
aad555c2 ZC |
178 | } |
179 | } | |
180 | ||
b3f7f0c5 HZ |
181 | void colo_do_failover(MigrationState *s) |
182 | { | |
183 | /* Make sure VM stopped while failover happened. */ | |
184 | if (!colo_runstate_is_stopped()) { | |
185 | vm_stop_force_state(RUN_STATE_COLO); | |
186 | } | |
187 | ||
188 | if (get_colo_mode() == COLO_MODE_PRIMARY) { | |
189 | primary_vm_do_failover(); | |
9d2db376 HZ |
190 | } else { |
191 | secondary_vm_do_failover(); | |
b3f7f0c5 HZ |
192 | } |
193 | } | |
194 | ||
2c9639ec ZC |
195 | void qmp_xen_set_replication(bool enable, bool primary, |
196 | bool has_failover, bool failover, | |
197 | Error **errp) | |
198 | { | |
38bb54f3 | 199 | #ifdef CONFIG_REPLICATION |
2c9639ec ZC |
200 | ReplicationMode mode = primary ? |
201 | REPLICATION_MODE_PRIMARY : | |
202 | REPLICATION_MODE_SECONDARY; | |
203 | ||
204 | if (has_failover && enable) { | |
205 | error_setg(errp, "Parameter 'failover' is only for" | |
206 | " stopping replication"); | |
207 | return; | |
208 | } | |
209 | ||
210 | if (enable) { | |
211 | replication_start_all(mode, errp); | |
212 | } else { | |
213 | if (!has_failover) { | |
214 | failover = NULL; | |
215 | } | |
216 | replication_stop_all(failover, failover ? NULL : errp); | |
217 | } | |
38bb54f3 MA |
218 | #else |
219 | abort(); | |
220 | #endif | |
2c9639ec ZC |
221 | } |
222 | ||
daa33c52 ZC |
223 | ReplicationStatus *qmp_query_xen_replication_status(Error **errp) |
224 | { | |
38bb54f3 | 225 | #ifdef CONFIG_REPLICATION |
daa33c52 ZC |
226 | Error *err = NULL; |
227 | ReplicationStatus *s = g_new0(ReplicationStatus, 1); | |
228 | ||
229 | replication_get_error_all(&err); | |
230 | if (err) { | |
231 | s->error = true; | |
232 | s->has_desc = true; | |
233 | s->desc = g_strdup(error_get_pretty(err)); | |
234 | } else { | |
235 | s->error = false; | |
236 | } | |
237 | ||
238 | error_free(err); | |
239 | return s; | |
38bb54f3 MA |
240 | #else |
241 | abort(); | |
242 | #endif | |
daa33c52 ZC |
243 | } |
244 | ||
245 | void qmp_xen_colo_do_checkpoint(Error **errp) | |
246 | { | |
38bb54f3 | 247 | #ifdef CONFIG_REPLICATION |
daa33c52 | 248 | replication_do_checkpoint_all(errp); |
38bb54f3 MA |
249 | #else |
250 | abort(); | |
251 | #endif | |
daa33c52 ZC |
252 | } |
253 | ||
f56c0065 ZC |
254 | COLOStatus *qmp_query_colo_status(Error **errp) |
255 | { | |
256 | COLOStatus *s = g_new0(COLOStatus, 1); | |
257 | ||
258 | s->mode = get_colo_mode(); | |
259 | ||
260 | switch (failover_get_state()) { | |
261 | case FAILOVER_STATUS_NONE: | |
262 | s->reason = COLO_EXIT_REASON_NONE; | |
263 | break; | |
264 | case FAILOVER_STATUS_REQUIRE: | |
265 | s->reason = COLO_EXIT_REASON_REQUEST; | |
266 | break; | |
267 | default: | |
268 | s->reason = COLO_EXIT_REASON_ERROR; | |
269 | } | |
270 | ||
271 | return s; | |
272 | } | |
273 | ||
4f97558e HZ |
274 | static void colo_send_message(QEMUFile *f, COLOMessage msg, |
275 | Error **errp) | |
276 | { | |
277 | int ret; | |
278 | ||
279 | if (msg >= COLO_MESSAGE__MAX) { | |
280 | error_setg(errp, "%s: Invalid message", __func__); | |
281 | return; | |
282 | } | |
283 | qemu_put_be32(f, msg); | |
284 | qemu_fflush(f); | |
285 | ||
286 | ret = qemu_file_get_error(f); | |
287 | if (ret < 0) { | |
288 | error_setg_errno(errp, -ret, "Can't send COLO message"); | |
289 | } | |
977c736f | 290 | trace_colo_send_message(COLOMessage_str(msg)); |
4f97558e HZ |
291 | } |
292 | ||
a91246c9 HZ |
293 | static void colo_send_message_value(QEMUFile *f, COLOMessage msg, |
294 | uint64_t value, Error **errp) | |
295 | { | |
296 | Error *local_err = NULL; | |
297 | int ret; | |
298 | ||
299 | colo_send_message(f, msg, &local_err); | |
300 | if (local_err) { | |
301 | error_propagate(errp, local_err); | |
302 | return; | |
303 | } | |
304 | qemu_put_be64(f, value); | |
305 | qemu_fflush(f); | |
306 | ||
307 | ret = qemu_file_get_error(f); | |
308 | if (ret < 0) { | |
309 | error_setg_errno(errp, -ret, "Failed to send value for message:%s", | |
977c736f | 310 | COLOMessage_str(msg)); |
a91246c9 HZ |
311 | } |
312 | } | |
313 | ||
4f97558e HZ |
314 | static COLOMessage colo_receive_message(QEMUFile *f, Error **errp) |
315 | { | |
316 | COLOMessage msg; | |
317 | int ret; | |
318 | ||
319 | msg = qemu_get_be32(f); | |
320 | ret = qemu_file_get_error(f); | |
321 | if (ret < 0) { | |
322 | error_setg_errno(errp, -ret, "Can't receive COLO message"); | |
323 | return msg; | |
324 | } | |
325 | if (msg >= COLO_MESSAGE__MAX) { | |
326 | error_setg(errp, "%s: Invalid message", __func__); | |
327 | return msg; | |
328 | } | |
977c736f | 329 | trace_colo_receive_message(COLOMessage_str(msg)); |
4f97558e HZ |
330 | return msg; |
331 | } | |
332 | ||
333 | static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg, | |
334 | Error **errp) | |
335 | { | |
336 | COLOMessage msg; | |
337 | Error *local_err = NULL; | |
338 | ||
339 | msg = colo_receive_message(f, &local_err); | |
340 | if (local_err) { | |
341 | error_propagate(errp, local_err); | |
342 | return; | |
343 | } | |
344 | if (msg != expect_msg) { | |
345 | error_setg(errp, "Unexpected COLO message %d, expected %d", | |
346 | msg, expect_msg); | |
347 | } | |
348 | } | |
349 | ||
4291d372 HZ |
350 | static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg, |
351 | Error **errp) | |
352 | { | |
353 | Error *local_err = NULL; | |
354 | uint64_t value; | |
355 | int ret; | |
356 | ||
357 | colo_receive_check_message(f, expect_msg, &local_err); | |
358 | if (local_err) { | |
359 | error_propagate(errp, local_err); | |
360 | return 0; | |
361 | } | |
362 | ||
363 | value = qemu_get_be64(f); | |
364 | ret = qemu_file_get_error(f); | |
365 | if (ret < 0) { | |
366 | error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s", | |
977c736f | 367 | COLOMessage_str(expect_msg)); |
4291d372 HZ |
368 | } |
369 | return value; | |
370 | } | |
371 | ||
a91246c9 HZ |
372 | static int colo_do_checkpoint_transaction(MigrationState *s, |
373 | QIOChannelBuffer *bioc, | |
374 | QEMUFile *fb) | |
4f97558e HZ |
375 | { |
376 | Error *local_err = NULL; | |
a91246c9 | 377 | int ret = -1; |
4f97558e HZ |
378 | |
379 | colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST, | |
380 | &local_err); | |
381 | if (local_err) { | |
382 | goto out; | |
383 | } | |
384 | ||
385 | colo_receive_check_message(s->rp_state.from_dst_file, | |
386 | COLO_MESSAGE_CHECKPOINT_REPLY, &local_err); | |
387 | if (local_err) { | |
388 | goto out; | |
389 | } | |
a91246c9 HZ |
390 | /* Reset channel-buffer directly */ |
391 | qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); | |
392 | bioc->usage = 0; | |
4f97558e | 393 | |
a91246c9 | 394 | qemu_mutex_lock_iothread(); |
b3f7f0c5 HZ |
395 | if (failover_get_state() != FAILOVER_STATUS_NONE) { |
396 | qemu_mutex_unlock_iothread(); | |
397 | goto out; | |
398 | } | |
a91246c9 HZ |
399 | vm_stop_force_state(RUN_STATE_COLO); |
400 | qemu_mutex_unlock_iothread(); | |
401 | trace_colo_vm_state_change("run", "stop"); | |
b3f7f0c5 HZ |
402 | /* |
403 | * Failover request bh could be called after vm_stop_force_state(), | |
404 | * So we need check failover_request_is_active() again. | |
405 | */ | |
406 | if (failover_get_state() != FAILOVER_STATUS_NONE) { | |
407 | goto out; | |
408 | } | |
a91246c9 | 409 | |
131b2153 ZC |
410 | colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err); |
411 | if (local_err) { | |
412 | goto out; | |
413 | } | |
414 | ||
a91246c9 | 415 | /* Disable block migration */ |
ce7c817c | 416 | migrate_set_block_enabled(false, &local_err); |
a91246c9 | 417 | qemu_mutex_lock_iothread(); |
8e48ac95 ZC |
418 | replication_do_checkpoint_all(&local_err); |
419 | if (local_err) { | |
420 | qemu_mutex_unlock_iothread(); | |
421 | goto out; | |
422 | } | |
4f97558e HZ |
423 | |
424 | colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err); | |
425 | if (local_err) { | |
3f6df99d ZC |
426 | qemu_mutex_unlock_iothread(); |
427 | goto out; | |
428 | } | |
429 | /* Note: device state is saved into buffer */ | |
430 | ret = qemu_save_device_state(fb); | |
431 | ||
432 | qemu_mutex_unlock_iothread(); | |
433 | if (ret < 0) { | |
4f97558e HZ |
434 | goto out; |
435 | } | |
3f6df99d ZC |
436 | /* |
437 | * Only save VM's live state, which not including device state. | |
438 | * TODO: We may need a timeout mechanism to prevent COLO process | |
439 | * to be blocked here. | |
440 | */ | |
441 | qemu_savevm_live_state(s->to_dst_file); | |
442 | ||
443 | qemu_fflush(fb); | |
444 | ||
a91246c9 HZ |
445 | /* |
446 | * We need the size of the VMstate data in Secondary side, | |
447 | * With which we can decide how much data should be read. | |
448 | */ | |
449 | colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE, | |
450 | bioc->usage, &local_err); | |
451 | if (local_err) { | |
452 | goto out; | |
453 | } | |
4f97558e | 454 | |
a91246c9 HZ |
455 | qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage); |
456 | qemu_fflush(s->to_dst_file); | |
457 | ret = qemu_file_get_error(s->to_dst_file); | |
458 | if (ret < 0) { | |
459 | goto out; | |
460 | } | |
4f97558e HZ |
461 | |
462 | colo_receive_check_message(s->rp_state.from_dst_file, | |
463 | COLO_MESSAGE_VMSTATE_RECEIVED, &local_err); | |
464 | if (local_err) { | |
465 | goto out; | |
466 | } | |
467 | ||
468 | colo_receive_check_message(s->rp_state.from_dst_file, | |
469 | COLO_MESSAGE_VMSTATE_LOADED, &local_err); | |
470 | if (local_err) { | |
471 | goto out; | |
472 | } | |
473 | ||
a91246c9 HZ |
474 | ret = 0; |
475 | ||
476 | qemu_mutex_lock_iothread(); | |
477 | vm_start(); | |
478 | qemu_mutex_unlock_iothread(); | |
479 | trace_colo_vm_state_change("stop", "run"); | |
4f97558e | 480 | |
4f97558e HZ |
481 | out: |
482 | if (local_err) { | |
483 | error_report_err(local_err); | |
484 | } | |
a91246c9 | 485 | return ret; |
4f97558e HZ |
486 | } |
487 | ||
131b2153 ZC |
488 | static void colo_compare_notify_checkpoint(Notifier *notifier, void *data) |
489 | { | |
490 | colo_checkpoint_notify(data); | |
491 | } | |
492 | ||
0b827d5e HZ |
493 | static void colo_process_checkpoint(MigrationState *s) |
494 | { | |
a91246c9 HZ |
495 | QIOChannelBuffer *bioc; |
496 | QEMUFile *fb = NULL; | |
479125d5 | 497 | int64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); |
4f97558e HZ |
498 | Error *local_err = NULL; |
499 | int ret; | |
500 | ||
aef06085 HZ |
501 | failover_init_state(); |
502 | ||
56ba83d2 HZ |
503 | s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file); |
504 | if (!s->rp_state.from_dst_file) { | |
505 | error_report("Open QEMUFile from_dst_file failed"); | |
506 | goto out; | |
507 | } | |
508 | ||
131b2153 ZC |
509 | packets_compare_notifier.notify = colo_compare_notify_checkpoint; |
510 | colo_compare_register_notifier(&packets_compare_notifier); | |
511 | ||
4f97558e HZ |
512 | /* |
513 | * Wait for Secondary finish loading VM states and enter COLO | |
514 | * restore. | |
515 | */ | |
516 | colo_receive_check_message(s->rp_state.from_dst_file, | |
517 | COLO_MESSAGE_CHECKPOINT_READY, &local_err); | |
518 | if (local_err) { | |
519 | goto out; | |
520 | } | |
a91246c9 HZ |
521 | bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE); |
522 | fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc)); | |
523 | object_unref(OBJECT(bioc)); | |
4f97558e | 524 | |
0b827d5e | 525 | qemu_mutex_lock_iothread(); |
8e48ac95 ZC |
526 | replication_start_all(REPLICATION_MODE_PRIMARY, &local_err); |
527 | if (local_err) { | |
528 | qemu_mutex_unlock_iothread(); | |
529 | goto out; | |
530 | } | |
531 | ||
0b827d5e HZ |
532 | vm_start(); |
533 | qemu_mutex_unlock_iothread(); | |
534 | trace_colo_vm_state_change("stop", "run"); | |
535 | ||
479125d5 HZ |
536 | timer_mod(s->colo_delay_timer, |
537 | current_time + s->parameters.x_checkpoint_delay); | |
538 | ||
4f97558e | 539 | while (s->state == MIGRATION_STATUS_COLO) { |
b3f7f0c5 HZ |
540 | if (failover_get_state() != FAILOVER_STATUS_NONE) { |
541 | error_report("failover request"); | |
542 | goto out; | |
543 | } | |
544 | ||
479125d5 | 545 | qemu_sem_wait(&s->colo_checkpoint_sem); |
18cc23d7 | 546 | |
2518aec1 HZ |
547 | if (s->state != MIGRATION_STATUS_COLO) { |
548 | goto out; | |
549 | } | |
a91246c9 | 550 | ret = colo_do_checkpoint_transaction(s, bioc, fb); |
4f97558e HZ |
551 | if (ret < 0) { |
552 | goto out; | |
553 | } | |
554 | } | |
0b827d5e | 555 | |
56ba83d2 | 556 | out: |
4f97558e HZ |
557 | /* Throw the unreported error message after exited from loop */ |
558 | if (local_err) { | |
559 | error_report_err(local_err); | |
560 | } | |
561 | ||
a91246c9 HZ |
562 | if (fb) { |
563 | qemu_fclose(fb); | |
564 | } | |
565 | ||
9ecff6d6 HZ |
566 | /* |
567 | * There are only two reasons we can get here, some error happened | |
568 | * or the user triggered failover. | |
569 | */ | |
570 | switch (failover_get_state()) { | |
571 | case FAILOVER_STATUS_NONE: | |
572 | qapi_event_send_colo_exit(COLO_MODE_PRIMARY, | |
573 | COLO_EXIT_REASON_ERROR); | |
574 | break; | |
575 | case FAILOVER_STATUS_REQUIRE: | |
576 | qapi_event_send_colo_exit(COLO_MODE_PRIMARY, | |
577 | COLO_EXIT_REASON_REQUEST); | |
578 | break; | |
579 | default: | |
580 | abort(); | |
581 | } | |
582 | ||
c937b9a6 HZ |
583 | /* Hope this not to be too long to wait here */ |
584 | qemu_sem_wait(&s->colo_exit_sem); | |
585 | qemu_sem_destroy(&s->colo_exit_sem); | |
131b2153 ZC |
586 | |
587 | /* | |
588 | * It is safe to unregister notifier after failover finished. | |
589 | * Besides, colo_delay_timer and colo_checkpoint_sem can't be | |
590 | * released befor unregister notifier, or there will be use-after-free | |
591 | * error. | |
592 | */ | |
593 | colo_compare_unregister_notifier(&packets_compare_notifier); | |
594 | timer_del(s->colo_delay_timer); | |
595 | timer_free(s->colo_delay_timer); | |
596 | qemu_sem_destroy(&s->colo_checkpoint_sem); | |
597 | ||
c937b9a6 HZ |
598 | /* |
599 | * Must be called after failover BH is completed, | |
600 | * Or the failover BH may shutdown the wrong fd that | |
601 | * re-used by other threads after we release here. | |
602 | */ | |
56ba83d2 HZ |
603 | if (s->rp_state.from_dst_file) { |
604 | qemu_fclose(s->rp_state.from_dst_file); | |
605 | } | |
0b827d5e HZ |
606 | } |
607 | ||
479125d5 HZ |
608 | void colo_checkpoint_notify(void *opaque) |
609 | { | |
610 | MigrationState *s = opaque; | |
611 | int64_t next_notify_time; | |
612 | ||
613 | qemu_sem_post(&s->colo_checkpoint_sem); | |
614 | s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); | |
615 | next_notify_time = s->colo_checkpoint_time + | |
616 | s->parameters.x_checkpoint_delay; | |
617 | timer_mod(s->colo_delay_timer, next_notify_time); | |
618 | } | |
619 | ||
0b827d5e HZ |
620 | void migrate_start_colo_process(MigrationState *s) |
621 | { | |
622 | qemu_mutex_unlock_iothread(); | |
479125d5 HZ |
623 | qemu_sem_init(&s->colo_checkpoint_sem, 0); |
624 | s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST, | |
625 | colo_checkpoint_notify, s); | |
626 | ||
c937b9a6 | 627 | qemu_sem_init(&s->colo_exit_sem, 0); |
0b827d5e HZ |
628 | migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, |
629 | MIGRATION_STATUS_COLO); | |
630 | colo_process_checkpoint(s); | |
631 | qemu_mutex_lock_iothread(); | |
632 | } | |
25d0c16f | 633 | |
4f97558e HZ |
634 | static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request, |
635 | Error **errp) | |
636 | { | |
637 | COLOMessage msg; | |
638 | Error *local_err = NULL; | |
639 | ||
640 | msg = colo_receive_message(f, &local_err); | |
641 | if (local_err) { | |
642 | error_propagate(errp, local_err); | |
643 | return; | |
644 | } | |
645 | ||
646 | switch (msg) { | |
647 | case COLO_MESSAGE_CHECKPOINT_REQUEST: | |
648 | *checkpoint_request = 1; | |
649 | break; | |
650 | default: | |
651 | *checkpoint_request = 0; | |
652 | error_setg(errp, "Got unknown COLO message: %d", msg); | |
653 | break; | |
654 | } | |
655 | } | |
656 | ||
25d0c16f HZ |
657 | void *colo_process_incoming_thread(void *opaque) |
658 | { | |
659 | MigrationIncomingState *mis = opaque; | |
4291d372 HZ |
660 | QEMUFile *fb = NULL; |
661 | QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */ | |
662 | uint64_t total_size; | |
663 | uint64_t value; | |
4f97558e | 664 | Error *local_err = NULL; |
3f6df99d | 665 | int ret; |
25d0c16f | 666 | |
74637e6f | 667 | rcu_register_thread(); |
c937b9a6 HZ |
668 | qemu_sem_init(&mis->colo_incoming_sem, 0); |
669 | ||
25d0c16f HZ |
670 | migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, |
671 | MIGRATION_STATUS_COLO); | |
672 | ||
aef06085 HZ |
673 | failover_init_state(); |
674 | ||
56ba83d2 HZ |
675 | mis->to_src_file = qemu_file_get_return_path(mis->from_src_file); |
676 | if (!mis->to_src_file) { | |
677 | error_report("COLO incoming thread: Open QEMUFile to_src_file failed"); | |
678 | goto out; | |
679 | } | |
680 | /* | |
681 | * Note: the communication between Primary side and Secondary side | |
682 | * should be sequential, we set the fd to unblocked in migration incoming | |
683 | * coroutine, and here we are in the COLO incoming thread, so it is ok to | |
684 | * set the fd back to blocked. | |
685 | */ | |
686 | qemu_file_set_blocking(mis->from_src_file, true); | |
687 | ||
4291d372 HZ |
688 | bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE); |
689 | fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc)); | |
690 | object_unref(OBJECT(bioc)); | |
691 | ||
131b2153 | 692 | qemu_mutex_lock_iothread(); |
8e48ac95 ZC |
693 | replication_start_all(REPLICATION_MODE_SECONDARY, &local_err); |
694 | if (local_err) { | |
695 | qemu_mutex_unlock_iothread(); | |
696 | goto out; | |
697 | } | |
131b2153 ZC |
698 | vm_start(); |
699 | trace_colo_vm_state_change("stop", "run"); | |
700 | qemu_mutex_unlock_iothread(); | |
701 | ||
4f97558e HZ |
702 | colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY, |
703 | &local_err); | |
704 | if (local_err) { | |
705 | goto out; | |
706 | } | |
707 | ||
708 | while (mis->state == MIGRATION_STATUS_COLO) { | |
02ba9265 | 709 | int request = 0; |
4f97558e HZ |
710 | |
711 | colo_wait_handle_message(mis->from_src_file, &request, &local_err); | |
712 | if (local_err) { | |
713 | goto out; | |
714 | } | |
715 | assert(request); | |
9d2db376 HZ |
716 | if (failover_get_state() != FAILOVER_STATUS_NONE) { |
717 | error_report("failover request"); | |
718 | goto out; | |
719 | } | |
720 | ||
131b2153 ZC |
721 | qemu_mutex_lock_iothread(); |
722 | vm_stop_force_state(RUN_STATE_COLO); | |
723 | trace_colo_vm_state_change("run", "stop"); | |
724 | qemu_mutex_unlock_iothread(); | |
725 | ||
4f97558e HZ |
726 | /* FIXME: This is unnecessary for periodic checkpoint mode */ |
727 | colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY, | |
728 | &local_err); | |
729 | if (local_err) { | |
730 | goto out; | |
731 | } | |
732 | ||
733 | colo_receive_check_message(mis->from_src_file, | |
734 | COLO_MESSAGE_VMSTATE_SEND, &local_err); | |
735 | if (local_err) { | |
736 | goto out; | |
737 | } | |
738 | ||
3f6df99d ZC |
739 | qemu_mutex_lock_iothread(); |
740 | cpu_synchronize_all_pre_loadvm(); | |
741 | ret = qemu_loadvm_state_main(mis->from_src_file, mis); | |
742 | qemu_mutex_unlock_iothread(); | |
743 | ||
744 | if (ret < 0) { | |
745 | error_report("Load VM's live state (ram) error"); | |
746 | goto out; | |
747 | } | |
748 | ||
4291d372 HZ |
749 | value = colo_receive_message_value(mis->from_src_file, |
750 | COLO_MESSAGE_VMSTATE_SIZE, &local_err); | |
751 | if (local_err) { | |
752 | goto out; | |
753 | } | |
754 | ||
755 | /* | |
756 | * Read VM device state data into channel buffer, | |
757 | * It's better to re-use the memory allocated. | |
758 | * Here we need to handle the channel buffer directly. | |
759 | */ | |
760 | if (value > bioc->capacity) { | |
761 | bioc->capacity = value; | |
762 | bioc->data = g_realloc(bioc->data, bioc->capacity); | |
763 | } | |
764 | total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value); | |
765 | if (total_size != value) { | |
766 | error_report("Got %" PRIu64 " VMState data, less than expected" | |
767 | " %" PRIu64, total_size, value); | |
768 | goto out; | |
769 | } | |
770 | bioc->usage = total_size; | |
771 | qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); | |
4f97558e HZ |
772 | |
773 | colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED, | |
774 | &local_err); | |
775 | if (local_err) { | |
776 | goto out; | |
777 | } | |
778 | ||
4291d372 | 779 | qemu_mutex_lock_iothread(); |
a8664ba5 | 780 | vmstate_loading = true; |
3f6df99d ZC |
781 | ret = qemu_load_device_state(fb); |
782 | if (ret < 0) { | |
783 | error_report("COLO: load device state failed"); | |
4291d372 HZ |
784 | qemu_mutex_unlock_iothread(); |
785 | goto out; | |
786 | } | |
a8664ba5 | 787 | |
8e48ac95 ZC |
788 | replication_get_error_all(&local_err); |
789 | if (local_err) { | |
790 | qemu_mutex_unlock_iothread(); | |
791 | goto out; | |
792 | } | |
793 | /* discard colo disk buffer */ | |
794 | replication_do_checkpoint_all(&local_err); | |
795 | if (local_err) { | |
796 | qemu_mutex_unlock_iothread(); | |
797 | goto out; | |
798 | } | |
799 | ||
7b343530 HZ |
800 | /* Notify all filters of all NIC to do checkpoint */ |
801 | colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err); | |
802 | ||
803 | if (local_err) { | |
804 | qemu_mutex_unlock_iothread(); | |
805 | goto out; | |
806 | } | |
807 | ||
a8664ba5 | 808 | vmstate_loading = false; |
131b2153 ZC |
809 | vm_start(); |
810 | trace_colo_vm_state_change("stop", "run"); | |
4291d372 | 811 | qemu_mutex_unlock_iothread(); |
4f97558e | 812 | |
a8664ba5 HZ |
813 | if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { |
814 | failover_set_state(FAILOVER_STATUS_RELAUNCH, | |
815 | FAILOVER_STATUS_NONE); | |
816 | failover_request_active(NULL); | |
817 | goto out; | |
818 | } | |
819 | ||
4f97558e HZ |
820 | colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED, |
821 | &local_err); | |
822 | if (local_err) { | |
823 | goto out; | |
824 | } | |
825 | } | |
25d0c16f | 826 | |
56ba83d2 | 827 | out: |
a8664ba5 | 828 | vmstate_loading = false; |
4f97558e HZ |
829 | /* Throw the unreported error message after exited from loop */ |
830 | if (local_err) { | |
831 | error_report_err(local_err); | |
832 | } | |
833 | ||
9ecff6d6 HZ |
834 | switch (failover_get_state()) { |
835 | case FAILOVER_STATUS_NONE: | |
836 | qapi_event_send_colo_exit(COLO_MODE_SECONDARY, | |
837 | COLO_EXIT_REASON_ERROR); | |
838 | break; | |
839 | case FAILOVER_STATUS_REQUIRE: | |
840 | qapi_event_send_colo_exit(COLO_MODE_SECONDARY, | |
841 | COLO_EXIT_REASON_REQUEST); | |
842 | break; | |
843 | default: | |
844 | abort(); | |
845 | } | |
846 | ||
4291d372 HZ |
847 | if (fb) { |
848 | qemu_fclose(fb); | |
849 | } | |
850 | ||
c937b9a6 HZ |
851 | /* Hope this not to be too long to loop here */ |
852 | qemu_sem_wait(&mis->colo_incoming_sem); | |
853 | qemu_sem_destroy(&mis->colo_incoming_sem); | |
854 | /* Must be called after failover BH is completed */ | |
56ba83d2 HZ |
855 | if (mis->to_src_file) { |
856 | qemu_fclose(mis->to_src_file); | |
857 | } | |
aad555c2 | 858 | migration_incoming_disable_colo(); |
25d0c16f | 859 | |
74637e6f | 860 | rcu_unregister_thread(); |
25d0c16f HZ |
861 | return NULL; |
862 | } |