]>
Commit | Line | Data |
---|---|---|
35a6ed4f HZ |
1 | /* |
2 | * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) | |
3 | * (a.k.a. Fault Tolerance or Continuous Replication) | |
4 | * | |
5 | * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. | |
6 | * Copyright (c) 2016 FUJITSU LIMITED | |
7 | * Copyright (c) 2016 Intel Corporation | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or | |
10 | * later. See the COPYING file in the top-level directory. | |
11 | */ | |
12 | ||
13 | #include "qemu/osdep.h" | |
0b827d5e | 14 | #include "sysemu/sysemu.h" |
e688df6b | 15 | #include "qapi/error.h" |
9af23989 | 16 | #include "qapi/qapi-commands-migration.h" |
40014d81 | 17 | #include "qemu-file-channel.h" |
6666c96a | 18 | #include "migration.h" |
08a0aee1 | 19 | #include "qemu-file.h" |
20a519a0 | 20 | #include "savevm.h" |
35a6ed4f | 21 | #include "migration/colo.h" |
2c9e6fec | 22 | #include "block.h" |
a91246c9 | 23 | #include "io/channel-buffer.h" |
0b827d5e | 24 | #include "trace.h" |
56ba83d2 | 25 | #include "qemu/error-report.h" |
db725815 | 26 | #include "qemu/main-loop.h" |
d4842052 | 27 | #include "qemu/rcu.h" |
d89e666e | 28 | #include "migration/failover.h" |
0393031a | 29 | #include "migration/ram.h" |
335d10cd | 30 | #ifdef CONFIG_REPLICATION |
2c9639ec | 31 | #include "replication.h" |
335d10cd | 32 | #endif |
131b2153 ZC |
33 | #include "net/colo-compare.h" |
34 | #include "net/colo.h" | |
8e48ac95 | 35 | #include "block/block.h" |
9ecff6d6 | 36 | #include "qapi/qapi-events-migration.h" |
f56c0065 | 37 | #include "qapi/qmp/qerror.h" |
3f6df99d | 38 | #include "sysemu/cpus.h" |
54d31236 | 39 | #include "sysemu/runstate.h" |
7b343530 | 40 | #include "net/filter.h" |
35a6ed4f | 41 | |
a8664ba5 | 42 | static bool vmstate_loading; |
131b2153 | 43 | static Notifier packets_compare_notifier; |
a8664ba5 | 44 | |
5ed0deca ZC |
45 | /* Users need to know the COLO mode after a COLO failover */
46 | static COLOMode last_colo_mode; | |
47 | ||
a91246c9 HZ |
48 | #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) |
49 | ||
0b827d5e HZ |
50 | bool migration_in_colo_state(void) |
51 | { | |
52 | MigrationState *s = migrate_get_current(); | |
53 | ||
54 | return (s->state == MIGRATION_STATUS_COLO); | |
55 | } | |
56 | ||
25d0c16f HZ |
57 | bool migration_incoming_in_colo_state(void) |
58 | { | |
59 | MigrationIncomingState *mis = migration_incoming_get_current(); | |
60 | ||
61 | return mis && (mis->state == MIGRATION_STATUS_COLO); | |
62 | } | |
63 | ||
b3f7f0c5 HZ |
64 | static bool colo_runstate_is_stopped(void) |
65 | { | |
66 | return runstate_check(RUN_STATE_COLO) || !runstate_is_running(); | |
67 | } | |
68 | ||
9d2db376 HZ |
/*
 * Perform failover on the Secondary side: leave the COLO migration state,
 * stop block replication, notify the net filters, unblock the COLO
 * incoming thread and finally mark the failover as completed.
 *
 * If device state is currently being loaded, the failover is only marked
 * as FAILOVER_STATUS_RELAUNCH and nothing else is done here.
 *
 * Aborts when built without CONFIG_REPLICATION.
 */
static void secondary_vm_do_failover(void)
{
/* COLO needs enable block-replication */
#ifdef CONFIG_REPLICATION
    int prev_state;
    MigrationIncomingState *incoming = migration_incoming_get_current();
    Error *err = NULL;

    /*
     * Failover must not run while the VM is loading its VMstate, or it
     * would break the secondary VM.  Defer it by flagging RELAUNCH.
     */
    if (vmstate_loading) {
        prev_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
                                        FAILOVER_STATUS_RELAUNCH);
        if (prev_state != FAILOVER_STATUS_ACTIVE) {
            error_report("Unknown error while do failover for secondary VM,"
                         "old_state: %s", FailoverStatus_str(prev_state));
        }
        return;
    }

    migrate_set_state(&incoming->state, MIGRATION_STATUS_COLO,
                      MIGRATION_STATUS_COMPLETED);

    replication_stop_all(true, &err);
    if (err) {
        error_report_err(err);
        /* The pointer is reused for the next call, so clear it. */
        err = NULL;
    }

    /* Notify all filters of all NIC to do checkpoint */
    colo_notify_filters_event(COLO_EVENT_FAILOVER, &err);
    if (err) {
        error_report_err(err);
    }

    if (!autostart) {
        error_report("\"-S\" qemu option will be ignored in secondary side");
        /* recover runstate to normal migration finish state */
        autostart = true;
    }

    /*
     * Make sure the COLO incoming thread is not blocked in recv or send.
     * If from_src_file and to_src_file share one fd, the second shutdown()
     * returns -1; that value is ignored because it is harmless.
     */
    if (incoming->from_src_file) {
        qemu_file_shutdown(incoming->from_src_file);
    }
    if (incoming->to_src_file) {
        qemu_file_shutdown(incoming->to_src_file);
    }

    prev_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
                                    FAILOVER_STATUS_COMPLETED);
    if (prev_state != FAILOVER_STATUS_ACTIVE) {
        error_report("Incorrect state (%s) while doing failover for "
                     "secondary VM", FailoverStatus_str(prev_state));
        return;
    }

    /* Notify COLO incoming thread that failover work is finished */
    qemu_sem_post(&incoming->colo_incoming_sem);

    /* For Secondary VM, jump to incoming co */
    if (incoming->migration_incoming_co) {
        qemu_coroutine_enter(incoming->migration_incoming_co);
    }
#else
    abort();
#endif
}
141 | ||
b3f7f0c5 HZ |
/*
 * Perform failover on the Primary side: leave the COLO migration state,
 * wake up the COLO thread, mark the failover as completed, stop block
 * replication and signal the COLO thread that failover work is done.
 *
 * Aborts when built without CONFIG_REPLICATION.
 */
static void primary_vm_do_failover(void)
{
#ifdef CONFIG_REPLICATION
    MigrationState *ms = migrate_get_current();
    int prev_state;
    Error *err = NULL;

    migrate_set_state(&ms->state, MIGRATION_STATUS_COLO,
                      MIGRATION_STATUS_COMPLETED);

    /* Kick the COLO thread, which might be waiting for a checkpoint. */
    colo_checkpoint_notify(migrate_get_current());

    /*
     * Wake up the COLO thread, which may be blocked in recv() or send().
     * rp_state.from_dst_file and to_dst_file may use the same fd; shutting
     * it down twice is harmless.
     */
    if (ms->to_dst_file) {
        qemu_file_shutdown(ms->to_dst_file);
    }
    if (ms->rp_state.from_dst_file) {
        qemu_file_shutdown(ms->rp_state.from_dst_file);
    }

    prev_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
                                    FAILOVER_STATUS_COMPLETED);
    if (prev_state != FAILOVER_STATUS_ACTIVE) {
        error_report("Incorrect state (%s) while doing failover for Primary VM",
                     FailoverStatus_str(prev_state));
        return;
    }

    replication_stop_all(true, &err);
    if (err) {
        error_report_err(err);
        err = NULL;
    }

    /* Notify COLO thread that failover work is finished */
    qemu_sem_post(&ms->colo_exit_sem);
#else
    abort();
#endif
}
189 | ||
aad555c2 ZC |
190 | COLOMode get_colo_mode(void) |
191 | { | |
192 | if (migration_in_colo_state()) { | |
193 | return COLO_MODE_PRIMARY; | |
194 | } else if (migration_incoming_in_colo_state()) { | |
195 | return COLO_MODE_SECONDARY; | |
196 | } else { | |
41b6b779 | 197 | return COLO_MODE_NONE; |
aad555c2 ZC |
198 | } |
199 | } | |
200 | ||
c0913d1d | 201 | void colo_do_failover(void) |
b3f7f0c5 HZ |
202 | { |
203 | /* Make sure VM stopped while failover happened. */ | |
204 | if (!colo_runstate_is_stopped()) { | |
205 | vm_stop_force_state(RUN_STATE_COLO); | |
206 | } | |
207 | ||
82cd368c ZC |
208 | switch (get_colo_mode()) { |
209 | case COLO_MODE_PRIMARY: | |
b3f7f0c5 | 210 | primary_vm_do_failover(); |
82cd368c ZC |
211 | break; |
212 | case COLO_MODE_SECONDARY: | |
9d2db376 | 213 | secondary_vm_do_failover(); |
82cd368c ZC |
214 | break; |
215 | default: | |
216 | error_report("colo_do_failover failed because the colo mode" | |
217 | " could not be obtained"); | |
b3f7f0c5 HZ |
218 | } |
219 | } | |
220 | ||
335d10cd | 221 | #ifdef CONFIG_REPLICATION |
2c9639ec ZC |
222 | void qmp_xen_set_replication(bool enable, bool primary, |
223 | bool has_failover, bool failover, | |
224 | Error **errp) | |
225 | { | |
226 | ReplicationMode mode = primary ? | |
227 | REPLICATION_MODE_PRIMARY : | |
228 | REPLICATION_MODE_SECONDARY; | |
229 | ||
230 | if (has_failover && enable) { | |
231 | error_setg(errp, "Parameter 'failover' is only for" | |
232 | " stopping replication"); | |
233 | return; | |
234 | } | |
235 | ||
236 | if (enable) { | |
237 | replication_start_all(mode, errp); | |
238 | } else { | |
239 | if (!has_failover) { | |
240 | failover = NULL; | |
241 | } | |
242 | replication_stop_all(failover, failover ? NULL : errp); | |
243 | } | |
244 | } | |
245 | ||
daa33c52 ZC |
246 | ReplicationStatus *qmp_query_xen_replication_status(Error **errp) |
247 | { | |
248 | Error *err = NULL; | |
249 | ReplicationStatus *s = g_new0(ReplicationStatus, 1); | |
250 | ||
251 | replication_get_error_all(&err); | |
252 | if (err) { | |
253 | s->error = true; | |
254 | s->has_desc = true; | |
255 | s->desc = g_strdup(error_get_pretty(err)); | |
256 | } else { | |
257 | s->error = false; | |
258 | } | |
259 | ||
260 | error_free(err); | |
261 | return s; | |
262 | } | |
263 | ||
264 | void qmp_xen_colo_do_checkpoint(Error **errp) | |
265 | { | |
735527e1 MA |
266 | Error *err = NULL; |
267 | ||
268 | replication_do_checkpoint_all(&err); | |
269 | if (err) { | |
270 | error_propagate(errp, err); | |
271 | return; | |
272 | } | |
0e8818f0 ZC |
273 | /* Notify all filters of all NIC to do checkpoint */ |
274 | colo_notify_filters_event(COLO_EVENT_CHECKPOINT, errp); | |
daa33c52 | 275 | } |
335d10cd | 276 | #endif |
daa33c52 | 277 | |
f56c0065 ZC |
278 | COLOStatus *qmp_query_colo_status(Error **errp) |
279 | { | |
280 | COLOStatus *s = g_new0(COLOStatus, 1); | |
281 | ||
282 | s->mode = get_colo_mode(); | |
5ed0deca | 283 | s->last_mode = last_colo_mode; |
f56c0065 ZC |
284 | |
285 | switch (failover_get_state()) { | |
286 | case FAILOVER_STATUS_NONE: | |
287 | s->reason = COLO_EXIT_REASON_NONE; | |
288 | break; | |
1fe6ab26 | 289 | case FAILOVER_STATUS_COMPLETED: |
f56c0065 ZC |
290 | s->reason = COLO_EXIT_REASON_REQUEST; |
291 | break; | |
292 | default: | |
3a43ac47 ZC |
293 | if (migration_in_colo_state()) { |
294 | s->reason = COLO_EXIT_REASON_PROCESSING; | |
295 | } else { | |
296 | s->reason = COLO_EXIT_REASON_ERROR; | |
297 | } | |
f56c0065 ZC |
298 | } |
299 | ||
300 | return s; | |
301 | } | |
302 | ||
4f97558e HZ |
303 | static void colo_send_message(QEMUFile *f, COLOMessage msg, |
304 | Error **errp) | |
305 | { | |
306 | int ret; | |
307 | ||
308 | if (msg >= COLO_MESSAGE__MAX) { | |
309 | error_setg(errp, "%s: Invalid message", __func__); | |
310 | return; | |
311 | } | |
312 | qemu_put_be32(f, msg); | |
313 | qemu_fflush(f); | |
314 | ||
315 | ret = qemu_file_get_error(f); | |
316 | if (ret < 0) { | |
317 | error_setg_errno(errp, -ret, "Can't send COLO message"); | |
318 | } | |
977c736f | 319 | trace_colo_send_message(COLOMessage_str(msg)); |
4f97558e HZ |
320 | } |
321 | ||
a91246c9 HZ |
322 | static void colo_send_message_value(QEMUFile *f, COLOMessage msg, |
323 | uint64_t value, Error **errp) | |
324 | { | |
325 | Error *local_err = NULL; | |
326 | int ret; | |
327 | ||
328 | colo_send_message(f, msg, &local_err); | |
329 | if (local_err) { | |
330 | error_propagate(errp, local_err); | |
331 | return; | |
332 | } | |
333 | qemu_put_be64(f, value); | |
334 | qemu_fflush(f); | |
335 | ||
336 | ret = qemu_file_get_error(f); | |
337 | if (ret < 0) { | |
338 | error_setg_errno(errp, -ret, "Failed to send value for message:%s", | |
977c736f | 339 | COLOMessage_str(msg)); |
a91246c9 HZ |
340 | } |
341 | } | |
342 | ||
4f97558e HZ |
343 | static COLOMessage colo_receive_message(QEMUFile *f, Error **errp) |
344 | { | |
345 | COLOMessage msg; | |
346 | int ret; | |
347 | ||
348 | msg = qemu_get_be32(f); | |
349 | ret = qemu_file_get_error(f); | |
350 | if (ret < 0) { | |
351 | error_setg_errno(errp, -ret, "Can't receive COLO message"); | |
352 | return msg; | |
353 | } | |
354 | if (msg >= COLO_MESSAGE__MAX) { | |
355 | error_setg(errp, "%s: Invalid message", __func__); | |
356 | return msg; | |
357 | } | |
977c736f | 358 | trace_colo_receive_message(COLOMessage_str(msg)); |
4f97558e HZ |
359 | return msg; |
360 | } | |
361 | ||
362 | static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg, | |
363 | Error **errp) | |
364 | { | |
365 | COLOMessage msg; | |
366 | Error *local_err = NULL; | |
367 | ||
368 | msg = colo_receive_message(f, &local_err); | |
369 | if (local_err) { | |
370 | error_propagate(errp, local_err); | |
371 | return; | |
372 | } | |
373 | if (msg != expect_msg) { | |
374 | error_setg(errp, "Unexpected COLO message %d, expected %d", | |
375 | msg, expect_msg); | |
376 | } | |
377 | } | |
378 | ||
4291d372 HZ |
379 | static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg, |
380 | Error **errp) | |
381 | { | |
382 | Error *local_err = NULL; | |
383 | uint64_t value; | |
384 | int ret; | |
385 | ||
386 | colo_receive_check_message(f, expect_msg, &local_err); | |
387 | if (local_err) { | |
388 | error_propagate(errp, local_err); | |
389 | return 0; | |
390 | } | |
391 | ||
392 | value = qemu_get_be64(f); | |
393 | ret = qemu_file_get_error(f); | |
394 | if (ret < 0) { | |
395 | error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s", | |
977c736f | 396 | COLOMessage_str(expect_msg)); |
4291d372 HZ |
397 | } |
398 | return value; | |
399 | } | |
400 | ||
a91246c9 HZ |
401 | static int colo_do_checkpoint_transaction(MigrationState *s, |
402 | QIOChannelBuffer *bioc, | |
403 | QEMUFile *fb) | |
4f97558e HZ |
404 | { |
405 | Error *local_err = NULL; | |
a91246c9 | 406 | int ret = -1; |
4f97558e HZ |
407 | |
408 | colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST, | |
409 | &local_err); | |
410 | if (local_err) { | |
411 | goto out; | |
412 | } | |
413 | ||
414 | colo_receive_check_message(s->rp_state.from_dst_file, | |
415 | COLO_MESSAGE_CHECKPOINT_REPLY, &local_err); | |
416 | if (local_err) { | |
417 | goto out; | |
418 | } | |
a91246c9 HZ |
419 | /* Reset channel-buffer directly */ |
420 | qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); | |
421 | bioc->usage = 0; | |
4f97558e | 422 | |
a91246c9 | 423 | qemu_mutex_lock_iothread(); |
b3f7f0c5 HZ |
424 | if (failover_get_state() != FAILOVER_STATUS_NONE) { |
425 | qemu_mutex_unlock_iothread(); | |
426 | goto out; | |
427 | } | |
a91246c9 HZ |
428 | vm_stop_force_state(RUN_STATE_COLO); |
429 | qemu_mutex_unlock_iothread(); | |
430 | trace_colo_vm_state_change("run", "stop"); | |
b3f7f0c5 HZ |
431 | /* |
432 | * Failover request bh could be called after vm_stop_force_state(), | |
433 | * So we need check failover_request_is_active() again. | |
434 | */ | |
435 | if (failover_get_state() != FAILOVER_STATUS_NONE) { | |
436 | goto out; | |
437 | } | |
a91246c9 HZ |
438 | |
439 | /* Disable block migration */ | |
ce7c817c | 440 | migrate_set_block_enabled(false, &local_err); |
58602676 PMD |
441 | if (local_err) { |
442 | goto out; | |
443 | } | |
a91246c9 | 444 | qemu_mutex_lock_iothread(); |
3ebb9c4f ZC |
445 | |
446 | #ifdef CONFIG_REPLICATION | |
8e48ac95 ZC |
447 | replication_do_checkpoint_all(&local_err); |
448 | if (local_err) { | |
449 | qemu_mutex_unlock_iothread(); | |
450 | goto out; | |
451 | } | |
3ebb9c4f ZC |
452 | #else |
453 | abort(); | |
454 | #endif | |
4f97558e HZ |
455 | |
456 | colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err); | |
457 | if (local_err) { | |
3f6df99d ZC |
458 | qemu_mutex_unlock_iothread(); |
459 | goto out; | |
460 | } | |
461 | /* Note: device state is saved into buffer */ | |
462 | ret = qemu_save_device_state(fb); | |
463 | ||
464 | qemu_mutex_unlock_iothread(); | |
465 | if (ret < 0) { | |
4f97558e HZ |
466 | goto out; |
467 | } | |
3f6df99d ZC |
468 | /* |
469 | * Only save VM's live state, which not including device state. | |
470 | * TODO: We may need a timeout mechanism to prevent COLO process | |
471 | * to be blocked here. | |
472 | */ | |
473 | qemu_savevm_live_state(s->to_dst_file); | |
474 | ||
475 | qemu_fflush(fb); | |
476 | ||
a91246c9 HZ |
477 | /* |
478 | * We need the size of the VMstate data in Secondary side, | |
479 | * With which we can decide how much data should be read. | |
480 | */ | |
481 | colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE, | |
482 | bioc->usage, &local_err); | |
483 | if (local_err) { | |
484 | goto out; | |
485 | } | |
4f97558e | 486 | |
a91246c9 HZ |
487 | qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage); |
488 | qemu_fflush(s->to_dst_file); | |
489 | ret = qemu_file_get_error(s->to_dst_file); | |
490 | if (ret < 0) { | |
491 | goto out; | |
492 | } | |
4f97558e HZ |
493 | |
494 | colo_receive_check_message(s->rp_state.from_dst_file, | |
495 | COLO_MESSAGE_VMSTATE_RECEIVED, &local_err); | |
496 | if (local_err) { | |
497 | goto out; | |
498 | } | |
499 | ||
4fa8ed25 LS |
500 | qemu_event_reset(&s->colo_checkpoint_event); |
501 | colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err); | |
502 | if (local_err) { | |
503 | goto out; | |
504 | } | |
505 | ||
4f97558e HZ |
506 | colo_receive_check_message(s->rp_state.from_dst_file, |
507 | COLO_MESSAGE_VMSTATE_LOADED, &local_err); | |
508 | if (local_err) { | |
509 | goto out; | |
510 | } | |
511 | ||
a91246c9 HZ |
512 | ret = 0; |
513 | ||
514 | qemu_mutex_lock_iothread(); | |
515 | vm_start(); | |
516 | qemu_mutex_unlock_iothread(); | |
517 | trace_colo_vm_state_change("stop", "run"); | |
4f97558e | 518 | |
4f97558e HZ |
519 | out: |
520 | if (local_err) { | |
521 | error_report_err(local_err); | |
522 | } | |
a91246c9 | 523 | return ret; |
4f97558e HZ |
524 | } |
525 | ||
131b2153 ZC |
526 | static void colo_compare_notify_checkpoint(Notifier *notifier, void *data) |
527 | { | |
528 | colo_checkpoint_notify(data); | |
529 | } | |
530 | ||
0b827d5e HZ |
531 | static void colo_process_checkpoint(MigrationState *s) |
532 | { | |
a91246c9 HZ |
533 | QIOChannelBuffer *bioc; |
534 | QEMUFile *fb = NULL; | |
479125d5 | 535 | int64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); |
4f97558e HZ |
536 | Error *local_err = NULL; |
537 | int ret; | |
538 | ||
5ed0deca ZC |
539 | last_colo_mode = get_colo_mode(); |
540 | if (last_colo_mode != COLO_MODE_PRIMARY) { | |
541 | error_report("COLO mode must be COLO_MODE_PRIMARY"); | |
542 | return; | |
543 | } | |
544 | ||
aef06085 HZ |
545 | failover_init_state(); |
546 | ||
56ba83d2 HZ |
547 | s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file); |
548 | if (!s->rp_state.from_dst_file) { | |
549 | error_report("Open QEMUFile from_dst_file failed"); | |
550 | goto out; | |
551 | } | |
552 | ||
131b2153 ZC |
553 | packets_compare_notifier.notify = colo_compare_notify_checkpoint; |
554 | colo_compare_register_notifier(&packets_compare_notifier); | |
555 | ||
4f97558e HZ |
556 | /* |
557 | * Wait for Secondary finish loading VM states and enter COLO | |
558 | * restore. | |
559 | */ | |
560 | colo_receive_check_message(s->rp_state.from_dst_file, | |
561 | COLO_MESSAGE_CHECKPOINT_READY, &local_err); | |
562 | if (local_err) { | |
563 | goto out; | |
564 | } | |
a91246c9 HZ |
565 | bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE); |
566 | fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc)); | |
567 | object_unref(OBJECT(bioc)); | |
4f97558e | 568 | |
0b827d5e | 569 | qemu_mutex_lock_iothread(); |
3ebb9c4f | 570 | #ifdef CONFIG_REPLICATION |
8e48ac95 ZC |
571 | replication_start_all(REPLICATION_MODE_PRIMARY, &local_err); |
572 | if (local_err) { | |
573 | qemu_mutex_unlock_iothread(); | |
574 | goto out; | |
575 | } | |
3ebb9c4f ZC |
576 | #else |
577 | abort(); | |
578 | #endif | |
8e48ac95 | 579 | |
0b827d5e HZ |
580 | vm_start(); |
581 | qemu_mutex_unlock_iothread(); | |
582 | trace_colo_vm_state_change("stop", "run"); | |
583 | ||
479125d5 HZ |
584 | timer_mod(s->colo_delay_timer, |
585 | current_time + s->parameters.x_checkpoint_delay); | |
586 | ||
4f97558e | 587 | while (s->state == MIGRATION_STATUS_COLO) { |
b3f7f0c5 HZ |
588 | if (failover_get_state() != FAILOVER_STATUS_NONE) { |
589 | error_report("failover request"); | |
590 | goto out; | |
591 | } | |
592 | ||
bb70b66e | 593 | qemu_event_wait(&s->colo_checkpoint_event); |
18cc23d7 | 594 | |
2518aec1 HZ |
595 | if (s->state != MIGRATION_STATUS_COLO) { |
596 | goto out; | |
597 | } | |
a91246c9 | 598 | ret = colo_do_checkpoint_transaction(s, bioc, fb); |
4f97558e HZ |
599 | if (ret < 0) { |
600 | goto out; | |
601 | } | |
602 | } | |
0b827d5e | 603 | |
56ba83d2 | 604 | out: |
4f97558e HZ |
605 | /* Throw the unreported error message after exited from loop */ |
606 | if (local_err) { | |
607 | error_report_err(local_err); | |
608 | } | |
609 | ||
a91246c9 HZ |
610 | if (fb) { |
611 | qemu_fclose(fb); | |
612 | } | |
613 | ||
9ecff6d6 HZ |
614 | /* |
615 | * There are only two reasons we can get here, some error happened | |
616 | * or the user triggered failover. | |
617 | */ | |
618 | switch (failover_get_state()) { | |
1fe6ab26 | 619 | case FAILOVER_STATUS_COMPLETED: |
9ecff6d6 HZ |
620 | qapi_event_send_colo_exit(COLO_MODE_PRIMARY, |
621 | COLO_EXIT_REASON_REQUEST); | |
622 | break; | |
623 | default: | |
3a43ac47 ZC |
624 | qapi_event_send_colo_exit(COLO_MODE_PRIMARY, |
625 | COLO_EXIT_REASON_ERROR); | |
9ecff6d6 HZ |
626 | } |
627 | ||
c937b9a6 HZ |
628 | /* Hope this not to be too long to wait here */ |
629 | qemu_sem_wait(&s->colo_exit_sem); | |
630 | qemu_sem_destroy(&s->colo_exit_sem); | |
131b2153 ZC |
631 | |
632 | /* | |
633 | * It is safe to unregister notifier after failover finished. | |
634 | * Besides, colo_delay_timer and colo_checkpoint_sem can't be | |
635 | * released befor unregister notifier, or there will be use-after-free | |
636 | * error. | |
637 | */ | |
638 | colo_compare_unregister_notifier(&packets_compare_notifier); | |
639 | timer_del(s->colo_delay_timer); | |
640 | timer_free(s->colo_delay_timer); | |
bb70b66e | 641 | qemu_event_destroy(&s->colo_checkpoint_event); |
131b2153 | 642 | |
c937b9a6 HZ |
643 | /* |
644 | * Must be called after failover BH is completed, | |
645 | * Or the failover BH may shutdown the wrong fd that | |
646 | * re-used by other threads after we release here. | |
647 | */ | |
56ba83d2 HZ |
648 | if (s->rp_state.from_dst_file) { |
649 | qemu_fclose(s->rp_state.from_dst_file); | |
650 | } | |
0b827d5e HZ |
651 | } |
652 | ||
479125d5 HZ |
653 | void colo_checkpoint_notify(void *opaque) |
654 | { | |
655 | MigrationState *s = opaque; | |
656 | int64_t next_notify_time; | |
657 | ||
bb70b66e | 658 | qemu_event_set(&s->colo_checkpoint_event); |
479125d5 HZ |
659 | s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); |
660 | next_notify_time = s->colo_checkpoint_time + | |
661 | s->parameters.x_checkpoint_delay; | |
662 | timer_mod(s->colo_delay_timer, next_notify_time); | |
663 | } | |
664 | ||
0b827d5e HZ |
665 | void migrate_start_colo_process(MigrationState *s) |
666 | { | |
667 | qemu_mutex_unlock_iothread(); | |
bb70b66e | 668 | qemu_event_init(&s->colo_checkpoint_event, false); |
479125d5 HZ |
669 | s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST, |
670 | colo_checkpoint_notify, s); | |
671 | ||
c937b9a6 | 672 | qemu_sem_init(&s->colo_exit_sem, 0); |
0b827d5e HZ |
673 | migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, |
674 | MIGRATION_STATUS_COLO); | |
675 | colo_process_checkpoint(s); | |
676 | qemu_mutex_lock_iothread(); | |
677 | } | |
25d0c16f | 678 | |
6ad8ad38 HZ |
679 | static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, |
680 | QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp) | |
681 | { | |
682 | uint64_t total_size; | |
683 | uint64_t value; | |
684 | Error *local_err = NULL; | |
685 | int ret; | |
686 | ||
687 | qemu_mutex_lock_iothread(); | |
688 | vm_stop_force_state(RUN_STATE_COLO); | |
689 | trace_colo_vm_state_change("run", "stop"); | |
690 | qemu_mutex_unlock_iothread(); | |
691 | ||
692 | /* FIXME: This is unnecessary for periodic checkpoint mode */ | |
693 | colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY, | |
694 | &local_err); | |
695 | if (local_err) { | |
696 | error_propagate(errp, local_err); | |
697 | return; | |
698 | } | |
699 | ||
700 | colo_receive_check_message(mis->from_src_file, | |
701 | COLO_MESSAGE_VMSTATE_SEND, &local_err); | |
702 | if (local_err) { | |
703 | error_propagate(errp, local_err); | |
704 | return; | |
705 | } | |
706 | ||
707 | qemu_mutex_lock_iothread(); | |
786d8b8e | 708 | cpu_synchronize_all_states(); |
6ad8ad38 HZ |
709 | ret = qemu_loadvm_state_main(mis->from_src_file, mis); |
710 | qemu_mutex_unlock_iothread(); | |
711 | ||
712 | if (ret < 0) { | |
713 | error_setg(errp, "Load VM's live state (ram) error"); | |
714 | return; | |
715 | } | |
716 | ||
717 | value = colo_receive_message_value(mis->from_src_file, | |
718 | COLO_MESSAGE_VMSTATE_SIZE, &local_err); | |
719 | if (local_err) { | |
720 | error_propagate(errp, local_err); | |
721 | return; | |
722 | } | |
723 | ||
724 | /* | |
725 | * Read VM device state data into channel buffer, | |
726 | * It's better to re-use the memory allocated. | |
727 | * Here we need to handle the channel buffer directly. | |
728 | */ | |
729 | if (value > bioc->capacity) { | |
730 | bioc->capacity = value; | |
731 | bioc->data = g_realloc(bioc->data, bioc->capacity); | |
732 | } | |
733 | total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value); | |
734 | if (total_size != value) { | |
735 | error_setg(errp, "Got %" PRIu64 " VMState data, less than expected" | |
736 | " %" PRIu64, total_size, value); | |
737 | return; | |
738 | } | |
739 | bioc->usage = total_size; | |
740 | qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); | |
741 | ||
742 | colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED, | |
743 | &local_err); | |
744 | if (local_err) { | |
745 | error_propagate(errp, local_err); | |
746 | return; | |
747 | } | |
748 | ||
749 | qemu_mutex_lock_iothread(); | |
750 | vmstate_loading = true; | |
24fa16f8 | 751 | colo_flush_ram_cache(); |
6ad8ad38 HZ |
752 | ret = qemu_load_device_state(fb); |
753 | if (ret < 0) { | |
754 | error_setg(errp, "COLO: load device state failed"); | |
92c932de | 755 | vmstate_loading = false; |
6ad8ad38 HZ |
756 | qemu_mutex_unlock_iothread(); |
757 | return; | |
758 | } | |
759 | ||
760 | #ifdef CONFIG_REPLICATION | |
761 | replication_get_error_all(&local_err); | |
762 | if (local_err) { | |
763 | error_propagate(errp, local_err); | |
92c932de | 764 | vmstate_loading = false; |
6ad8ad38 HZ |
765 | qemu_mutex_unlock_iothread(); |
766 | return; | |
767 | } | |
768 | ||
769 | /* discard colo disk buffer */ | |
770 | replication_do_checkpoint_all(&local_err); | |
771 | if (local_err) { | |
772 | error_propagate(errp, local_err); | |
92c932de | 773 | vmstate_loading = false; |
6ad8ad38 HZ |
774 | qemu_mutex_unlock_iothread(); |
775 | return; | |
776 | } | |
777 | #else | |
778 | abort(); | |
779 | #endif | |
780 | /* Notify all filters of all NIC to do checkpoint */ | |
781 | colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err); | |
782 | ||
783 | if (local_err) { | |
784 | error_propagate(errp, local_err); | |
92c932de | 785 | vmstate_loading = false; |
6ad8ad38 HZ |
786 | qemu_mutex_unlock_iothread(); |
787 | return; | |
788 | } | |
789 | ||
790 | vmstate_loading = false; | |
791 | vm_start(); | |
792 | trace_colo_vm_state_change("stop", "run"); | |
793 | qemu_mutex_unlock_iothread(); | |
794 | ||
795 | if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { | |
6ad8ad38 HZ |
796 | return; |
797 | } | |
798 | ||
799 | colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED, | |
800 | &local_err); | |
801 | if (local_err) { | |
802 | error_propagate(errp, local_err); | |
803 | } | |
804 | } | |
805 | ||
806 | static void colo_wait_handle_message(MigrationIncomingState *mis, | |
807 | QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp) | |
4f97558e HZ |
808 | { |
809 | COLOMessage msg; | |
810 | Error *local_err = NULL; | |
811 | ||
6ad8ad38 | 812 | msg = colo_receive_message(mis->from_src_file, &local_err); |
4f97558e HZ |
813 | if (local_err) { |
814 | error_propagate(errp, local_err); | |
815 | return; | |
816 | } | |
817 | ||
818 | switch (msg) { | |
819 | case COLO_MESSAGE_CHECKPOINT_REQUEST: | |
6ad8ad38 | 820 | colo_incoming_process_checkpoint(mis, fb, bioc, errp); |
4f97558e HZ |
821 | break; |
822 | default: | |
4f97558e HZ |
823 | error_setg(errp, "Got unknown COLO message: %d", msg); |
824 | break; | |
825 | } | |
826 | } | |
827 | ||
25d0c16f HZ |
828 | void *colo_process_incoming_thread(void *opaque) |
829 | { | |
830 | MigrationIncomingState *mis = opaque; | |
4291d372 HZ |
831 | QEMUFile *fb = NULL; |
832 | QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */ | |
4f97558e | 833 | Error *local_err = NULL; |
25d0c16f | 834 | |
74637e6f | 835 | rcu_register_thread(); |
c937b9a6 HZ |
836 | qemu_sem_init(&mis->colo_incoming_sem, 0); |
837 | ||
25d0c16f HZ |
838 | migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, |
839 | MIGRATION_STATUS_COLO); | |
840 | ||
5ed0deca ZC |
841 | last_colo_mode = get_colo_mode(); |
842 | if (last_colo_mode != COLO_MODE_SECONDARY) { | |
843 | error_report("COLO mode must be COLO_MODE_SECONDARY"); | |
844 | return NULL; | |
845 | } | |
846 | ||
aef06085 HZ |
847 | failover_init_state(); |
848 | ||
56ba83d2 HZ |
849 | mis->to_src_file = qemu_file_get_return_path(mis->from_src_file); |
850 | if (!mis->to_src_file) { | |
851 | error_report("COLO incoming thread: Open QEMUFile to_src_file failed"); | |
852 | goto out; | |
853 | } | |
854 | /* | |
855 | * Note: the communication between Primary side and Secondary side | |
856 | * should be sequential, we set the fd to unblocked in migration incoming | |
857 | * coroutine, and here we are in the COLO incoming thread, so it is ok to | |
858 | * set the fd back to blocked. | |
859 | */ | |
860 | qemu_file_set_blocking(mis->from_src_file, true); | |
861 | ||
0393031a HZ |
862 | colo_incoming_start_dirty_log(); |
863 | ||
4291d372 HZ |
864 | bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE); |
865 | fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc)); | |
866 | object_unref(OBJECT(bioc)); | |
867 | ||
131b2153 | 868 | qemu_mutex_lock_iothread(); |
3ebb9c4f | 869 | #ifdef CONFIG_REPLICATION |
8e48ac95 ZC |
870 | replication_start_all(REPLICATION_MODE_SECONDARY, &local_err); |
871 | if (local_err) { | |
872 | qemu_mutex_unlock_iothread(); | |
873 | goto out; | |
874 | } | |
3ebb9c4f ZC |
875 | #else |
876 | abort(); | |
877 | #endif | |
131b2153 ZC |
878 | vm_start(); |
879 | trace_colo_vm_state_change("stop", "run"); | |
880 | qemu_mutex_unlock_iothread(); | |
881 | ||
4f97558e HZ |
882 | colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY, |
883 | &local_err); | |
884 | if (local_err) { | |
885 | goto out; | |
886 | } | |
887 | ||
888 | while (mis->state == MIGRATION_STATUS_COLO) { | |
6ad8ad38 | 889 | colo_wait_handle_message(mis, fb, bioc, &local_err); |
4f97558e | 890 | if (local_err) { |
6ad8ad38 HZ |
891 | error_report_err(local_err); |
892 | break; | |
4f97558e | 893 | } |
92c932de LS |
894 | |
895 | if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { | |
896 | failover_set_state(FAILOVER_STATUS_RELAUNCH, | |
897 | FAILOVER_STATUS_NONE); | |
898 | failover_request_active(NULL); | |
899 | break; | |
900 | } | |
901 | ||
9d2db376 HZ |
902 | if (failover_get_state() != FAILOVER_STATUS_NONE) { |
903 | error_report("failover request"); | |
6ad8ad38 | 904 | break; |
4f97558e HZ |
905 | } |
906 | } | |
25d0c16f | 907 | |
56ba83d2 | 908 | out: |
3a43ac47 ZC |
909 | /* |
910 | * There are only two reasons we can get here, some error happened | |
911 | * or the user triggered failover. | |
912 | */ | |
9ecff6d6 | 913 | switch (failover_get_state()) { |
1fe6ab26 | 914 | case FAILOVER_STATUS_COMPLETED: |
9ecff6d6 HZ |
915 | qapi_event_send_colo_exit(COLO_MODE_SECONDARY, |
916 | COLO_EXIT_REASON_REQUEST); | |
917 | break; | |
918 | default: | |
3a43ac47 ZC |
919 | qapi_event_send_colo_exit(COLO_MODE_SECONDARY, |
920 | COLO_EXIT_REASON_ERROR); | |
9ecff6d6 HZ |
921 | } |
922 | ||
4291d372 HZ |
923 | if (fb) { |
924 | qemu_fclose(fb); | |
925 | } | |
926 | ||
c937b9a6 HZ |
927 | /* Hope this not to be too long to loop here */ |
928 | qemu_sem_wait(&mis->colo_incoming_sem); | |
929 | qemu_sem_destroy(&mis->colo_incoming_sem); | |
930 | /* Must be called after failover BH is completed */ | |
56ba83d2 HZ |
931 | if (mis->to_src_file) { |
932 | qemu_fclose(mis->to_src_file); | |
b8b5734b | 933 | mis->to_src_file = NULL; |
56ba83d2 | 934 | } |
25d0c16f | 935 | |
74637e6f | 936 | rcu_unregister_thread(); |
25d0c16f HZ |
937 | return NULL; |
938 | } |