1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  * Copyright (c) 2011-2015 Red Hat Inc
6  *
7  * Authors:
8  *  Juan Quintela <[email protected]>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  */
28 #include <stdint.h>
29 #include <zlib.h>
30 #include "qemu/bitops.h"
31 #include "qemu/bitmap.h"
32 #include "qemu/timer.h"
33 #include "qemu/main-loop.h"
34 #include "migration/migration.h"
35 #include "exec/address-spaces.h"
36 #include "migration/page_cache.h"
37 #include "qemu/error-report.h"
38 #include "trace.h"
39 #include "exec/ram_addr.h"
40 #include "qemu/rcu_queue.h"
41
42 #ifdef DEBUG_MIGRATION_RAM
43 #define DPRINTF(fmt, ...) \
44     do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
45 #else
46 #define DPRINTF(fmt, ...) \
47     do { } while (0)
48 #endif
49
50 static bool mig_throttle_on;
51 static int dirty_rate_high_cnt;
52 static void check_guest_throttling(void);
53
54 static uint64_t bitmap_sync_count;
55
56 /***********************************************************/
57 /* ram save/restore */
58
59 #define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
60 #define RAM_SAVE_FLAG_COMPRESS 0x02
61 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
62 #define RAM_SAVE_FLAG_PAGE     0x08
63 #define RAM_SAVE_FLAG_EOS      0x10
64 #define RAM_SAVE_FLAG_CONTINUE 0x20
65 #define RAM_SAVE_FLAG_XBZRLE   0x40
66 /* 0x80 is reserved in migration.h; start with 0x100 for the next flag */
67 #define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
68
69 static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
70
71 static inline bool is_zero_range(uint8_t *p, uint64_t size)
72 {
73     return buffer_find_nonzero_offset(p, size) == size;
74 }
75
76 /* This struct contains the XBZRLE cache and a static page
77    used by the compression */
78 static struct {
79     /* buffer used for XBZRLE encoding */
80     uint8_t *encoded_buf;
81     /* buffer for storing page content */
82     uint8_t *current_buf;
83     /* Cache for XBZRLE, Protected by lock. */
84     PageCache *cache;
85     QemuMutex lock;
86 } XBZRLE;
87
88 /* buffer used for XBZRLE decoding */
89 static uint8_t *xbzrle_decoded_buf;
90
91 static void XBZRLE_cache_lock(void)
92 {
93     if (migrate_use_xbzrle())
94         qemu_mutex_lock(&XBZRLE.lock);
95 }
96
97 static void XBZRLE_cache_unlock(void)
98 {
99     if (migrate_use_xbzrle())
100         qemu_mutex_unlock(&XBZRLE.lock);
101 }
102
103 /*
104  * called from qmp_migrate_set_cache_size in main thread, possibly while
105  * a migration is in progress.
106  * A running migration may be using the cache and might finish during this
107  * call, hence changes to the cache are protected by XBZRLE.lock.
108  */
109 int64_t xbzrle_cache_resize(int64_t new_size)
110 {
111     PageCache *new_cache;
112     int64_t ret;
113
114     if (new_size < TARGET_PAGE_SIZE) {
115         return -1;
116     }
117
118     XBZRLE_cache_lock();
119
120     if (XBZRLE.cache != NULL) {
121         if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
122             goto out_new_size;
123         }
124         new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
125                                         TARGET_PAGE_SIZE);
126         if (!new_cache) {
127             error_report("Error creating cache");
128             ret = -1;
129             goto out;
130         }
131
132         cache_fini(XBZRLE.cache);
133         XBZRLE.cache = new_cache;
134     }
135
136 out_new_size:
137     ret = pow2floor(new_size);
138 out:
139     XBZRLE_cache_unlock();
140     return ret;
141 }
142
143 /* accounting for migration statistics */
144 typedef struct AccountingInfo {
145     uint64_t dup_pages;
146     uint64_t skipped_pages;
147     uint64_t norm_pages;
148     uint64_t iterations;
149     uint64_t xbzrle_bytes;
150     uint64_t xbzrle_pages;
151     uint64_t xbzrle_cache_miss;
152     double xbzrle_cache_miss_rate;
153     uint64_t xbzrle_overflows;
154 } AccountingInfo;
155
156 static AccountingInfo acct_info;
157
158 static void acct_clear(void)
159 {
160     memset(&acct_info, 0, sizeof(acct_info));
161 }
162
163 uint64_t dup_mig_bytes_transferred(void)
164 {
165     return acct_info.dup_pages * TARGET_PAGE_SIZE;
166 }
167
168 uint64_t dup_mig_pages_transferred(void)
169 {
170     return acct_info.dup_pages;
171 }
172
173 uint64_t skipped_mig_bytes_transferred(void)
174 {
175     return acct_info.skipped_pages * TARGET_PAGE_SIZE;
176 }
177
178 uint64_t skipped_mig_pages_transferred(void)
179 {
180     return acct_info.skipped_pages;
181 }
182
183 uint64_t norm_mig_bytes_transferred(void)
184 {
185     return acct_info.norm_pages * TARGET_PAGE_SIZE;
186 }
187
188 uint64_t norm_mig_pages_transferred(void)
189 {
190     return acct_info.norm_pages;
191 }
192
193 uint64_t xbzrle_mig_bytes_transferred(void)
194 {
195     return acct_info.xbzrle_bytes;
196 }
197
198 uint64_t xbzrle_mig_pages_transferred(void)
199 {
200     return acct_info.xbzrle_pages;
201 }
202
203 uint64_t xbzrle_mig_pages_cache_miss(void)
204 {
205     return acct_info.xbzrle_cache_miss;
206 }
207
208 double xbzrle_mig_cache_miss_rate(void)
209 {
210     return acct_info.xbzrle_cache_miss_rate;
211 }
212
213 uint64_t xbzrle_mig_pages_overflow(void)
214 {
215     return acct_info.xbzrle_overflows;
216 }
217
218 /* This is the last block that we have visited searching for dirty pages
219  */
220 static RAMBlock *last_seen_block;
221 /* This is the last block from where we have sent data */
222 static RAMBlock *last_sent_block;
223 static ram_addr_t last_offset;
224 static unsigned long *migration_bitmap;
225 static uint64_t migration_dirty_pages;
226 static uint32_t last_version;
227 static bool ram_bulk_stage;
228
229 struct CompressParam {
230     bool start;
231     bool done;
232     QEMUFile *file;
233     QemuMutex mutex;
234     QemuCond cond;
235     RAMBlock *block;
236     ram_addr_t offset;
237 };
238 typedef struct CompressParam CompressParam;
239
240 struct DecompressParam {
241     bool start;
242     QemuMutex mutex;
243     QemuCond cond;
244     void *des;
245     uint8_t *compbuf;
246     int len;
247 };
248 typedef struct DecompressParam DecompressParam;
249
250 static CompressParam *comp_param;
251 static QemuThread *compress_threads;
252 /* comp_done_cond is used to wake up the migration thread when
253  * one of the compression threads has finished the compression.
254  * comp_done_lock is used together with comp_done_cond.
255  */
256 static QemuMutex *comp_done_lock;
257 static QemuCond *comp_done_cond;
258 /* The empty QEMUFileOps will be used by file in CompressParam */
259 static const QEMUFileOps empty_ops = { };
260
261 static bool compression_switch;
262 static bool quit_comp_thread;
263 static bool quit_decomp_thread;
264 static DecompressParam *decomp_param;
265 static QemuThread *decompress_threads;
266 static uint8_t *compressed_data_buf;
267
268 static int do_compress_ram_page(CompressParam *param);
269
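/**
 * do_data_compress: worker loop for one compression thread
 *
 * Waits on its CompressParam until the migration thread hands it a
 * (block, offset) pair, compresses that page into param->file and then
 * signals comp_done_cond so the migration thread can collect the result.
 *
 * @opaque: pointer to this thread's CompressParam
 */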
270 static void *do_data_compress(void *opaque)
271 {
272     CompressParam *param = opaque;
273
274     while (!quit_comp_thread) {
275         qemu_mutex_lock(&param->mutex);
276         /* Re-check quit_comp_thread in case
277          * terminate_compression_threads() was called just before
278          * qemu_mutex_lock(&param->mutex) and after
279          * while (!quit_comp_thread); re-checking it here makes
280          * sure the compression thread terminates as expected.
281          */
282         while (!param->start && !quit_comp_thread) {
283             qemu_cond_wait(&param->cond, &param->mutex);
284         }
285         if (!quit_comp_thread) {
286             do_compress_ram_page(param);
287         }
288         param->start = false;
289         qemu_mutex_unlock(&param->mutex);
290
291         qemu_mutex_lock(comp_done_lock);
292         param->done = true;
293         qemu_cond_signal(comp_done_cond);
294         qemu_mutex_unlock(comp_done_lock);
295     }
296
297     return NULL;
298 }
299
300 static inline void terminate_compression_threads(void)
301 {
302     int idx, thread_count;
303
304     thread_count = migrate_compress_threads();
305     quit_comp_thread = true;
306     for (idx = 0; idx < thread_count; idx++) {
307         qemu_mutex_lock(&comp_param[idx].mutex);
308         qemu_cond_signal(&comp_param[idx].cond);
309         qemu_mutex_unlock(&comp_param[idx].mutex);
310     }
311 }
312
313 void migrate_compress_threads_join(void)
314 {
315     int i, thread_count;
316
317     if (!migrate_use_compression()) {
318         return;
319     }
320     terminate_compression_threads();
321     thread_count = migrate_compress_threads();
322     for (i = 0; i < thread_count; i++) {
323         qemu_thread_join(compress_threads + i);
324         qemu_fclose(comp_param[i].file);
325         qemu_mutex_destroy(&comp_param[i].mutex);
326         qemu_cond_destroy(&comp_param[i].cond);
327     }
328     qemu_mutex_destroy(comp_done_lock);
329     qemu_cond_destroy(comp_done_cond);
330     g_free(compress_threads);
331     g_free(comp_param);
332     g_free(comp_done_cond);
333     g_free(comp_done_lock);
334     compress_threads = NULL;
335     comp_param = NULL;
336     comp_done_cond = NULL;
337     comp_done_lock = NULL;
338 }
339
340 void migrate_compress_threads_create(void)
341 {
342     int i, thread_count;
343
344     if (!migrate_use_compression()) {
345         return;
346     }
347     quit_comp_thread = false;
348     compression_switch = true;
349     thread_count = migrate_compress_threads();
350     compress_threads = g_new0(QemuThread, thread_count);
351     comp_param = g_new0(CompressParam, thread_count);
352     comp_done_cond = g_new0(QemuCond, 1);
353     comp_done_lock = g_new0(QemuMutex, 1);
354     qemu_cond_init(comp_done_cond);
355     qemu_mutex_init(comp_done_lock);
356     for (i = 0; i < thread_count; i++) {
357         /* comp_param[i].file is just used as a dummy buffer to save data;
358          * set its ops to empty.
359          */
360         comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
361         comp_param[i].done = true;
362         qemu_mutex_init(&comp_param[i].mutex);
363         qemu_cond_init(&comp_param[i].cond);
364         qemu_thread_create(compress_threads + i, "compress",
365                            do_data_compress, comp_param + i,
366                            QEMU_THREAD_JOINABLE);
367     }
368 }
369
370 /**
371  * save_page_header: Write page header to wire
372  *
373  * If this is the 1st block, it also writes the block identification
374  *
375  * Returns: Number of bytes written
376  *
377  * @f: QEMUFile where to send the data
378  * @block: block that contains the page we want to send
379  * @offset: offset inside the block for the page
380  *          in the lower bits, it contains flags
381  */
382 static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
383 {
384     size_t size;
385
386     qemu_put_be64(f, offset);
387     size = 8;
388
389     if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
390         qemu_put_byte(f, strlen(block->idstr));
391         qemu_put_buffer(f, (uint8_t *)block->idstr,
392                         strlen(block->idstr));
393         size += 1 + strlen(block->idstr);
394     }
395     return size;
396 }
397
398 /* Update the xbzrle cache to reflect a page that's been sent as all 0.
399  * The important thing is that a stale (not-yet-0'd) page be replaced
400  * by the new data.
401  * As a bonus, if the page wasn't in the cache it gets added so that
402  * when a small write is made into the 0'd page it gets XBZRLE sent
403  */
404 static void xbzrle_cache_zero_page(ram_addr_t current_addr)
405 {
406     if (ram_bulk_stage || !migrate_use_xbzrle()) {
407         return;
408     }
409
410     /* We don't care if this fails to allocate a new cache page
411      * as long as it updated an old one */
412     cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
413                  bitmap_sync_count);
414 }
415
416 #define ENCODING_FLAG_XBZRLE 0x1
417
418 /**
419  * save_xbzrle_page: compress and send current page
420  *
421  * Returns: 1 means that we wrote the page
422  *          0 means that page is identical to the one already sent
423  *          -1 means that xbzrle would be longer than normal
424  *
425  * @f: QEMUFile where to send the data
426  * @current_data: pointer to the page contents (may be moved to the cache)
427  * @current_addr: ram address of the page
428  * @block: block that contains the page we want to send
429  * @offset: offset inside the block for the page
430  * @last_stage: if we are at the completion stage
431  * @bytes_transferred: increase it with the number of transferred bytes
432  */
433 static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
434                             ram_addr_t current_addr, RAMBlock *block,
435                             ram_addr_t offset, bool last_stage,
436                             uint64_t *bytes_transferred)
437 {
438     int encoded_len = 0, bytes_xbzrle;
439     uint8_t *prev_cached_page;
440
441     if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
442         acct_info.xbzrle_cache_miss++;
443         if (!last_stage) {
444             if (cache_insert(XBZRLE.cache, current_addr, *current_data,
445                              bitmap_sync_count) == -1) {
446                 return -1;
447             } else {
448                 /* update *current_data when the page has been
449                    inserted into cache */
450                 *current_data = get_cached_data(XBZRLE.cache, current_addr);
451             }
452         }
453         return -1;
454     }
455
456     prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
457
458     /* save current buffer into memory */
459     memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
460
461     /* XBZRLE encoding (if there is no overflow) */
462     encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
463                                        TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
464                                        TARGET_PAGE_SIZE);
465     if (encoded_len == 0) {
466         DPRINTF("Skipping unmodified page\n");
467         return 0;
468     } else if (encoded_len == -1) {
469         DPRINTF("Overflow\n");
470         acct_info.xbzrle_overflows++;
471         /* update data in the cache */
472         if (!last_stage) {
473             memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
474             *current_data = prev_cached_page;
475         }
476         return -1;
477     }
478
479     /* we need to update the data in the cache, in order to get the same data */
480     if (!last_stage) {
481         memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
482     }
483
484     /* Send XBZRLE based compressed page */
485     bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
486     qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
487     qemu_put_be16(f, encoded_len);
488     qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
489     bytes_xbzrle += encoded_len + 1 + 2;
490     acct_info.xbzrle_pages++;
491     acct_info.xbzrle_bytes += bytes_xbzrle;
492     *bytes_transferred += bytes_xbzrle;
493
494     return 1;
495 }
496
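/**
 * migration_bitmap_find_and_reset_dirty: find and clear the next dirty page
 *
 * Searches migration_bitmap for the next dirty page of @mr at or after
 * @start and clears its bit.  During the bulk stage the page following
 * @start is assumed dirty (except at the start of a block), so the
 * bitmap search is skipped.
 *
 * Returns: offset of the dirty page inside the region; a value past the
 *          end of the region means no dirty page was found
 *
 * @mr: memory region to scan
 * @start: offset inside the region where the search begins
 */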
497 static inline
498 ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
499                                                  ram_addr_t start)
500 {
501     unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
502     unsigned long nr = base + (start >> TARGET_PAGE_BITS);
503     uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
504     unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
505
506     unsigned long next;
507
508     if (ram_bulk_stage && nr > base) {
509         next = nr + 1;
510     } else {
511         next = find_next_bit(migration_bitmap, size, nr);
512     }
513
514     if (next < size) {
515         clear_bit(next, migration_bitmap);
516         migration_dirty_pages--;
517     }
518     return (next - base) << TARGET_PAGE_BITS;
519 }
520
521 static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
522 {
523     migration_dirty_pages +=
524         cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length);
525 }
526
527
528 /* Fix me: there are too many global variables used in the migration process. */
529 static int64_t start_time;
530 static int64_t bytes_xfer_prev;
531 static int64_t num_dirty_pages_period;
532 static uint64_t xbzrle_cache_miss_prev;
533 static uint64_t iterations_prev;
534
535 static void migration_bitmap_sync_init(void)
536 {
537     start_time = 0;
538     bytes_xfer_prev = 0;
539     num_dirty_pages_period = 0;
540     xbzrle_cache_miss_prev = 0;
541     iterations_prev = 0;
542 }
543
544 /* Called with iothread lock held, to protect ram_list.dirty_memory[] */
545 static void migration_bitmap_sync(void)
546 {
547     RAMBlock *block;
548     uint64_t num_dirty_pages_init = migration_dirty_pages;
549     MigrationState *s = migrate_get_current();
550     int64_t end_time;
551     int64_t bytes_xfer_now;
552
553     bitmap_sync_count++;
554
555     if (!bytes_xfer_prev) {
556         bytes_xfer_prev = ram_bytes_transferred();
557     }
558
559     if (!start_time) {
560         start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
561     }
562
563     trace_migration_bitmap_sync_start();
564     address_space_sync_dirty_bitmap(&address_space_memory);
565
566     rcu_read_lock();
567     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
568         migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
569     }
570     rcu_read_unlock();
571
572     trace_migration_bitmap_sync_end(migration_dirty_pages
573                                     - num_dirty_pages_init);
574     num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
575     end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
576
577     /* more than 1 second = 1000 milliseconds */
578     if (end_time > start_time + 1000) {
579         if (migrate_auto_converge()) {
580             /* The following detection logic can be refined later. For now:
581                Check whether the bytes dirtied in this period exceed 50% of the
582                bytes that just got transferred since the last time we
583                were in this routine. If that happens >N times (for now N==4)
584                we turn on the throttle down logic */
585             bytes_xfer_now = ram_bytes_transferred();
586             if (s->dirty_pages_rate &&
587                (num_dirty_pages_period * TARGET_PAGE_SIZE >
588                    (bytes_xfer_now - bytes_xfer_prev)/2) &&
589                (dirty_rate_high_cnt++ > 4)) {
590                     trace_migration_throttle();
591                     mig_throttle_on = true;
592                     dirty_rate_high_cnt = 0;
593              }
594              bytes_xfer_prev = bytes_xfer_now;
595         } else {
596              mig_throttle_on = false;
597         }
598         if (migrate_use_xbzrle()) {
599             if (iterations_prev != acct_info.iterations) {
600                 acct_info.xbzrle_cache_miss_rate =
601                    (double)(acct_info.xbzrle_cache_miss -
602                             xbzrle_cache_miss_prev) /
603                    (acct_info.iterations - iterations_prev);
604             }
605             iterations_prev = acct_info.iterations;
606             xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
607         }
608         s->dirty_pages_rate = num_dirty_pages_period * 1000
609             / (end_time - start_time);
610         s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
611         start_time = end_time;
612         num_dirty_pages_period = 0;
613     }
614     s->dirty_sync_count = bitmap_sync_count;
615 }
616
617 /**
618  * save_zero_page: Send the zero page to the stream
619  *
620  * Returns: Number of pages written.
621  *
622  * @f: QEMUFile where to send the data
623  * @block: block that contains the page we want to send
624  * @offset: offset inside the block for the page
625  * @p: pointer to the page
626  * @bytes_transferred: increase it with the number of transferred bytes
627  */
628 static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
629                           uint8_t *p, uint64_t *bytes_transferred)
630 {
631     int pages = -1;
632
633     if (is_zero_range(p, TARGET_PAGE_SIZE)) {
634         acct_info.dup_pages++;
635         *bytes_transferred += save_page_header(f, block,
636                                                offset | RAM_SAVE_FLAG_COMPRESS);
637         qemu_put_byte(f, 0);
638         *bytes_transferred += 1;
639         pages = 1;
640     }
641
642     return pages;
643 }
644
645 /**
646  * ram_save_page: Send the given page to the stream
647  *
648  * Returns: Number of pages written.
649  *
650  * @f: QEMUFile where to send the data
651  * @block: block that contains the page we want to send
652  * @offset: offset inside the block for the page
653  * @last_stage: if we are at the completion stage
654  * @bytes_transferred: increase it with the number of transferred bytes
655  */
656 static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
657                          bool last_stage, uint64_t *bytes_transferred)
658 {
659     int pages = -1;
660     uint64_t bytes_xmit;
661     ram_addr_t current_addr;
662     MemoryRegion *mr = block->mr;
663     uint8_t *p;
664     int ret;
665     bool send_async = true;
666
667     p = memory_region_get_ram_ptr(mr) + offset;
668
669     /* When in doubt, send the page as a normal page */
670     bytes_xmit = 0;
671     ret = ram_control_save_page(f, block->offset,
672                            offset, TARGET_PAGE_SIZE, &bytes_xmit);
673     if (bytes_xmit) {
674         *bytes_transferred += bytes_xmit;
675         pages = 1;
676     }
677
678     XBZRLE_cache_lock();
679
680     current_addr = block->offset + offset;
681
682     if (block == last_sent_block) {
683         offset |= RAM_SAVE_FLAG_CONTINUE;
684     }
685     if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
686         if (ret != RAM_SAVE_CONTROL_DELAYED) {
687             if (bytes_xmit > 0) {
688                 acct_info.norm_pages++;
689             } else if (bytes_xmit == 0) {
690                 acct_info.dup_pages++;
691             }
692         }
693     } else {
694         pages = save_zero_page(f, block, offset, p, bytes_transferred);
695         if (pages > 0) {
696             /* Must let xbzrle know, otherwise a previous (now 0'd) cached
697              * page would be stale
698              */
699             xbzrle_cache_zero_page(current_addr);
700         } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
701             pages = save_xbzrle_page(f, &p, current_addr, block,
702                                      offset, last_stage, bytes_transferred);
703             if (!last_stage) {
704                 /* Can't send this cached data async, since the cache page
705                  * might get updated before it gets to the wire
706                  */
707                 send_async = false;
708             }
709         }
710     }
711
712     /* XBZRLE overflow or normal page */
713     if (pages == -1) {
714         *bytes_transferred += save_page_header(f, block,
715                                                offset | RAM_SAVE_FLAG_PAGE);
716         if (send_async) {
717             qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
718         } else {
719             qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
720         }
721         *bytes_transferred += TARGET_PAGE_SIZE;
722         pages = 1;
723         acct_info.norm_pages++;
724     }
725
726     XBZRLE_cache_unlock();
727
728     return pages;
729 }
730
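/**
 * do_compress_ram_page: compress a single page into param->file
 *
 * Writes the page header followed by the zlib-compressed page contents
 * into the buffered QEMUFile attached to @param.
 *
 * Returns: Number of bytes written to param->file
 *
 * @param: CompressParam describing the block and offset to compress
 */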
731 static int do_compress_ram_page(CompressParam *param)
732 {
733     int bytes_sent, blen;
734     uint8_t *p;
735     RAMBlock *block = param->block;
736     ram_addr_t offset = param->offset;
737
738     p = memory_region_get_ram_ptr(block->mr) + (offset & TARGET_PAGE_MASK);
739
740     bytes_sent = save_page_header(param->file, block, offset |
741                                   RAM_SAVE_FLAG_COMPRESS_PAGE);
742     blen = qemu_put_compression_data(param->file, p, TARGET_PAGE_SIZE,
743                                      migrate_compress_level());
744     bytes_sent += blen;
745
746     return bytes_sent;
747 }
748
749 static inline void start_compression(CompressParam *param)
750 {
751     param->done = false;
752     qemu_mutex_lock(&param->mutex);
753     param->start = true;
754     qemu_cond_signal(&param->cond);
755     qemu_mutex_unlock(&param->mutex);
756 }
757
758 static inline void start_decompression(DecompressParam *param)
759 {
760     qemu_mutex_lock(&param->mutex);
761     param->start = true;
762     qemu_cond_signal(&param->cond);
763     qemu_mutex_unlock(&param->mutex);
764 }
765
766 static uint64_t bytes_transferred;
767
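/**
 * flush_compressed_data: push buffered compressed pages to the wire
 *
 * Waits for each compression thread to finish its current page and then
 * copies the thread's buffered output into @f, adding the length to
 * bytes_transferred.
 *
 * @f: QEMUFile where to send the data
 */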
768 static void flush_compressed_data(QEMUFile *f)
769 {
770     int idx, len, thread_count;
771
772     if (!migrate_use_compression()) {
773         return;
774     }
775     thread_count = migrate_compress_threads();
776     for (idx = 0; idx < thread_count; idx++) {
777         if (!comp_param[idx].done) {
778             qemu_mutex_lock(comp_done_lock);
779             while (!comp_param[idx].done && !quit_comp_thread) {
780                 qemu_cond_wait(comp_done_cond, comp_done_lock);
781             }
782             qemu_mutex_unlock(comp_done_lock);
783         }
784         if (!quit_comp_thread) {
785             len = qemu_put_qemu_file(f, comp_param[idx].file);
786             bytes_transferred += len;
787         }
788     }
789 }
790
791 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
792                                        ram_addr_t offset)
793 {
794     param->block = block;
795     param->offset = offset;
796 }
797
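/**
 * compress_page_with_multi_thread: hand a page to an idle compression thread
 *
 * Picks a compression thread that is done, flushes its previous output
 * into @f, queues the new (block, offset) pair on it and wakes it up.
 * Blocks on comp_done_cond until a thread becomes available.
 *
 * Returns: Number of pages written
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @bytes_transferred: increase it with the number of transferred bytes
 */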
798 static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
799                                            ram_addr_t offset,
800                                            uint64_t *bytes_transferred)
801 {
802     int idx, thread_count, bytes_xmit = -1, pages = -1;
803
804     thread_count = migrate_compress_threads();
805     qemu_mutex_lock(comp_done_lock);
806     while (true) {
807         for (idx = 0; idx < thread_count; idx++) {
808             if (comp_param[idx].done) {
809                 bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
810                 set_compress_params(&comp_param[idx], block, offset);
811                 start_compression(&comp_param[idx]);
812                 pages = 1;
813                 acct_info.norm_pages++;
814                 *bytes_transferred += bytes_xmit;
815                 break;
816             }
817         }
818         if (pages > 0) {
819             break;
820         } else {
821             qemu_cond_wait(comp_done_cond, comp_done_lock);
822         }
823     }
824     qemu_mutex_unlock(comp_done_lock);
825
826     return pages;
827 }
828
829 /**
830  * ram_save_compressed_page: compress the given page and send it to the stream
831  *
832  * Returns: Number of pages written.
833  *
834  * @f: QEMUFile where to send the data
835  * @block: block that contains the page we want to send
836  * @offset: offset inside the block for the page
837  * @last_stage: if we are at the completion stage
838  * @bytes_transferred: increase it with the number of transferred bytes
839  */
840 static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
841                                     ram_addr_t offset, bool last_stage,
842                                     uint64_t *bytes_transferred)
843 {
844     int pages = -1;
845     uint64_t bytes_xmit;
846     MemoryRegion *mr = block->mr;
847     uint8_t *p;
848     int ret;
849
850     p = memory_region_get_ram_ptr(mr) + offset;
851
852     bytes_xmit = 0;
853     ret = ram_control_save_page(f, block->offset,
854                                 offset, TARGET_PAGE_SIZE, &bytes_xmit);
855     if (bytes_xmit) {
856         *bytes_transferred += bytes_xmit;
857         pages = 1;
858     }
859     if (block == last_sent_block) {
860         offset |= RAM_SAVE_FLAG_CONTINUE;
861     }
862     if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
863         if (ret != RAM_SAVE_CONTROL_DELAYED) {
864             if (bytes_xmit > 0) {
865                 acct_info.norm_pages++;
866             } else if (bytes_xmit == 0) {
867                 acct_info.dup_pages++;
868             }
869         }
870     } else {
871         /* When starting the process of a new block, the first page of
872          * the block should be sent out before other pages in the same
873          * block, and all the pages in the last block should have been sent
874          * out. Keeping this order is important, because the 'cont' flag
875          * is used to avoid resending the block name.
876          */
877         if (block != last_sent_block) {
878             flush_compressed_data(f);
879             pages = save_zero_page(f, block, offset, p, bytes_transferred);
880             if (pages == -1) {
881                 set_compress_params(&comp_param[0], block, offset);
882                 /* Use the qemu thread to compress the data to make sure the
883                  * first page is sent out before other pages
884                  */
885                 bytes_xmit = do_compress_ram_page(&comp_param[0]);
886                 acct_info.norm_pages++;
887                 qemu_put_qemu_file(f, comp_param[0].file);
888                 *bytes_transferred += bytes_xmit;
889                 pages = 1;
890             }
891         } else {
892             pages = save_zero_page(f, block, offset, p, bytes_transferred);
893             if (pages == -1) {
894                 pages = compress_page_with_multi_thread(f, block, offset,
895                                                         bytes_transferred);
896             }
897         }
898     }
899
900     return pages;
901 }
902
903 /**
904  * ram_find_and_save_block: Finds a dirty page and sends it to f
905  *
906  * Called within an RCU critical section.
907  *
908  * Returns:  The number of pages written
909  *           0 means no dirty pages
910  *
911  * @f: QEMUFile where to send the data
912  * @last_stage: if we are at the completion stage
913  * @bytes_transferred: increase it with the number of transferred bytes
914  */
915
916 static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
917                                    uint64_t *bytes_transferred)
918 {
919     RAMBlock *block = last_seen_block;
920     ram_addr_t offset = last_offset;
921     bool complete_round = false;
922     int pages = 0;
923     MemoryRegion *mr;
924
925     if (!block)
926         block = QLIST_FIRST_RCU(&ram_list.blocks);
927
928     while (true) {
929         mr = block->mr;
930         offset = migration_bitmap_find_and_reset_dirty(mr, offset);
931         if (complete_round && block == last_seen_block &&
932             offset >= last_offset) {
933             break;
934         }
935         if (offset >= block->used_length) {
936             offset = 0;
937             block = QLIST_NEXT_RCU(block, next);
938             if (!block) {
939                 block = QLIST_FIRST_RCU(&ram_list.blocks);
940                 complete_round = true;
941                 ram_bulk_stage = false;
942                 if (migrate_use_xbzrle()) {
943                     /* If xbzrle is on, stop using the data compression at this
944                      * point. In theory, xbzrle can do better than compression.
945                      */
946                     flush_compressed_data(f);
947                     compression_switch = false;
948                 }
949             }
950         } else {
951             if (compression_switch && migrate_use_compression()) {
952                 pages = ram_save_compressed_page(f, block, offset, last_stage,
953                                                  bytes_transferred);
954             } else {
955                 pages = ram_save_page(f, block, offset, last_stage,
956                                       bytes_transferred);
957             }
958
959             /* if page is unmodified, continue to the next */
960             if (pages > 0) {
961                 last_sent_block = block;
962                 break;
963             }
964         }
965     }
966
967     last_seen_block = block;
968     last_offset = offset;
969
970     return pages;
971 }
972
973 void acct_update_position(QEMUFile *f, size_t size, bool zero)
974 {
975     uint64_t pages = size / TARGET_PAGE_SIZE;
976     if (zero) {
977         acct_info.dup_pages += pages;
978     } else {
979         acct_info.norm_pages += pages;
980         bytes_transferred += size;
981         qemu_update_position(f, size);
982     }
983 }
984
985 static ram_addr_t ram_save_remaining(void)
986 {
987     return migration_dirty_pages;
988 }
989
990 uint64_t ram_bytes_remaining(void)
991 {
992     return ram_save_remaining() * TARGET_PAGE_SIZE;
993 }
994
995 uint64_t ram_bytes_transferred(void)
996 {
997     return bytes_transferred;
998 }
999
1000 uint64_t ram_bytes_total(void)
1001 {
1002     RAMBlock *block;
1003     uint64_t total = 0;
1004
1005     rcu_read_lock();
1006     QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1007         total += block->used_length;
1008     rcu_read_unlock();
1009     return total;
1010 }
1011
1012 void free_xbzrle_decoded_buf(void)
1013 {
1014     g_free(xbzrle_decoded_buf);
1015     xbzrle_decoded_buf = NULL;
1016 }
1017
1018 static void migration_end(void)
1019 {
1020     if (migration_bitmap) {
1021         memory_global_dirty_log_stop();
1022         g_free(migration_bitmap);
1023         migration_bitmap = NULL;
1024     }
1025
1026     XBZRLE_cache_lock();
1027     if (XBZRLE.cache) {
1028         cache_fini(XBZRLE.cache);
1029         g_free(XBZRLE.encoded_buf);
1030         g_free(XBZRLE.current_buf);
1031         XBZRLE.cache = NULL;
1032         XBZRLE.encoded_buf = NULL;
1033         XBZRLE.current_buf = NULL;
1034     }
1035     XBZRLE_cache_unlock();
1036 }
1037
1038 static void ram_migration_cancel(void *opaque)
1039 {
1040     migration_end();
1041 }
1042
1043 static void reset_ram_globals(void)
1044 {
1045     last_seen_block = NULL;
1046     last_sent_block = NULL;
1047     last_offset = 0;
1048     last_version = ram_list.version;
1049     ram_bulk_stage = true;
1050 }
1051
1052 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1053
1054
1055 /* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
1056  * a long-running RCU critical section.  When RCU reclaims in the code
1057  * start to become numerous it will be necessary to reduce the
1058  * granularity of these critical sections.
1059  */
1060
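/**
 * ram_save_setup: set up RAM migration and send the initial block list
 *
 * Allocates the XBZRLE cache and buffers when XBZRLE is enabled, creates
 * the migration bitmap with all pages marked dirty, starts dirty logging
 * and writes the name and size of every RAM block to @f.
 *
 * Returns: 0 on success, -1 on allocation failure
 *
 * @f: QEMUFile where to send the data
 * @opaque: unused
 */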
1061 static int ram_save_setup(QEMUFile *f, void *opaque)
1062 {
1063     RAMBlock *block;
1064     int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1065
1066     mig_throttle_on = false;
1067     dirty_rate_high_cnt = 0;
1068     bitmap_sync_count = 0;
1069     migration_bitmap_sync_init();
1070
1071     if (migrate_use_xbzrle()) {
1072         XBZRLE_cache_lock();
1073         XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1074                                   TARGET_PAGE_SIZE,
1075                                   TARGET_PAGE_SIZE);
1076         if (!XBZRLE.cache) {
1077             XBZRLE_cache_unlock();
1078             error_report("Error creating cache");
1079             return -1;
1080         }
1081         XBZRLE_cache_unlock();
1082
1083         /* We prefer not to abort if there is no memory */
1084         XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1085         if (!XBZRLE.encoded_buf) {
1086             error_report("Error allocating encoded_buf");
1087             return -1;
1088         }
1089
1090         XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1091         if (!XBZRLE.current_buf) {
1092             error_report("Error allocating current_buf");
1093             g_free(XBZRLE.encoded_buf);
1094             XBZRLE.encoded_buf = NULL;
1095             return -1;
1096         }
1097
1098         acct_clear();
1099     }
1100
1101     /* iothread lock needed for ram_list.dirty_memory[] */
1102     qemu_mutex_lock_iothread();
1103     qemu_mutex_lock_ramlist();
1104     rcu_read_lock();
1105     bytes_transferred = 0;
1106     reset_ram_globals();
1107
1108     ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1109     migration_bitmap = bitmap_new(ram_bitmap_pages);
1110     bitmap_set(migration_bitmap, 0, ram_bitmap_pages);
1111
1112     /*
1113      * Count the total number of pages used by ram blocks not including any
1114      * gaps due to alignment or unplugs.
1115      */
1116     migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
1117
1118     memory_global_dirty_log_start();
1119     migration_bitmap_sync();
1120     qemu_mutex_unlock_ramlist();
1121     qemu_mutex_unlock_iothread();
1122
1123     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
1124
1125     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1126         qemu_put_byte(f, strlen(block->idstr));
1127         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
1128         qemu_put_be64(f, block->used_length);
1129     }
1130
1131     rcu_read_unlock();
1132
1133     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
1134     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
1135
1136     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1137
1138     return 0;
1139 }
1140
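/**
 * ram_save_iterate: send dirty pages until the bandwidth limit is hit
 *
 * Repeatedly calls ram_find_and_save_block() until the rate limit kicks
 * in, no dirty pages remain, or roughly MAX_WAIT ms have elapsed, then
 * flushes any pending compressed data and writes an EOS marker.
 *
 * Returns: number of pages sent, or a negative error code
 *
 * @f: QEMUFile where to send the data
 * @opaque: unused
 */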
1141 static int ram_save_iterate(QEMUFile *f, void *opaque)
1142 {
1143     int ret;
1144     int i;
1145     int64_t t0;
1146     int pages_sent = 0;
1147
1148     rcu_read_lock();
1149     if (ram_list.version != last_version) {
1150         reset_ram_globals();
1151     }
1152
1153     /* Read version before ram_list.blocks */
1154     smp_rmb();
1155
1156     ram_control_before_iterate(f, RAM_CONTROL_ROUND);
1157
1158     t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1159     i = 0;
1160     while ((ret = qemu_file_rate_limit(f)) == 0) {
1161         int pages;
1162
1163         pages = ram_find_and_save_block(f, false, &bytes_transferred);
1164         /* no more pages to send */
1165         if (pages == 0) {
1166             break;
1167         }
1168         pages_sent += pages;
1169         acct_info.iterations++;
1170         check_guest_throttling();
1171         /* we want to check in the 1st loop, just in case it was the 1st time
1172            and we had to sync the dirty bitmap.
1173            qemu_clock_get_ns() is a bit expensive, so we only check every few
1174            iterations
1175         */
1176         if ((i & 63) == 0) {
1177             uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
1178             if (t1 > MAX_WAIT) {
1179                 DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
1180                         t1, i);
1181                 break;
1182             }
1183         }
1184         i++;
1185     }
1186     flush_compressed_data(f);
1187     rcu_read_unlock();
1188
1189     /*
1190      * Must occur before EOS (or any QEMUFile operation)
1191      * because of RDMA protocol.
1192      */
1193     ram_control_after_iterate(f, RAM_CONTROL_ROUND);
1194
1195     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1196     bytes_transferred += 8;
1197
1198     ret = qemu_file_get_error(f);
1199     if (ret < 0) {
1200         return ret;
1201     }
1202
1203     return pages_sent;
1204 }
1205
1206 /* Called with iothread lock held */
1207 static int ram_save_complete(QEMUFile *f, void *opaque)
1208 {
1209     rcu_read_lock();
1210
1211     migration_bitmap_sync();
1212
1213     ram_control_before_iterate(f, RAM_CONTROL_FINISH);
1214
1215     /* try transferring iterative blocks of memory */
1216
1217     /* flush all remaining blocks regardless of rate limiting */
1218     while (true) {
1219         int pages;
1220
1221         pages = ram_find_and_save_block(f, true, &bytes_transferred);
1222         /* no more blocks to send */
1223         if (pages == 0) {
1224             break;
1225         }
1226     }
1227
1228     flush_compressed_data(f);
1229     ram_control_after_iterate(f, RAM_CONTROL_FINISH);
1230     migration_end();
1231
1232     rcu_read_unlock();
1233     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1234
1235     return 0;
1236 }
1237
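/**
 * ram_save_pending: estimate how many bytes are still left to send
 *
 * If the estimate is below @max_size, the dirty bitmap is re-synced
 * first so the result is up to date.
 *
 * Returns: number of remaining dirty pages times TARGET_PAGE_SIZE
 *
 * @f: QEMUFile (unused)
 * @opaque: unused
 * @max_size: threshold below which the dirty bitmap is re-synced
 */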
1238 static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
1239 {
1240     uint64_t remaining_size;
1241
1242     remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
1243
1244     if (remaining_size < max_size) {
1245         qemu_mutex_lock_iothread();
1246         rcu_read_lock();
1247         migration_bitmap_sync();
1248         rcu_read_unlock();
1249         qemu_mutex_unlock_iothread();
1250         remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
1251     }
1252     return remaining_size;
1253 }
1254
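/**
 * load_xbzrle: read one XBZRLE-encoded page and apply it to @host
 *
 * Reads the encoding flag and length from the stream and decodes the
 * delta on top of the current contents of the host page.
 *
 * Returns: 0 on success, -1 if the header or the encoded data is invalid
 *
 * @f: QEMUFile to read the page from
 * @addr: guest address of the page (unused)
 * @host: host pointer of the page to update
 */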
1255 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
1256 {
1257     unsigned int xh_len;
1258     int xh_flags;
1259
1260     if (!xbzrle_decoded_buf) {
1261         xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
1262     }
1263
1264     /* extract RLE header */
1265     xh_flags = qemu_get_byte(f);
1266     xh_len = qemu_get_be16(f);
1267
1268     if (xh_flags != ENCODING_FLAG_XBZRLE) {
1269         error_report("Failed to load XBZRLE page - wrong compression!");
1270         return -1;
1271     }
1272
1273     if (xh_len > TARGET_PAGE_SIZE) {
1274         error_report("Failed to load XBZRLE page - len overflow!");
1275         return -1;
1276     }
1277     /* load data and decode */
1278     qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);
1279
1280     /* decode RLE */
1281     if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
1282                              TARGET_PAGE_SIZE) == -1) {
1283         error_report("Failed to load XBZRLE page - decode error!");
1284         return -1;
1285     }
1286
1287     return 0;
1288 }
1289
1290 /* Must be called from within an RCU critical section.
1291  * Returns a pointer from within the RCU-protected ram_list.
1292  */
1293 static inline void *host_from_stream_offset(QEMUFile *f,
1294                                             ram_addr_t offset,
1295                                             int flags)
1296 {
1297     static RAMBlock *block = NULL;
1298     char id[256];
1299     uint8_t len;
1300
1301     if (flags & RAM_SAVE_FLAG_CONTINUE) {
1302         if (!block || block->max_length <= offset) {
1303             error_report("Ack, bad migration stream!");
1304             return NULL;
1305         }
1306
1307         return memory_region_get_ram_ptr(block->mr) + offset;
1308     }
1309
1310     len = qemu_get_byte(f);
1311     qemu_get_buffer(f, (uint8_t *)id, len);
1312     id[len] = 0;
1313
1314     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1315         if (!strncmp(id, block->idstr, sizeof(id)) &&
1316             block->max_length > offset) {
1317             return memory_region_get_ram_ptr(block->mr) + offset;
1318         }
1319     }
1320
1321     error_report("Can't find block %s!", id);
1322     return NULL;
1323 }
1324
1325 /*
1326  * If a page (or a whole RDMA chunk) has been
1327  * determined to be zero, then zap it.
1328  */
1329 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
1330 {
1331     if (ch != 0 || !is_zero_range(host, size)) {
1332         memset(host, ch, size);
1333     }
1334 }
1335
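/**
 * do_data_decompress: worker loop for one decompression thread
 *
 * Waits on its DecompressParam until the load path hands it a compressed
 * buffer, then inflates it directly into the destination host page.
 *
 * @opaque: pointer to this thread's DecompressParam
 */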
1336 static void *do_data_decompress(void *opaque)
1337 {
1338     DecompressParam *param = opaque;
1339     unsigned long pagesize;
1340
1341     while (!quit_decomp_thread) {
1342         qemu_mutex_lock(&param->mutex);
1343         while (!param->start && !quit_decomp_thread) {
1344             qemu_cond_wait(&param->cond, &param->mutex);
1345             pagesize = TARGET_PAGE_SIZE;
1346             if (!quit_decomp_thread) {
1347                 /* uncompress() may fail in some cases, especially
1348                  * when the page is dirtied while being compressed; that's
1349                  * not a problem because the dirty page will be retransferred
1350                  * and uncompress() won't break the data in other pages.
1351                  */
1352                 uncompress((Bytef *)param->des, &pagesize,
1353                            (const Bytef *)param->compbuf, param->len);
1354             }
1355             param->start = false;
1356         }
1357         qemu_mutex_unlock(&param->mutex);
1358     }
1359
1360     return NULL;
1361 }
1362
1363 void migrate_decompress_threads_create(void)
1364 {
1365     int i, thread_count;
1366
1367     thread_count = migrate_decompress_threads();
1368     decompress_threads = g_new0(QemuThread, thread_count);
1369     decomp_param = g_new0(DecompressParam, thread_count);
1370     compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
1371     quit_decomp_thread = false;
1372     for (i = 0; i < thread_count; i++) {
1373         qemu_mutex_init(&decomp_param[i].mutex);
1374         qemu_cond_init(&decomp_param[i].cond);
1375         decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
1376         qemu_thread_create(decompress_threads + i, "decompress",
1377                            do_data_decompress, decomp_param + i,
1378                            QEMU_THREAD_JOINABLE);
1379     }
1380 }
1381
1382 void migrate_decompress_threads_join(void)
1383 {
1384     int i, thread_count;
1385
1386     quit_decomp_thread = true;
1387     thread_count = migrate_decompress_threads();
1388     for (i = 0; i < thread_count; i++) {
1389         qemu_mutex_lock(&decomp_param[i].mutex);
1390         qemu_cond_signal(&decomp_param[i].cond);
1391         qemu_mutex_unlock(&decomp_param[i].mutex);
1392     }
1393     for (i = 0; i < thread_count; i++) {
1394         qemu_thread_join(decompress_threads + i);
1395         qemu_mutex_destroy(&decomp_param[i].mutex);
1396         qemu_cond_destroy(&decomp_param[i].cond);
1397         g_free(decomp_param[i].compbuf);
1398     }
1399     g_free(decompress_threads);
1400     g_free(decomp_param);
1401     g_free(compressed_data_buf);
1402     decompress_threads = NULL;
1403     decomp_param = NULL;
1404     compressed_data_buf = NULL;
1405 }
1406
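/**
 * decompress_data_with_multi_threads: hand a compressed page to a free thread
 *
 * Busy-waits until one of the decompression threads is idle, copies the
 * compressed data into its buffer and wakes it up.
 *
 * @compbuf: buffer holding the compressed page data
 * @host: host pointer where the page should be decompressed to
 * @len: length of the compressed data
 */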
1407 static void decompress_data_with_multi_threads(uint8_t *compbuf,
1408                                                void *host, int len)
1409 {
1410     int idx, thread_count;
1411
1412     thread_count = migrate_decompress_threads();
1413     while (true) {
1414         for (idx = 0; idx < thread_count; idx++) {
1415             if (!decomp_param[idx].start) {
1416                 memcpy(decomp_param[idx].compbuf, compbuf, len);
1417                 decomp_param[idx].des = host;
1418                 decomp_param[idx].len = len;
1419                 start_decompression(&decomp_param[idx]);
1420                 break;
1421             }
1422         }
1423         if (idx < thread_count) {
1424             break;
1425         }
1426     }
1427 }
1428
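/**
 * ram_load: load RAM pages from the migration stream
 *
 * Processes stream chunks (block list, zero pages, full pages, compressed
 * pages and XBZRLE pages) until an EOS flag or an error is seen.
 *
 * Returns: 0 on success, negative errno on failure
 *
 * @f: QEMUFile to read from
 * @opaque: unused
 * @version_id: stream version, must be 4
 */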
1429 static int ram_load(QEMUFile *f, void *opaque, int version_id)
1430 {
1431     int flags = 0, ret = 0;
1432     static uint64_t seq_iter;
1433     int len = 0;
1434
1435     seq_iter++;
1436
1437     if (version_id != 4) {
1438         ret = -EINVAL;
1439     }
1440
1441     /* This RCU critical section can be very long running.
1442      * When RCU reclaims in the code start to become numerous,
1443      * it will be necessary to reduce the granularity of this
1444      * critical section.
1445      */
1446     rcu_read_lock();
1447     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
1448         ram_addr_t addr, total_ram_bytes;
1449         void *host;
1450         uint8_t ch;
1451
1452         addr = qemu_get_be64(f);
1453         flags = addr & ~TARGET_PAGE_MASK;
1454         addr &= TARGET_PAGE_MASK;
1455
1456         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
1457         case RAM_SAVE_FLAG_MEM_SIZE:
1458             /* Synchronize RAM block list */
1459             total_ram_bytes = addr;
1460             while (!ret && total_ram_bytes) {
1461                 RAMBlock *block;
1462                 char id[256];
1463                 ram_addr_t length;
1464
1465                 len = qemu_get_byte(f);
1466                 qemu_get_buffer(f, (uint8_t *)id, len);
1467                 id[len] = 0;
1468                 length = qemu_get_be64(f);
1469
1470                 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1471                     if (!strncmp(id, block->idstr, sizeof(id))) {
1472                         if (length != block->used_length) {
1473                             Error *local_err = NULL;
1474
1475                             ret = qemu_ram_resize(block->offset, length, &local_err);
1476                             if (local_err) {
1477                                 error_report_err(local_err);
1478                             }
1479                         }
1480                         break;
1481                     }
1482                 }
1483
1484                 if (!block) {
1485                     error_report("Unknown ramblock \"%s\", cannot "
1486                                  "accept migration", id);
1487                     ret = -EINVAL;
1488                 }
1489
1490                 total_ram_bytes -= length;
1491             }
1492             break;
1493         case RAM_SAVE_FLAG_COMPRESS:
1494             host = host_from_stream_offset(f, addr, flags);
1495             if (!host) {
1496                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
1497                 ret = -EINVAL;
1498                 break;
1499             }
1500             ch = qemu_get_byte(f);
1501             ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
1502             break;
1503         case RAM_SAVE_FLAG_PAGE:
1504             host = host_from_stream_offset(f, addr, flags);
1505             if (!host) {
1506                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
1507                 ret = -EINVAL;
1508                 break;
1509             }
1510             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
1511             break;
1512         case RAM_SAVE_FLAG_COMPRESS_PAGE:
1513             host = host_from_stream_offset(f, addr, flags);
1514             if (!host) {
1515                 error_report("Invalid RAM offset " RAM_ADDR_FMT, addr);
1516                 ret = -EINVAL;
1517                 break;
1518             }
1519
1520             len = qemu_get_be32(f);
1521             if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
1522                 error_report("Invalid compressed data length: %d", len);
1523                 ret = -EINVAL;
1524                 break;
1525             }
1526             qemu_get_buffer(f, compressed_data_buf, len);
1527             decompress_data_with_multi_threads(compressed_data_buf, host, len);
1528             break;
1529         case RAM_SAVE_FLAG_XBZRLE:
1530             host = host_from_stream_offset(f, addr, flags);
1531             if (!host) {
1532                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
1533                 ret = -EINVAL;
1534                 break;
1535             }
1536             if (load_xbzrle(f, addr, host) < 0) {
1537                 error_report("Failed to decompress XBZRLE page at "
1538                              RAM_ADDR_FMT, addr);
1539                 ret = -EINVAL;
1540                 break;
1541             }
1542             break;
1543         case RAM_SAVE_FLAG_EOS:
1544             /* normal exit */
1545             break;
1546         default:
1547             if (flags & RAM_SAVE_FLAG_HOOK) {
1548                 ram_control_load_hook(f, flags);
1549             } else {
1550                 error_report("Unknown combination of migration flags: %#x",
1551                              flags);
1552                 ret = -EINVAL;
1553             }
1554         }
1555         if (!ret) {
1556             ret = qemu_file_get_error(f);
1557         }
1558     }
1559
1560     rcu_read_unlock();
1561     DPRINTF("Completed load of VM with exit code %d seq iteration "
1562             "%" PRIu64 "\n", ret, seq_iter);
1563     return ret;
1564 }
1565
1566 static SaveVMHandlers savevm_ram_handlers = {
1567     .save_live_setup = ram_save_setup,
1568     .save_live_iterate = ram_save_iterate,
1569     .save_live_complete = ram_save_complete,
1570     .save_live_pending = ram_save_pending,
1571     .load_state = ram_load,
1572     .cancel = ram_migration_cancel,
1573 };
1574
1575 void ram_mig_init(void)
1576 {
1577     qemu_mutex_init(&XBZRLE.lock);
1578     register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
1579 }
1580 /* Stub function that gets run on the vcpu when it's brought out of the
1581    VM to run inside qemu via async_run_on_cpu() */
1582
1583 static void mig_sleep_cpu(void *opq)
1584 {
1585     qemu_mutex_unlock_iothread();
1586     g_usleep(30*1000);
1587     qemu_mutex_lock_iothread();
1588 }
1589
1590 /* To reduce the dirty rate, explicitly disallow the VCPUs from spending
1591    much time in the VM. The migration thread will try to catch up.
1592    The workload will experience a performance drop.
1593 */
1594 static void mig_throttle_guest_down(void)
1595 {
1596     CPUState *cpu;
1597
1598     qemu_mutex_lock_iothread();
1599     CPU_FOREACH(cpu) {
1600         async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
1601     }
1602     qemu_mutex_unlock_iothread();
1603 }
1604
1605 static void check_guest_throttling(void)
1606 {
1607     static int64_t t0;
1608     int64_t        t1;
1609
1610     if (!mig_throttle_on) {
1611         return;
1612     }
1613
1614     if (!t0)  {
1615         t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1616         return;
1617     }
1618
1619     t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1620
1621     /* If it has been more than 40 ms since the last time the guest
1622      * was throttled then do it again.
1623      */
1624     if (40 < (t1-t0)/1000000) {
1625         mig_throttle_guest_down();
1626         t0 = t1;
1627     }
1628 }