+/*
+ * Translate a guest-physical address into a dump page frame number.
+ *
+ * The shift amount is the number of trailing zero bits of the dump page
+ * size (presumably a power of two -- confirm against where page_size is
+ * set). The result is rebased by ARCH_PFN_OFFSET.
+ */
+static uint64_t dump_paddr_to_pfn(DumpState *s, uint64_t addr)
+{
+    const int page_shift = ctz32(s->dump_info.page_size);
+    uint64_t frame = addr >> page_shift;
+
+    return frame - ARCH_PFN_OFFSET;
+}
+
+/*
+ * Translate a dump page frame number back into the guest-physical address
+ * of the start of that page: add ARCH_PFN_OFFSET, then shift left by the
+ * number of trailing zero bits of the dump page size.
+ */
+static uint64_t dump_pfn_to_paddr(DumpState *s, uint64_t pfn)
+{
+    const int page_shift = ctz32(s->dump_info.page_size);
+    uint64_t frame = pfn + ARCH_PFN_OFFSET;
+
+    return frame << page_shift;
+}
+
+/*
+ * Examine every page and return the page frame number and the host address
+ * of the page. bufptr can be NULL. Note: the blocks here are supposed to
+ * reflect guest-phys blocks, so block->target_start and block->target_end
+ * should be integral multiples of the target page size (this is asserted).
+ *
+ * Iteration protocol:
+ *   - the caller starts with *blockptr == NULL; the first call positions the
+ *     iterator on the first page of the first block;
+ *   - every later call advances *pfnptr by one page, moving on to the next
+ *     block once the current one is exhausted;
+ *   - *blockptr and *pfnptr carry the iterator state between calls and must
+ *     not be modified by the caller.
+ *
+ * Returns true when a page was produced (*pfnptr and, if bufptr is non-NULL,
+ * *bufptr are valid), false when there are no more pages. An empty block
+ * list yields false on the very first call.
+ */
+static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
+                          uint8_t **bufptr, DumpState *s)
+{
+    GuestPhysBlock *block = *blockptr;
+    hwaddr addr, target_page_mask = ~((hwaddr)s->dump_info.page_size - 1);
+    uint8_t *buf;
+
+    /* block == NULL means the start of the iteration */
+    if (!block) {
+        block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
+        *blockptr = block;
+        if (!block) {
+            /* empty block list: there is nothing to iterate over */
+            return false;
+        }
+        assert((block->target_start & ~target_page_mask) == 0);
+        assert((block->target_end & ~target_page_mask) == 0);
+        *pfnptr = dump_paddr_to_pfn(s, block->target_start);
+        if (bufptr) {
+            *bufptr = block->host_addr;
+        }
+        return true;
+    }
+
+    *pfnptr = *pfnptr + 1;
+    addr = dump_pfn_to_paddr(s, *pfnptr);
+
+    if ((addr >= block->target_start) &&
+        (addr + s->dump_info.page_size <= block->target_end)) {
+        /* the whole next page still lies inside the current block */
+        buf = block->host_addr + (addr - block->target_start);
+    } else {
+        /* the next page is in the next block */
+        block = QTAILQ_NEXT(block, next);
+        *blockptr = block;
+        if (!block) {
+            return false;
+        }
+        assert((block->target_start & ~target_page_mask) == 0);
+        assert((block->target_end & ~target_page_mask) == 0);
+        *pfnptr = dump_paddr_to_pfn(s, block->target_start);
+        buf = block->host_addr;
+    }
+
+    if (bufptr) {
+        *bufptr = buf;
+    }
+
+    return true;
+}
+
+/*
+ * Walk every guest page and record it in the dump bitmap.
+ *
+ * A temporary buffer of dump_bitmap_get_bufsize() bytes is handed to
+ * set_dump_bitmap() for each page. On success s->num_dumpable is set to the
+ * number of pages visited; on failure an error is stored in errp and
+ * s->num_dumpable is left untouched. The temporary buffer is always freed.
+ */
+static void write_dump_bitmap(DumpState *s, Error **errp)
+{
+    GuestPhysBlock *cursor = NULL;
+    uint64_t prev_pfn = 0;
+    uint64_t cur_pfn;
+    size_t page_count = 0;
+    size_t buf_bytes = dump_bitmap_get_bufsize(s);
+    size_t buf_bits = buf_bytes * CHAR_BIT;
+    /* scratch area in which set_dump_bitmap() assembles the bitmap */
+    void *bitmap = g_malloc0(buf_bytes);
+
+    /* mark the bit of each existing page, one page at a time */
+    for (; get_next_page(&cursor, &cur_pfn, NULL, s); page_count++) {
+        if (set_dump_bitmap(prev_pfn, cur_pfn, true, bitmap, s) < 0) {
+            error_setg(errp, "dump: failed to set dump_bitmap");
+            goto out;
+        }
+        prev_pfn = cur_pfn;
+    }
+
+    /*
+     * set_dump_bitmap() always leaves the most recently set bit unsynced.
+     * Clearing one buffer's worth of bits past the last pfn pushes that
+     * pending bit out to the vmcore.
+     */
+    if (page_count > 0 &&
+        set_dump_bitmap(prev_pfn, prev_pfn + buf_bits, false, bitmap, s) < 0) {
+        error_setg(errp, "dump: failed to sync dump_bitmap");
+        goto out;
+    }
+
+    /* number of dumpable pages that will be dumped later */
+    s->num_dumpable = page_count;
+
+out:
+    g_free(bitmap);
+}
+
+/*
+ * Initialise a write-behind cache targeting the dump file descriptor.
+ *
+ * The cache starts empty, begins writing at the given file offset, and owns
+ * a zero-filled buffer four times the size of the dump bitmap buffer. The
+ * buffer must later be released with free_data_cache().
+ */
+static void prepare_data_cache(DataCache *data_cache, DumpState *s,
+                               off_t offset)
+{
+    /* where flushed data goes */
+    data_cache->fd = s->fd;
+    data_cache->offset = offset;
+
+    /* backing storage, initially holding no pending data */
+    data_cache->buf_size = 4 * dump_bitmap_get_bufsize(s);
+    data_cache->buf = g_malloc0(data_cache->buf_size);
+    data_cache->data_size = 0;
+}
+
+/*
+ * Append data to a cache, flushing the cache to its file when required.
+ *
+ * With flag_sync clear, the size bytes at buf are copied into the cache; if
+ * they would not fit behind the pending data, the pending data is written
+ * out first. With flag_sync set, buf and size are not copied: any pending
+ * data is simply written out.
+ *
+ * size must never exceed dc->buf_size, otherwise the data could not fit even
+ * in an empty cache (asserted).
+ *
+ * Returns 0 on success, -1 if writing the pending data failed.
+ */
+static int write_cache(DataCache *dc, const void *buf, size_t size,
+                       bool flag_sync)
+{
+    bool must_flush;
+
+    assert(size <= dc->buf_size);
+
+    if (flag_sync) {
+        /* sync request: flush only if something is pending */
+        must_flush = dc->data_size > 0;
+    } else {
+        /* append request: flush only if the new data would overflow */
+        must_flush = dc->data_size + size > dc->buf_size;
+    }
+
+    if (must_flush) {
+        if (write_buffer(dc->fd, dc->offset, dc->buf, dc->data_size) < 0) {
+            return -1;
+        }
+
+        /* the file position advances; the cache is empty again */
+        dc->offset += dc->data_size;
+        dc->data_size = 0;
+    }
+
+    if (flag_sync) {
+        return 0;
+    }
+
+    memcpy(dc->buf + dc->data_size, buf, size);
+    dc->data_size += size;
+
+    return 0;
+}
+
+/*
+ * Release the buffer owned by a cache set up with prepare_data_cache().
+ * Pending data is not flushed here; the other fields are left as they are.
+ */
+static void free_data_cache(DataCache *data_cache)
+{
+    g_free(data_cache->buf);
+}
+
+/*
+ * Return the size of the output buffer needed to compress one page of
+ * page_size bytes with the compression format given by flag_compress.
+ *
+ * Returns 0 when flag_compress is not one of the handled formats (the
+ * snappy case only exists when CONFIG_SNAPPY is defined).
+ */
+static size_t get_len_buf_out(size_t page_size, uint32_t flag_compress)
+{
+    size_t bound = 0;
+
+    switch (flag_compress) {
+    case DUMP_DH_COMPRESSED_ZLIB:
+        bound = compressBound(page_size);
+        break;
+
+    case DUMP_DH_COMPRESSED_LZO:
+        /*
+         * LZO may slightly expand incompressible data. The expansion
+         * calculation is described at:
+         * http://www.oberhumer.com/opensource/lzo/lzofaq.php
+         */
+        bound = page_size + page_size / 16 + 64 + 3;
+        break;
+
+#ifdef CONFIG_SNAPPY
+    case DUMP_DH_COMPRESSED_SNAPPY:
+        bound = snappy_max_compressed_length(page_size);
+        break;
+#endif
+
+    default:
+        break;
+    }
+
+    return bound;
+}
+
+/*
+ * Tell whether a page consists entirely of zero bytes.
+ * buf points at the page contents, page_size is its length in bytes.
+ */
+static inline bool is_zero_page(const uint8_t *buf, size_t page_size)
+{
+    bool all_zero = buffer_is_zero(buf, page_size);
+
+    return all_zero;
+}
+
+/*
+ * Write the page descriptor table and the page data of the vmcore.
+ *
+ * Descriptors start at s->offset_page; page data starts right after the
+ * s->num_dumpable descriptors. Both streams go through their own DataCache.
+ * The first page of the data area is a single all-zero page shared by the
+ * descriptors of every zero page. Every other page is compressed with the
+ * format selected in s->flag_compress and stored in plaintext when
+ * compression fails or does not shrink the page.
+ *
+ * s->written_size grows by one page size per page processed. On failure an
+ * error is stored in errp; all temporary buffers are released either way.
+ */
+static void write_dump_pages(DumpState *s, Error **errp)
+{
+    int ret = 0;
+    DataCache page_desc, page_data;
+    size_t len_buf_out, size_out;
+    /*
+     * The compressors report the output length through their own integer
+     * types, which need not have the width of size_t. Use correctly typed
+     * temporaries instead of casting &size_out.
+     */
+    uLongf zlib_len;
+#ifdef CONFIG_LZO
+    lzo_bytep wrkmem = NULL;
+    lzo_uint lzo_len;
+#endif
+    uint8_t *buf_out = NULL;
+    off_t offset_desc, offset_data;
+    PageDescriptor pd, pd_zero;
+    uint8_t *buf;
+    uint8_t *zero_buf;
+    const uint8_t *payload;
+    GuestPhysBlock *block_iter = NULL;
+    uint64_t pfn_iter;
+
+    /* get offset of page_desc and page_data in dump file */
+    offset_desc = s->offset_page;
+    offset_data = offset_desc + sizeof(PageDescriptor) * s->num_dumpable;
+
+    prepare_data_cache(&page_desc, s, offset_desc);
+    prepare_data_cache(&page_data, s, offset_data);
+
+    /* prepare buffer to store compressed data */
+    len_buf_out = get_len_buf_out(s->dump_info.page_size, s->flag_compress);
+    assert(len_buf_out != 0);
+
+#ifdef CONFIG_LZO
+    wrkmem = g_malloc(LZO1X_1_MEM_COMPRESS);
+#endif
+
+    buf_out = g_malloc(len_buf_out);
+
+    /*
+     * init zero page's page_desc and page_data, because every zero page
+     * uses the same page_data
+     */
+    pd_zero.size = cpu_to_dump32(s, s->dump_info.page_size);
+    pd_zero.flags = cpu_to_dump32(s, 0);
+    pd_zero.offset = cpu_to_dump64(s, offset_data);
+    pd_zero.page_flags = cpu_to_dump64(s, 0);
+    zero_buf = g_malloc0(s->dump_info.page_size);
+    ret = write_cache(&page_data, zero_buf, s->dump_info.page_size, false);
+    g_free(zero_buf);
+    if (ret < 0) {
+        error_setg(errp, "dump: failed to write page data (zero page)");
+        goto out;
+    }
+
+    offset_data += s->dump_info.page_size;
+
+    /*
+     * dump memory to vmcore page by page. all zero pages share the first
+     * page of the page data section
+     */
+    while (get_next_page(&block_iter, &pfn_iter, &buf, s)) {
+        /* check zero page */
+        if (is_zero_page(buf, s->dump_info.page_size)) {
+            ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor),
+                              false);
+            if (ret < 0) {
+                error_setg(errp, "dump: failed to write page desc");
+                goto out;
+            }
+        } else {
+            /*
+             * not zero page, then:
+             * 1. compress the page
+             * 2. write the compressed page into the cache of page_data
+             * 3. get page desc of the compressed page and write it into the
+             *    cache of page_desc
+             *
+             * only one compression format will be used here, for
+             * s->flag_compress is set. But when compression fails to work,
+             * we fall back to save in plaintext.
+             */
+            zlib_len = len_buf_out;
+            /* snappy reads the output capacity from size_out */
+            size_out = len_buf_out;
+            if ((s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) &&
+                (compress2(buf_out, &zlib_len, buf,
+                           s->dump_info.page_size, Z_BEST_SPEED) == Z_OK) &&
+                (zlib_len < s->dump_info.page_size)) {
+                size_out = zlib_len;
+                payload = buf_out;
+                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_ZLIB);
+#ifdef CONFIG_LZO
+            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_LZO) &&
+                       (lzo1x_1_compress(buf, s->dump_info.page_size,
+                                         buf_out, &lzo_len,
+                                         wrkmem) == LZO_E_OK) &&
+                       (lzo_len < s->dump_info.page_size)) {
+                size_out = lzo_len;
+                payload = buf_out;
+                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_LZO);
+#endif
+#ifdef CONFIG_SNAPPY
+            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) &&
+                       (snappy_compress((char *)buf, s->dump_info.page_size,
+                                        (char *)buf_out,
+                                        &size_out) == SNAPPY_OK) &&
+                       (size_out < s->dump_info.page_size)) {
+                payload = buf_out;
+                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_SNAPPY);
+#endif
+            } else {
+                /*
+                 * fall back to save in plaintext, size_out should be
+                 * assigned the target's page size
+                 */
+                size_out = s->dump_info.page_size;
+                payload = buf;
+                pd.flags = cpu_to_dump32(s, 0);
+            }
+            pd.size = cpu_to_dump32(s, size_out);
+
+            /* the page data, compressed or not, goes out the same way */
+            ret = write_cache(&page_data, payload, size_out, false);
+            if (ret < 0) {
+                error_setg(errp, "dump: failed to write page data");
+                goto out;
+            }
+
+            /* get and write page desc here */
+            pd.page_flags = cpu_to_dump64(s, 0);
+            pd.offset = cpu_to_dump64(s, offset_data);
+            offset_data += size_out;
+
+            ret = write_cache(&page_desc, &pd, sizeof(PageDescriptor), false);
+            if (ret < 0) {
+                error_setg(errp, "dump: failed to write page desc");
+                goto out;
+            }
+        }
+        s->written_size += s->dump_info.page_size;
+    }
+
+    /* push whatever is still pending in both caches out to the file */
+    ret = write_cache(&page_desc, NULL, 0, true);
+    if (ret < 0) {
+        error_setg(errp, "dump: failed to sync cache for page_desc");
+        goto out;
+    }
+    ret = write_cache(&page_data, NULL, 0, true);
+    if (ret < 0) {
+        error_setg(errp, "dump: failed to sync cache for page_data");
+        goto out;
+    }
+
+out:
+    free_data_cache(&page_desc);
+    free_data_cache(&page_data);
+
+#ifdef CONFIG_LZO
+    g_free(wrkmem);
+#endif
+
+    g_free(buf_out);
+}
+
+/*
+ * Produce a complete kdump-compressed vmcore on s->fd.
+ *
+ * The stages run in order: start flat header, dump header, dump bitmap,
+ * pages, end flat header. The first failing stage stops the sequence and
+ * its error is reported through errp.
+ *
+ * the kdump-compressed format is:
+ *                                               File offset
+ *  +------------------------------------------+ 0x0
+ *  |    main header (struct disk_dump_header) |
+ *  |------------------------------------------+ block 1
+ *  |    sub header (struct kdump_sub_header)  |
+ *  |------------------------------------------+ block 2
+ *  |            1st-dump_bitmap               |
+ *  |------------------------------------------+ block 2 + X blocks
+ *  |            2nd-dump_bitmap               | (aligned by block)
+ *  |------------------------------------------+ block 2 + 2 * X blocks
+ *  |  page desc for pfn 0 (struct page_desc)  | (aligned by block)
+ *  |  page desc for pfn 1 (struct page_desc)  |
+ *  |                    :                     |
+ *  |------------------------------------------| (not aligned by block)
+ *  |         page data (pfn 0)                |
+ *  |         page data (pfn 1)                |
+ *  |                    :                     |
+ *  +------------------------------------------+
+ */
+static void create_kdump_vmcore(DumpState *s, Error **errp)
+{
+    Error *local_err = NULL;
+
+    if (write_start_flat_header(s->fd) < 0) {
+        error_setg(errp, "dump: failed to write start flat header");
+        return;
+    }
+
+    /* each of these stages reports failure through local_err */
+    write_dump_header(s, &local_err);
+    if (local_err) {
+        goto propagate;
+    }
+
+    write_dump_bitmap(s, &local_err);
+    if (local_err) {
+        goto propagate;
+    }
+
+    write_dump_pages(s, &local_err);
+    if (local_err) {
+        goto propagate;
+    }
+
+    if (write_end_flat_header(s->fd) < 0) {
+        error_setg(errp, "dump: failed to write end flat header");
+    }
+    return;
+
+propagate:
+    error_propagate(errp, local_err);
+}
+