X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/f1d640524d0557f78343504c3b128d412f573f28..4dd84ac9a731f19576aec14d0f2f0037e81a5922:/block/vmdk.c diff --git a/block/vmdk.c b/block/vmdk.c index a9bd22bf93..fd78fd0ccf 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -27,11 +27,13 @@ #include "qapi/error.h" #include "block/block_int.h" #include "sysemu/block-backend.h" +#include "qapi/qmp/qdict.h" #include "qapi/qmp/qerror.h" #include "qemu/error-report.h" #include "qemu/module.h" +#include "qemu/option.h" #include "qemu/bswap.h" -#include "migration/migration.h" +#include "migration/blocker.h" #include "qemu/cutils.h" #include @@ -46,6 +48,8 @@ #define VMDK4_FLAG_MARKER (1 << 17) #define VMDK4_GD_AT_END 0xffffffffffffffffULL +#define VMDK_EXTENT_MAX_SECTORS (1ULL << 32) + #define VMDK_GTE_ZEROED 0x1 /* VMDK internal error codes */ @@ -87,6 +91,44 @@ typedef struct { uint16_t compressAlgorithm; } QEMU_PACKED VMDK4Header; +typedef struct VMDKSESparseConstHeader { + uint64_t magic; + uint64_t version; + uint64_t capacity; + uint64_t grain_size; + uint64_t grain_table_size; + uint64_t flags; + uint64_t reserved1; + uint64_t reserved2; + uint64_t reserved3; + uint64_t reserved4; + uint64_t volatile_header_offset; + uint64_t volatile_header_size; + uint64_t journal_header_offset; + uint64_t journal_header_size; + uint64_t journal_offset; + uint64_t journal_size; + uint64_t grain_dir_offset; + uint64_t grain_dir_size; + uint64_t grain_tables_offset; + uint64_t grain_tables_size; + uint64_t free_bitmap_offset; + uint64_t free_bitmap_size; + uint64_t backmap_offset; + uint64_t backmap_size; + uint64_t grains_offset; + uint64_t grains_size; + uint8_t pad[304]; +} QEMU_PACKED VMDKSESparseConstHeader; + +typedef struct VMDKSESparseVolatileHeader { + uint64_t magic; + uint64_t free_gt_number; + uint64_t next_txn_seq_number; + uint64_t replay_journal; + uint8_t pad[480]; +} QEMU_PACKED VMDKSESparseVolatileHeader; + #define L2_CACHE_SIZE 16 typedef struct VmdkExtent { @@ -95,19 +137,23 @@ typedef struct VmdkExtent { bool compressed; bool has_marker; bool has_zero_grain; + bool sesparse; + uint64_t sesparse_l2_tables_offset; + uint64_t sesparse_clusters_offset; + int32_t entry_size; int version; int64_t sectors; int64_t end_sector; int64_t flat_start_offset; int64_t l1_table_offset; int64_t l1_backup_table_offset; - uint32_t *l1_table; + void *l1_table; uint32_t *l1_backup_table; unsigned int l1_size; uint32_t l1_entry_sectors; unsigned int l2_size; - uint32_t *l2_cache; + void *l2_cache; uint32_t l2_cache_offsets[L2_CACHE_SIZE]; uint32_t l2_cache_counts[L2_CACHE_SIZE]; @@ -191,13 +237,15 @@ static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename) } if (end - p >= strlen("version=X\n")) { if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 || - strncmp("version=2\n", p, strlen("version=2\n")) == 0) { + strncmp("version=2\n", p, strlen("version=2\n")) == 0 || + strncmp("version=3\n", p, strlen("version=3\n")) == 0) { return 100; } } if (end - p >= strlen("version=X\r\n")) { if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 || - strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) { + strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0 || + strncmp("version=3\r\n", p, strlen("version=3\r\n")) == 0) { return 100; } } @@ -242,10 +290,11 @@ static void vmdk_free_last_extent(BlockDriverState *bs) s->extents = g_renew(VmdkExtent, s->extents, s->num_extents); } -static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) +/* Return -ve errno, or 0 on success and write CID into *pcid. */ +static int vmdk_read_cid(BlockDriverState *bs, int parent, uint32_t *pcid) { char *desc; - uint32_t cid = 0xffffffff; + uint32_t cid; const char *p_name, *cid_str; size_t cid_str_size; BDRVVmdkState *s = bs->opaque; @@ -254,8 +303,7 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) desc = g_malloc0(DESC_SIZE); ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); if (ret < 0) { - g_free(desc); - return 0; + goto out; } if (parent) { @@ -268,13 +316,21 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) desc[DESC_SIZE - 1] = '\0'; p_name = strstr(desc, cid_str); - if (p_name != NULL) { - p_name += cid_str_size; - sscanf(p_name, "%" SCNx32, &cid); + if (p_name == NULL) { + ret = -EINVAL; + goto out; } + p_name += cid_str_size; + if (sscanf(p_name, "%" SCNx32, &cid) != 1) { + ret = -EINVAL; + goto out; + } + *pcid = cid; + ret = 0; +out: g_free(desc); - return cid; + return ret; } static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid) @@ -322,7 +378,16 @@ static int vmdk_is_cid_valid(BlockDriverState *bs) if (!s->cid_checked && bs->backing) { BlockDriverState *p_bs = bs->backing->bs; - cur_pcid = vmdk_read_cid(p_bs, 0); + if (strcmp(p_bs->drv->format_name, "vmdk")) { + /* Backing file is not in vmdk format, so it does not have + * a CID, which makes the overlay's parent CID invalid */ + return 0; + } + + if (vmdk_read_cid(p_bs, 0, &cur_pcid) != 0) { + /* read failure: report as not valid */ + return 0; + } if (s->parent_cid != cur_pcid) { /* CID not valid */ return 0; @@ -366,12 +431,16 @@ static int vmdk_parent_open(BlockDriverState *bs) ret = -EINVAL; goto out; } - if ((end_name - p_name) > sizeof(bs->backing_file) - 1) { + if ((end_name - p_name) > sizeof(bs->auto_backing_file) - 1) { ret = -EINVAL; goto out; } - pstrcpy(bs->backing_file, end_name - p_name + 1, p_name); + pstrcpy(bs->auto_backing_file, end_name - p_name + 1, p_name); + pstrcpy(bs->backing_file, sizeof(bs->backing_file), + bs->auto_backing_file); + pstrcpy(bs->backing_format, sizeof(bs->backing_format), + "vmdk"); } out: @@ -398,11 +467,22 @@ static int vmdk_add_extent(BlockDriverState *bs, error_setg(errp, "Invalid granularity, image may be corrupt"); return -EFBIG; } - if (l1_size > 512 * 1024 * 1024) { - /* Although with big capacity and small l1_entry_sectors, we can get a + if (l1_size > 32 * 1024 * 1024) { + /* + * Although with big capacity and small l1_entry_sectors, we can get a * big l1_size, we don't want unbounded value to allocate the table. - * Limit it to 512M, which is 16PB for default cluster and L2 table - * size */ + * Limit it to 32M, which is enough to store: + * 8TB - for both VMDK3 & VMDK4 with + * minimal cluster size: 512B + * minimal L2 table size: 512 entries + * 8 TB is still more than the maximal value supported for + * VMDK3 & VMDK4 which is 2TB. + * 64TB - for "ESXi seSparse Extent" + * minimal cluster size: 512B (default is 4KB) + * L2 table size: 4096 entries (const). + * 64TB is more than the maximal value supported for + * seSparse VMDKs (which is slightly less than 64TB) + */ error_setg(errp, "L1 size too big"); return -EFBIG; } @@ -427,6 +507,7 @@ static int vmdk_add_extent(BlockDriverState *bs, extent->l2_size = l2_size; extent->cluster_sectors = flat ? sectors : cluster_sectors; extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors); + extent->entry_size = sizeof(uint32_t); if (s->num_extents > 1) { extent->end_sector = (*(extent - 1)).end_sector + extent->sectors; @@ -448,7 +529,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, int i; /* read the L1 table */ - l1_size = extent->l1_size * sizeof(uint32_t); + l1_size = extent->l1_size * extent->entry_size; extent->l1_table = g_try_malloc(l1_size); if (l1_size && extent->l1_table == NULL) { return -ENOMEM; @@ -459,16 +540,23 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, extent->l1_table, l1_size); if (ret < 0) { + bdrv_refresh_filename(extent->file->bs); error_setg_errno(errp, -ret, "Could not read l1 table from extent '%s'", extent->file->bs->filename); goto fail_l1; } for (i = 0; i < extent->l1_size; i++) { - le32_to_cpus(&extent->l1_table[i]); + if (extent->entry_size == sizeof(uint64_t)) { + le64_to_cpus((uint64_t *)extent->l1_table + i); + } else { + assert(extent->entry_size == sizeof(uint32_t)); + le32_to_cpus((uint32_t *)extent->l1_table + i); + } } if (extent->l1_backup_table_offset) { + assert(!extent->sesparse); extent->l1_backup_table = g_try_malloc(l1_size); if (l1_size && extent->l1_backup_table == NULL) { ret = -ENOMEM; @@ -479,6 +567,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, extent->l1_backup_table, l1_size); if (ret < 0) { + bdrv_refresh_filename(extent->file->bs); error_setg_errno(errp, -ret, "Could not read l1 backup table from extent '%s'", extent->file->bs->filename); @@ -490,7 +579,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, } extent->l2_cache = - g_new(uint32_t, extent->l2_size * L2_CACHE_SIZE); + g_malloc(extent->entry_size * extent->l2_size * L2_CACHE_SIZE); return 0; fail_l1b: g_free(extent->l1_backup_table); @@ -510,6 +599,7 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs, ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header)); if (ret < 0) { + bdrv_refresh_filename(file->bs); error_setg_errno(errp, -ret, "Could not read header from file '%s'", file->bs->filename); @@ -535,6 +625,205 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs, return ret; } +#define SESPARSE_CONST_HEADER_MAGIC UINT64_C(0x00000000cafebabe) +#define SESPARSE_VOLATILE_HEADER_MAGIC UINT64_C(0x00000000cafecafe) + +/* Strict checks - format not officially documented */ +static int check_se_sparse_const_header(VMDKSESparseConstHeader *header, + Error **errp) +{ + header->magic = le64_to_cpu(header->magic); + header->version = le64_to_cpu(header->version); + header->grain_size = le64_to_cpu(header->grain_size); + header->grain_table_size = le64_to_cpu(header->grain_table_size); + header->flags = le64_to_cpu(header->flags); + header->reserved1 = le64_to_cpu(header->reserved1); + header->reserved2 = le64_to_cpu(header->reserved2); + header->reserved3 = le64_to_cpu(header->reserved3); + header->reserved4 = le64_to_cpu(header->reserved4); + + header->volatile_header_offset = + le64_to_cpu(header->volatile_header_offset); + header->volatile_header_size = le64_to_cpu(header->volatile_header_size); + + header->journal_header_offset = le64_to_cpu(header->journal_header_offset); + header->journal_header_size = le64_to_cpu(header->journal_header_size); + + header->journal_offset = le64_to_cpu(header->journal_offset); + header->journal_size = le64_to_cpu(header->journal_size); + + header->grain_dir_offset = le64_to_cpu(header->grain_dir_offset); + header->grain_dir_size = le64_to_cpu(header->grain_dir_size); + + header->grain_tables_offset = le64_to_cpu(header->grain_tables_offset); + header->grain_tables_size = le64_to_cpu(header->grain_tables_size); + + header->free_bitmap_offset = le64_to_cpu(header->free_bitmap_offset); + header->free_bitmap_size = le64_to_cpu(header->free_bitmap_size); + + header->backmap_offset = le64_to_cpu(header->backmap_offset); + header->backmap_size = le64_to_cpu(header->backmap_size); + + header->grains_offset = le64_to_cpu(header->grains_offset); + header->grains_size = le64_to_cpu(header->grains_size); + + if (header->magic != SESPARSE_CONST_HEADER_MAGIC) { + error_setg(errp, "Bad const header magic: 0x%016" PRIx64, + header->magic); + return -EINVAL; + } + + if (header->version != 0x0000000200000001) { + error_setg(errp, "Unsupported version: 0x%016" PRIx64, + header->version); + return -ENOTSUP; + } + + if (header->grain_size != 8) { + error_setg(errp, "Unsupported grain size: %" PRIu64, + header->grain_size); + return -ENOTSUP; + } + + if (header->grain_table_size != 64) { + error_setg(errp, "Unsupported grain table size: %" PRIu64, + header->grain_table_size); + return -ENOTSUP; + } + + if (header->flags != 0) { + error_setg(errp, "Unsupported flags: 0x%016" PRIx64, + header->flags); + return -ENOTSUP; + } + + if (header->reserved1 != 0 || header->reserved2 != 0 || + header->reserved3 != 0 || header->reserved4 != 0) { + error_setg(errp, "Unsupported reserved bits:" + " 0x%016" PRIx64 " 0x%016" PRIx64 + " 0x%016" PRIx64 " 0x%016" PRIx64, + header->reserved1, header->reserved2, + header->reserved3, header->reserved4); + return -ENOTSUP; + } + + /* check that padding is 0 */ + if (!buffer_is_zero(header->pad, sizeof(header->pad))) { + error_setg(errp, "Unsupported non-zero const header padding"); + return -ENOTSUP; + } + + return 0; +} + +static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header, + Error **errp) +{ + header->magic = le64_to_cpu(header->magic); + header->free_gt_number = le64_to_cpu(header->free_gt_number); + header->next_txn_seq_number = le64_to_cpu(header->next_txn_seq_number); + header->replay_journal = le64_to_cpu(header->replay_journal); + + if (header->magic != SESPARSE_VOLATILE_HEADER_MAGIC) { + error_setg(errp, "Bad volatile header magic: 0x%016" PRIx64, + header->magic); + return -EINVAL; + } + + if (header->replay_journal) { + error_setg(errp, "Image is dirty, Replaying journal not supported"); + return -ENOTSUP; + } + + /* check that padding is 0 */ + if (!buffer_is_zero(header->pad, sizeof(header->pad))) { + error_setg(errp, "Unsupported non-zero volatile header padding"); + return -ENOTSUP; + } + + return 0; +} + +static int vmdk_open_se_sparse(BlockDriverState *bs, + BdrvChild *file, + int flags, Error **errp) +{ + int ret; + VMDKSESparseConstHeader const_header; + VMDKSESparseVolatileHeader volatile_header; + VmdkExtent *extent; + + ret = bdrv_apply_auto_read_only(bs, + "No write support for seSparse images available", errp); + if (ret < 0) { + return ret; + } + + assert(sizeof(const_header) == SECTOR_SIZE); + + ret = bdrv_pread(file, 0, &const_header, sizeof(const_header)); + if (ret < 0) { + bdrv_refresh_filename(file->bs); + error_setg_errno(errp, -ret, + "Could not read const header from file '%s'", + file->bs->filename); + return ret; + } + + /* check const header */ + ret = check_se_sparse_const_header(&const_header, errp); + if (ret < 0) { + return ret; + } + + assert(sizeof(volatile_header) == SECTOR_SIZE); + + ret = bdrv_pread(file, + const_header.volatile_header_offset * SECTOR_SIZE, + &volatile_header, sizeof(volatile_header)); + if (ret < 0) { + bdrv_refresh_filename(file->bs); + error_setg_errno(errp, -ret, + "Could not read volatile header from file '%s'", + file->bs->filename); + return ret; + } + + /* check volatile header */ + ret = check_se_sparse_volatile_header(&volatile_header, errp); + if (ret < 0) { + return ret; + } + + ret = vmdk_add_extent(bs, file, false, + const_header.capacity, + const_header.grain_dir_offset * SECTOR_SIZE, + 0, + const_header.grain_dir_size * + SECTOR_SIZE / sizeof(uint64_t), + const_header.grain_table_size * + SECTOR_SIZE / sizeof(uint64_t), + const_header.grain_size, + &extent, + errp); + if (ret < 0) { + return ret; + } + + extent->sesparse = true; + extent->sesparse_l2_tables_offset = const_header.grain_tables_offset; + extent->sesparse_clusters_offset = const_header.grains_offset; + extent->entry_size = sizeof(uint64_t); + + ret = vmdk_init_tables(bs, extent, errp); + if (ret) { + /* free extent allocated by vmdk_add_extent */ + vmdk_free_last_extent(bs); + } + + return ret; +} + static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, QDict *options, Error **errp); @@ -587,6 +876,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header)); if (ret < 0) { + bdrv_refresh_filename(file->bs); error_setg_errno(errp, -ret, "Could not read header from file '%s'", file->bs->filename); @@ -811,6 +1101,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, * RW [size in sectors] SPARSE "file-name.vmdk" * RW [size in sectors] VMFS "file-name.vmdk" * RW [size in sectors] VMFSSPARSE "file-name.vmdk" + * RW [size in sectors] SESPARSE "file-name.vmdk" */ flat_offset = -1; matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64, @@ -833,7 +1124,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, if (sectors <= 0 || (strcmp(type, "FLAT") && strcmp(type, "SPARSE") && - strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) || + strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE") && + strcmp(type, "SESPARSE")) || (strcmp(access, "RW"))) { continue; } @@ -841,13 +1133,13 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, if (!path_is_absolute(fname) && !path_has_protocol(fname) && !desc_file_path[0]) { + bdrv_refresh_filename(bs->file->bs); error_setg(errp, "Cannot use relative extent paths with VMDK " "descriptor file '%s'", bs->file->bs->filename); return -EINVAL; } - extent_path = g_malloc0(PATH_MAX); - path_combine(extent_path, PATH_MAX, desc_file_path, fname); + extent_path = path_combine(desc_file_path, fname); ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents); assert(ret < 32); @@ -886,6 +1178,13 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, return ret; } extent = &s->extents[s->num_extents - 1]; + } else if (!strcmp(type, "SESPARSE")) { + ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp); + if (ret) { + bdrv_unref_child(bs, extent_file); + return ret; + } + extent = &s->extents[s->num_extents - 1]; } else { error_setg(errp, "Unsupported extent type '%s'", type); bdrv_unref_child(bs, extent_file); @@ -920,6 +1219,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, if (strcmp(ct, "monolithicFlat") && strcmp(ct, "vmfs") && strcmp(ct, "vmfsSparse") && + strcmp(ct, "seSparse") && strcmp(ct, "twoGbMaxExtentSparse") && strcmp(ct, "twoGbMaxExtentFlat")) { error_setg(errp, "Unsupported image type '%s'", ct); @@ -975,8 +1275,14 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, if (ret) { goto fail; } - s->cid = vmdk_read_cid(bs, 0); - s->parent_cid = vmdk_read_cid(bs, 1); + ret = vmdk_read_cid(bs, 0, &s->cid); + if (ret) { + goto fail; + } + ret = vmdk_read_cid(bs, 1, &s->parent_cid); + if (ret) { + goto fail; + } qemu_co_mutex_init(&s->lock); /* Disable migration when VMDK images are used */ @@ -1058,6 +1364,8 @@ static int get_whole_cluster(BlockDriverState *bs, /* Read backing data before skip range */ if (skip_start_bytes > 0) { if (bs->backing) { + /* qcow2 emits this on bs->file instead of bs->backing */ + BLKDBG_EVENT(extent->file, BLKDBG_COW_READ); ret = bdrv_pread(bs->backing, offset, whole_grain, skip_start_bytes); if (ret < 0) { @@ -1065,6 +1373,7 @@ static int get_whole_cluster(BlockDriverState *bs, goto exit; } } + BLKDBG_EVENT(extent->file, BLKDBG_COW_WRITE); ret = bdrv_pwrite(extent->file, cluster_offset, whole_grain, skip_start_bytes); if (ret < 0) { @@ -1075,6 +1384,8 @@ static int get_whole_cluster(BlockDriverState *bs, /* Read backing data after skip range */ if (skip_end_bytes < cluster_bytes) { if (bs->backing) { + /* qcow2 emits this on bs->file instead of bs->backing */ + BLKDBG_EVENT(extent->file, BLKDBG_COW_READ); ret = bdrv_pread(bs->backing, offset + skip_end_bytes, whole_grain + skip_end_bytes, cluster_bytes - skip_end_bytes); @@ -1083,6 +1394,7 @@ static int get_whole_cluster(BlockDriverState *bs, goto exit; } } + BLKDBG_EVENT(extent->file, BLKDBG_COW_WRITE); ret = bdrv_pwrite(extent->file, cluster_offset + skip_end_bytes, whole_grain + skip_end_bytes, cluster_bytes - skip_end_bytes); @@ -1103,6 +1415,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, { offset = cpu_to_le32(offset); /* update L2 table */ + BLKDBG_EVENT(extent->file, BLKDBG_L2_UPDATE); if (bdrv_pwrite_sync(extent->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(offset)), @@ -1157,10 +1470,12 @@ static int get_cluster_offset(BlockDriverState *bs, { unsigned int l1_index, l2_offset, l2_index; int min_index, i, j; - uint32_t min_count, *l2_table; + uint32_t min_count; + void *l2_table; bool zeroed = false; int64_t ret; int64_t cluster_sector; + unsigned int l2_size_bytes = extent->l2_size * extent->entry_size; if (m_data) { m_data->valid = 0; @@ -1175,7 +1490,36 @@ static int get_cluster_offset(BlockDriverState *bs, if (l1_index >= extent->l1_size) { return VMDK_ERROR; } - l2_offset = extent->l1_table[l1_index]; + if (extent->sesparse) { + uint64_t l2_offset_u64; + + assert(extent->entry_size == sizeof(uint64_t)); + + l2_offset_u64 = ((uint64_t *)extent->l1_table)[l1_index]; + if (l2_offset_u64 == 0) { + l2_offset = 0; + } else if ((l2_offset_u64 & 0xffffffff00000000) != 0x1000000000000000) { + /* + * Top most nibble is 0x1 if grain table is allocated. + * strict check - top most 4 bytes must be 0x10000000 since max + * supported size is 64TB for disk - so no more than 64TB / 16MB + * grain directories which is smaller than uint32, + * where 16MB is the only supported default grain table coverage. + */ + return VMDK_ERROR; + } else { + l2_offset_u64 = l2_offset_u64 & 0x00000000ffffffff; + l2_offset_u64 = extent->sesparse_l2_tables_offset + + l2_offset_u64 * l2_size_bytes / SECTOR_SIZE; + if (l2_offset_u64 > 0x00000000ffffffff) { + return VMDK_ERROR; + } + l2_offset = (unsigned int)(l2_offset_u64); + } + } else { + assert(extent->entry_size == sizeof(uint32_t)); + l2_offset = ((uint32_t *)extent->l1_table)[l1_index]; + } if (!l2_offset) { return VMDK_UNALLOC; } @@ -1187,7 +1531,7 @@ static int get_cluster_offset(BlockDriverState *bs, extent->l2_cache_counts[j] >>= 1; } } - l2_table = extent->l2_cache + (i * extent->l2_size); + l2_table = (char *)extent->l2_cache + (i * l2_size_bytes); goto found; } } @@ -1200,12 +1544,13 @@ static int get_cluster_offset(BlockDriverState *bs, min_index = i; } } - l2_table = extent->l2_cache + (min_index * extent->l2_size); + l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes); + BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD); if (bdrv_pread(extent->file, (int64_t)l2_offset * 512, l2_table, - extent->l2_size * sizeof(uint32_t) - ) != extent->l2_size * sizeof(uint32_t)) { + l2_size_bytes + ) != l2_size_bytes) { return VMDK_ERROR; } @@ -1213,16 +1558,49 @@ static int get_cluster_offset(BlockDriverState *bs, extent->l2_cache_counts[min_index] = 1; found: l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size; - cluster_sector = le32_to_cpu(l2_table[l2_index]); - if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) { - zeroed = true; + if (extent->sesparse) { + cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]); + switch (cluster_sector & 0xf000000000000000) { + case 0x0000000000000000: + /* unallocated grain */ + if (cluster_sector != 0) { + return VMDK_ERROR; + } + break; + case 0x1000000000000000: + /* scsi-unmapped grain - fallthrough */ + case 0x2000000000000000: + /* zero grain */ + zeroed = true; + break; + case 0x3000000000000000: + /* allocated grain */ + cluster_sector = (((cluster_sector & 0x0fff000000000000) >> 48) | + ((cluster_sector & 0x0000ffffffffffff) << 12)); + cluster_sector = extent->sesparse_clusters_offset + + cluster_sector * extent->cluster_sectors; + break; + default: + return VMDK_ERROR; + } + } else { + cluster_sector = le32_to_cpu(((uint32_t *)l2_table)[l2_index]); + + if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) { + zeroed = true; + } } if (!cluster_sector || zeroed) { if (!allocate) { return zeroed ? VMDK_ZEROED : VMDK_UNALLOC; } + assert(!extent->sesparse); + + if (extent->next_cluster_sector >= VMDK_EXTENT_MAX_SECTORS) { + return VMDK_ERROR; + } cluster_sector = extent->next_cluster_sector; extent->next_cluster_sector += extent->cluster_sectors; @@ -1242,7 +1620,7 @@ static int get_cluster_offset(BlockDriverState *bs, m_data->l1_index = l1_index; m_data->l2_index = l2_index; m_data->l2_offset = l2_offset; - m_data->l2_cache_entry = &l2_table[l2_index]; + m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index; } } *cluster_offset = cluster_sector << BDRV_SECTOR_BITS; @@ -1278,33 +1656,27 @@ static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent, return extent_relative_offset % cluster_size; } -static inline uint64_t vmdk_find_index_in_cluster(VmdkExtent *extent, - int64_t sector_num) -{ - uint64_t offset; - offset = vmdk_find_offset_in_cluster(extent, sector_num * BDRV_SECTOR_SIZE); - return offset / BDRV_SECTOR_SIZE; -} - -static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) +static int coroutine_fn vmdk_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) { BDRVVmdkState *s = bs->opaque; int64_t index_in_cluster, n, ret; - uint64_t offset; + uint64_t cluster_offset; VmdkExtent *extent; - extent = find_extent(s, sector_num, NULL); + extent = find_extent(s, offset >> BDRV_SECTOR_BITS, NULL); if (!extent) { - return 0; + return -EIO; } qemu_co_mutex_lock(&s->lock); - ret = get_cluster_offset(bs, extent, NULL, - sector_num * 512, false, &offset, + ret = get_cluster_offset(bs, extent, NULL, offset, false, &cluster_offset, 0, 0); qemu_co_mutex_unlock(&s->lock); - index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num); + index_in_cluster = vmdk_find_offset_in_cluster(extent, offset); switch (ret) { case VMDK_ERROR: ret = -EIO; @@ -1319,18 +1691,17 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, ret = BDRV_BLOCK_DATA; if (!extent->compressed) { ret |= BDRV_BLOCK_OFFSET_VALID; - ret |= (offset + (index_in_cluster << BDRV_SECTOR_BITS)) - & BDRV_BLOCK_OFFSET_MASK; + *map = cluster_offset + index_in_cluster; + if (extent->flat) { + ret |= BDRV_BLOCK_RECURSE; + } } *file = extent->file->bs; break; } - n = extent->cluster_sectors - index_in_cluster; - if (n > nb_sectors) { - n = nb_sectors; - } - *pnum = n; + n = extent->cluster_sectors * BDRV_SECTOR_SIZE - index_in_cluster; + *pnum = MIN(n, bytes); return ret; } @@ -1343,7 +1714,6 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, VmdkGrainMarker *data = NULL; uLongf buf_len; QEMUIOVector local_qiov; - struct iovec iov; int64_t write_offset; int64_t write_end_sector; @@ -1371,17 +1741,17 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, data->size = cpu_to_le32(buf_len); n_bytes = buf_len + sizeof(VmdkGrainMarker); - iov = (struct iovec) { - .iov_base = data, - .iov_len = n_bytes, - }; - qemu_iovec_init_external(&local_qiov, &iov, 1); + qemu_iovec_init_buf(&local_qiov, data, n_bytes); + + BLKDBG_EVENT(extent->file, BLKDBG_WRITE_COMPRESSED); } else { qemu_iovec_init(&local_qiov, qiov->niov); qemu_iovec_concat(&local_qiov, qiov, qiov_offset, n_bytes); + + BLKDBG_EVENT(extent->file, BLKDBG_WRITE_AIO); } - write_offset = cluster_offset + offset_in_cluster, + write_offset = cluster_offset + offset_in_cluster; ret = bdrv_co_pwritev(extent->file, write_offset, n_bytes, &local_qiov, 0); @@ -1420,6 +1790,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset, if (!extent->compressed) { + BLKDBG_EVENT(extent->file, BLKDBG_READ_AIO); ret = bdrv_co_preadv(extent->file, cluster_offset + offset_in_cluster, bytes, qiov, 0); @@ -1433,6 +1804,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset, buf_bytes = cluster_bytes * 2; cluster_buf = g_malloc(buf_bytes); uncomp_buf = g_malloc(cluster_bytes); + BLKDBG_EVENT(extent->file, BLKDBG_READ_COMPRESSED); ret = bdrv_pread(extent->file, cluster_offset, cluster_buf, buf_bytes); @@ -1510,6 +1882,8 @@ vmdk_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, qemu_iovec_reset(&local_qiov); qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes); + /* qcow2 emits this on bs->file instead of bs->backing */ + BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); ret = bdrv_co_preadv(bs->backing, offset, n_bytes, &local_qiov, 0); if (ret < 0) { @@ -1575,6 +1949,9 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, if (!extent) { return -EIO; } + if (extent->sesparse) { + return -ENOTSUP; + } offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset); n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE - offset_in_cluster); @@ -1662,6 +2039,27 @@ static int coroutine_fn vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov) { + if (bytes == 0) { + /* The caller will write bytes 0 to signal EOF. + * When receive it, we align EOF to a sector boundary. */ + BDRVVmdkState *s = bs->opaque; + int i, ret; + int64_t length; + + for (i = 0; i < s->num_extents; i++) { + length = bdrv_getlength(s->extents[i].file->bs); + if (length < 0) { + return length; + } + length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE); + ret = bdrv_truncate(s->extents[i].file, length, + PREALLOC_MODE_OFF, NULL); + if (ret < 0) { + return ret; + } + } + return 0; + } return vmdk_co_pwritev(bs, offset, bytes, qiov, 0); } @@ -1684,40 +2082,19 @@ static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs, return ret; } -static int vmdk_create_extent(const char *filename, int64_t filesize, - bool flat, bool compress, bool zeroed_grain, - QemuOpts *opts, Error **errp) +static int vmdk_init_extent(BlockBackend *blk, + int64_t filesize, bool flat, + bool compress, bool zeroed_grain, + Error **errp) { int ret, i; - BlockBackend *blk = NULL; VMDK4Header header; - Error *local_err = NULL; uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count; uint32_t *gd_buf = NULL; int gd_buf_size; - ret = bdrv_create_file(filename, opts, &local_err); - if (ret < 0) { - error_propagate(errp, local_err); - goto exit; - } - - blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, - &local_err); - if (blk == NULL) { - error_propagate(errp, local_err); - ret = -EIO; - goto exit; - } - - blk_set_allow_write_beyond_eof(blk, true); - if (flat) { - ret = blk_truncate(blk, filesize); - if (ret < 0) { - error_setg_errno(errp, -ret, "Could not truncate file"); - } + ret = blk_truncate(blk, filesize, PREALLOC_MODE_OFF, errp); goto exit; } magic = cpu_to_be32(VMDK4_MAGIC); @@ -1780,9 +2157,9 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, goto exit; } - ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9); + ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, + PREALLOC_MODE_OFF, errp); if (ret < 0) { - error_setg_errno(errp, -ret, "Could not truncate file"); goto exit; } @@ -1809,15 +2186,50 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, gd_buf, gd_buf_size, 0); if (ret < 0) { error_setg(errp, QERR_IO_ERROR); - goto exit; } ret = 0; +exit: + g_free(gd_buf); + return ret; +} + +static int vmdk_create_extent(const char *filename, int64_t filesize, + bool flat, bool compress, bool zeroed_grain, + BlockBackend **pbb, + QemuOpts *opts, Error **errp) +{ + int ret; + BlockBackend *blk = NULL; + Error *local_err = NULL; + + ret = bdrv_create_file(filename, opts, &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + goto exit; + } + + blk = blk_new_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); + if (blk == NULL) { + error_propagate(errp, local_err); + ret = -EIO; + goto exit; + } + + blk_set_allow_write_beyond_eof(blk, true); + + ret = vmdk_init_extent(blk, filesize, flat, compress, zeroed_grain, errp); exit: if (blk) { - blk_unref(blk); + if (pbb) { + *pbb = blk; + } else { + blk_unref(blk); + blk = NULL; + } } - g_free(gd_buf); return ret; } @@ -1861,32 +2273,57 @@ static int filename_decompose(const char *filename, char *path, char *prefix, return VMDK_OK; } -static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) +/* + * idx == 0: get or create the descriptor file (also the image file if in a + * non-split format. + * idx >= 1: get the n-th extent if in a split subformat + */ +typedef BlockBackend *(*vmdk_create_extent_fn)(int64_t size, + int idx, + bool flat, + bool split, + bool compress, + bool zeroed_grain, + void *opaque, + Error **errp); + +static void vmdk_desc_add_extent(GString *desc, + const char *extent_line_fmt, + int64_t size, const char *filename) +{ + char *basename = g_path_get_basename(filename); + + g_string_append_printf(desc, extent_line_fmt, + DIV_ROUND_UP(size, BDRV_SECTOR_SIZE), basename); + g_free(basename); +} + +static int coroutine_fn vmdk_co_do_create(int64_t size, + BlockdevVmdkSubformat subformat, + BlockdevVmdkAdapterType adapter_type, + const char *backing_file, + const char *hw_version, + bool compat6, + bool zeroed_grain, + vmdk_create_extent_fn extent_fn, + void *opaque, + Error **errp) { - int idx = 0; - BlockBackend *new_blk = NULL; + int extent_idx; + BlockBackend *blk = NULL; + BlockBackend *extent_blk; Error *local_err = NULL; char *desc = NULL; - int64_t total_size = 0, filesize; - char *adapter_type = NULL; - char *backing_file = NULL; - char *hw_version = NULL; - char *fmt = NULL; int ret = 0; bool flat, split, compress; GString *ext_desc_lines; - char *path = g_malloc0(PATH_MAX); - char *prefix = g_malloc0(PATH_MAX); - char *postfix = g_malloc0(PATH_MAX); - char *desc_line = g_malloc0(BUF_SIZE); - char *ext_filename = g_malloc0(PATH_MAX); - char *desc_filename = g_malloc0(PATH_MAX); const int64_t split_size = 0x80000000; /* VMDK has constant split size */ - const char *desc_extent_line; + int64_t extent_size; + int64_t created_size = 0; + const char *extent_line_fmt; char *parent_desc_line = g_malloc0(BUF_SIZE); uint32_t parent_cid = 0xffffffff; uint32_t number_heads = 16; - bool zeroed_grain = false; uint32_t desc_offset = 0, desc_len; const char desc_template[] = "# Disk DescriptorFile\n" @@ -1910,71 +2347,35 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) ext_desc_lines = g_string_new(NULL); - if (filename_decompose(filename, path, prefix, postfix, PATH_MAX, errp)) { - ret = -EINVAL; - goto exit; - } /* Read out options */ - total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), - BDRV_SECTOR_SIZE); - adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE); - backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); - hw_version = qemu_opt_get_del(opts, BLOCK_OPT_HWVERSION); - if (qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false)) { - if (strcmp(hw_version, "undefined")) { + if (compat6) { + if (hw_version) { error_setg(errp, "compat6 cannot be enabled with hwversion set"); ret = -EINVAL; goto exit; } - g_free(hw_version); - hw_version = g_strdup("6"); + hw_version = "6"; } - if (strcmp(hw_version, "undefined") == 0) { - g_free(hw_version); - hw_version = g_strdup("4"); - } - fmt = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT); - if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ZEROED_GRAIN, false)) { - zeroed_grain = true; + if (!hw_version) { + hw_version = "4"; } - if (!adapter_type) { - adapter_type = g_strdup("ide"); - } else if (strcmp(adapter_type, "ide") && - strcmp(adapter_type, "buslogic") && - strcmp(adapter_type, "lsilogic") && - strcmp(adapter_type, "legacyESX")) { - error_setg(errp, "Unknown adapter type: '%s'", adapter_type); - ret = -EINVAL; - goto exit; - } - if (strcmp(adapter_type, "ide") != 0) { + if (adapter_type != BLOCKDEV_VMDK_ADAPTER_TYPE_IDE) { /* that's the number of heads with which vmware operates when creating, exporting, etc. vmdk files with a non-ide adapter type */ number_heads = 255; } - if (!fmt) { - /* Default format to monolithicSparse */ - fmt = g_strdup("monolithicSparse"); - } else if (strcmp(fmt, "monolithicFlat") && - strcmp(fmt, "monolithicSparse") && - strcmp(fmt, "twoGbMaxExtentSparse") && - strcmp(fmt, "twoGbMaxExtentFlat") && - strcmp(fmt, "streamOptimized")) { - error_setg(errp, "Unknown subformat: '%s'", fmt); - ret = -EINVAL; - goto exit; - } - split = !(strcmp(fmt, "twoGbMaxExtentFlat") && - strcmp(fmt, "twoGbMaxExtentSparse")); - flat = !(strcmp(fmt, "monolithicFlat") && - strcmp(fmt, "twoGbMaxExtentFlat")); - compress = !strcmp(fmt, "streamOptimized"); + split = (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTFLAT) || + (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTSPARSE); + flat = (subformat == BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICFLAT) || + (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTFLAT); + compress = subformat == BLOCKDEV_VMDK_SUBFORMAT_STREAMOPTIMIZED; + if (flat) { - desc_extent_line = "RW %" PRId64 " FLAT \"%s\" 0\n"; + extent_line_fmt = "RW %" PRId64 " FLAT \"%s\" 0\n"; } else { - desc_extent_line = "RW %" PRId64 " SPARSE \"%s\"\n"; + extent_line_fmt = "RW %" PRId64 " SPARSE \"%s\"\n"; } if (flat && backing_file) { error_setg(errp, "Flat image can't have backing file"); @@ -1986,103 +2387,111 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) ret = -ENOTSUP; goto exit; } + + /* Create extents */ + if (split) { + extent_size = split_size; + } else { + extent_size = size; + } + if (!split && !flat) { + created_size = extent_size; + } else { + created_size = 0; + } + /* Get the descriptor file BDS */ + blk = extent_fn(created_size, 0, flat, split, compress, zeroed_grain, + opaque, errp); + if (!blk) { + ret = -EIO; + goto exit; + } + if (!split && !flat) { + vmdk_desc_add_extent(ext_desc_lines, extent_line_fmt, created_size, + blk_bs(blk)->filename); + } + if (backing_file) { - BlockBackend *blk; - char *full_backing = g_new0(char, PATH_MAX); - bdrv_get_full_backing_filename_from_filename(filename, backing_file, - full_backing, PATH_MAX, - &local_err); + BlockBackend *backing; + char *full_backing = + bdrv_get_full_backing_filename_from_filename(blk_bs(blk)->filename, + backing_file, + &local_err); if (local_err) { - g_free(full_backing); error_propagate(errp, local_err); ret = -ENOENT; goto exit; } + assert(full_backing); - blk = blk_new_open(full_backing, NULL, NULL, - BDRV_O_NO_BACKING, errp); + backing = blk_new_open(full_backing, NULL, NULL, + BDRV_O_NO_BACKING, errp); g_free(full_backing); - if (blk == NULL) { + if (backing == NULL) { ret = -EIO; goto exit; } - if (strcmp(blk_bs(blk)->drv->format_name, "vmdk")) { - blk_unref(blk); + if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) { + error_setg(errp, "Invalid backing file format: %s. Must be vmdk", + blk_bs(backing)->drv->format_name); + blk_unref(backing); ret = -EINVAL; goto exit; } - parent_cid = vmdk_read_cid(blk_bs(blk), 0); - blk_unref(blk); + ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid); + blk_unref(backing); + if (ret) { + error_setg(errp, "Failed to read parent CID"); + goto exit; + } snprintf(parent_desc_line, BUF_SIZE, "parentFileNameHint=\"%s\"", backing_file); } - - /* Create extents */ - filesize = total_size; - while (filesize > 0) { - int64_t size = filesize; - - if (split && size > split_size) { - size = split_size; - } - if (split) { - snprintf(desc_filename, PATH_MAX, "%s-%c%03d%s", - prefix, flat ? 'f' : 's', ++idx, postfix); - } else if (flat) { - snprintf(desc_filename, PATH_MAX, "%s-flat%s", prefix, postfix); - } else { - snprintf(desc_filename, PATH_MAX, "%s%s", prefix, postfix); - } - snprintf(ext_filename, PATH_MAX, "%s%s", path, desc_filename); - - if (vmdk_create_extent(ext_filename, size, - flat, compress, zeroed_grain, opts, errp)) { + extent_idx = 1; + while (created_size < size) { + int64_t cur_size = MIN(size - created_size, extent_size); + extent_blk = extent_fn(cur_size, extent_idx, flat, split, compress, + zeroed_grain, opaque, errp); + if (!extent_blk) { ret = -EINVAL; goto exit; } - filesize -= size; - - /* Format description line */ - snprintf(desc_line, BUF_SIZE, - desc_extent_line, size / BDRV_SECTOR_SIZE, desc_filename); - g_string_append(ext_desc_lines, desc_line); + vmdk_desc_add_extent(ext_desc_lines, extent_line_fmt, cur_size, + blk_bs(extent_blk)->filename); + created_size += cur_size; + extent_idx++; + blk_unref(extent_blk); + } + + /* Check whether we got excess extents */ + extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain, + opaque, NULL); + if (extent_blk) { + blk_unref(extent_blk); + error_setg(errp, "List of extents contains unused extents"); + ret = -EINVAL; + goto exit; } + /* generate descriptor file */ desc = g_strdup_printf(desc_template, g_random_int(), parent_cid, - fmt, + BlockdevVmdkSubformat_str(subformat), parent_desc_line, ext_desc_lines->str, hw_version, - total_size / + size / (int64_t)(63 * number_heads * BDRV_SECTOR_SIZE), number_heads, - adapter_type); + BlockdevVmdkAdapterType_str(adapter_type)); desc_len = strlen(desc); /* the descriptor offset = 0x200 */ if (!split && !flat) { desc_offset = 0x200; - } else { - ret = bdrv_create_file(filename, opts, &local_err); - if (ret < 0) { - error_propagate(errp, local_err); - goto exit; - } } - new_blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, - &local_err); - if (new_blk == NULL) { - error_propagate(errp, local_err); - ret = -EIO; - goto exit; - } - - blk_set_allow_write_beyond_eof(new_blk, true); - - ret = blk_pwrite(new_blk, desc_offset, desc, desc_len, 0); + ret = blk_pwrite(blk, desc_offset, desc, desc_len, 0); if (ret < 0) { error_setg_errno(errp, -ret, "Could not write description"); goto exit; @@ -2090,15 +2499,152 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) /* bdrv_pwrite write padding zeros to align to sector, we don't need that * for description file */ if (desc_offset == 0) { - ret = blk_truncate(new_blk, desc_len); + ret = blk_truncate(blk, desc_len, PREALLOC_MODE_OFF, errp); if (ret < 0) { - error_setg_errno(errp, -ret, "Could not truncate file"); + goto exit; } } + ret = 0; +exit: + if (blk) { + blk_unref(blk); + } + g_free(desc); + g_free(parent_desc_line); + g_string_free(ext_desc_lines, true); + return ret; +} + +typedef struct { + char *path; + char *prefix; + char *postfix; + QemuOpts *opts; +} VMDKCreateOptsData; + +static BlockBackend *vmdk_co_create_opts_cb(int64_t size, int idx, + bool flat, bool split, bool compress, + bool zeroed_grain, void *opaque, + Error **errp) +{ + BlockBackend *blk = NULL; + BlockDriverState *bs = NULL; + VMDKCreateOptsData *data = opaque; + char *ext_filename = NULL; + char *rel_filename = NULL; + + /* We're done, don't create excess extents. */ + if (size == -1) { + assert(errp == NULL); + return NULL; + } + + if (idx == 0) { + rel_filename = g_strdup_printf("%s%s", data->prefix, data->postfix); + } else if (split) { + rel_filename = g_strdup_printf("%s-%c%03d%s", + data->prefix, + flat ? 'f' : 's', idx, data->postfix); + } else { + assert(idx == 1); + rel_filename = g_strdup_printf("%s-flat%s", data->prefix, data->postfix); + } + + ext_filename = g_strdup_printf("%s%s", data->path, rel_filename); + g_free(rel_filename); + + if (vmdk_create_extent(ext_filename, size, + flat, compress, zeroed_grain, &blk, data->opts, + errp)) { + goto exit; + } + bdrv_unref(bs); exit: - if (new_blk) { - blk_unref(new_blk); + g_free(ext_filename); + return blk; +} + +static int coroutine_fn vmdk_co_create_opts(const char *filename, QemuOpts *opts, + Error **errp) +{ + Error *local_err = NULL; + char *desc = NULL; + int64_t total_size = 0; + char *adapter_type = NULL; + BlockdevVmdkAdapterType adapter_type_enum; + char *backing_file = NULL; + char *hw_version = NULL; + char *fmt = NULL; + BlockdevVmdkSubformat subformat; + int ret = 0; + char *path = g_malloc0(PATH_MAX); + char *prefix = g_malloc0(PATH_MAX); + char *postfix = g_malloc0(PATH_MAX); + char *desc_line = g_malloc0(BUF_SIZE); + char *ext_filename = g_malloc0(PATH_MAX); + char *desc_filename = g_malloc0(PATH_MAX); + char *parent_desc_line = g_malloc0(BUF_SIZE); + bool zeroed_grain; + bool compat6; + VMDKCreateOptsData data; + + if (filename_decompose(filename, path, prefix, postfix, PATH_MAX, errp)) { + ret = -EINVAL; + goto exit; } + /* Read out options */ + total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), + BDRV_SECTOR_SIZE); + adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE); + backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); + hw_version = qemu_opt_get_del(opts, BLOCK_OPT_HWVERSION); + compat6 = qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false); + if (strcmp(hw_version, "undefined") == 0) { + g_free(hw_version); + hw_version = NULL; + } + fmt = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT); + zeroed_grain = qemu_opt_get_bool_del(opts, BLOCK_OPT_ZEROED_GRAIN, false); + + if (adapter_type) { + adapter_type_enum = qapi_enum_parse(&BlockdevVmdkAdapterType_lookup, + adapter_type, + BLOCKDEV_VMDK_ADAPTER_TYPE_IDE, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto exit; + } + } else { + adapter_type_enum = BLOCKDEV_VMDK_ADAPTER_TYPE_IDE; + } + + if (!fmt) { + /* Default format to monolithicSparse */ + subformat = BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICSPARSE; + } else { + subformat = qapi_enum_parse(&BlockdevVmdkSubformat_lookup, + fmt, + BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICSPARSE, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto exit; + } + } + data = (VMDKCreateOptsData){ + .prefix = prefix, + .postfix = postfix, + .path = path, + .opts = opts, + }; + ret = vmdk_co_do_create(total_size, subformat, adapter_type_enum, + backing_file, hw_version, compat6, zeroed_grain, + vmdk_co_create_opts_cb, &data, errp); + +exit: g_free(adapter_type); g_free(backing_file); g_free(hw_version); @@ -2111,7 +2657,87 @@ exit: g_free(ext_filename); g_free(desc_filename); g_free(parent_desc_line); - g_string_free(ext_desc_lines, true); + return ret; +} + +static BlockBackend *vmdk_co_create_cb(int64_t size, int idx, + bool flat, bool split, bool compress, + bool zeroed_grain, void *opaque, + Error **errp) +{ + int ret; + BlockDriverState *bs; + BlockBackend *blk; + BlockdevCreateOptionsVmdk *opts = opaque; + + if (idx == 0) { + bs = bdrv_open_blockdev_ref(opts->file, errp); + } else { + int i; + BlockdevRefList *list = opts->extents; + for (i = 1; i < idx; i++) { + if (!list || !list->next) { + error_setg(errp, "Extent [%d] not specified", i); + return NULL; + } + list = list->next; + } + if (!list) { + error_setg(errp, "Extent [%d] not specified", idx - 1); + return NULL; + } + bs = bdrv_open_blockdev_ref(list->value, errp); + } + if (!bs) { + return NULL; + } + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, + BLK_PERM_ALL); + if (blk_insert_bs(blk, bs, errp)) { + bdrv_unref(bs); + return NULL; + } + blk_set_allow_write_beyond_eof(blk, true); + bdrv_unref(bs); + + if (size != -1) { + ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp); + if (ret) { + blk_unref(blk); + blk = NULL; + } + } + return blk; +} + +static int coroutine_fn vmdk_co_create(BlockdevCreateOptions *create_options, + Error **errp) +{ + int ret; + BlockdevCreateOptionsVmdk *opts; + + opts = &create_options->u.vmdk; + + /* Validate options */ + if (!QEMU_IS_ALIGNED(opts->size, BDRV_SECTOR_SIZE)) { + error_setg(errp, "Image size must be a multiple of 512 bytes"); + ret = -EINVAL; + goto out; + } + + ret = vmdk_co_do_create(opts->size, + opts->subformat, + opts->adapter_type, + opts->backing_file, + opts->hwversion, + false, + opts->zeroed_grain, + vmdk_co_create_cb, + opts, errp); + return ret; + +out: return ret; } @@ -2186,6 +2812,7 @@ static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent) { ImageInfo *info = g_new0(ImageInfo, 1); + bdrv_refresh_filename(extent->file->bs); *info = (ImageInfo){ .filename = g_strdup(extent->file->bs->filename), .format = g_strdup(extent->type), @@ -2199,8 +2826,9 @@ static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent) return info; } -static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result, - BdrvCheckMode fix) +static int coroutine_fn vmdk_co_check(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix) { BDRVVmdkState *s = bs->opaque; VmdkExtent *extent = NULL; @@ -2222,6 +2850,7 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result, fprintf(stderr, "ERROR: could not find extent for sector %" PRId64 "\n", sector_num); + ret = -EINVAL; break; } ret = get_cluster_offset(bs, extent, NULL, @@ -2233,22 +2862,32 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result, PRId64 "\n", sector_num); break; } - if (ret == VMDK_OK && - cluster_offset >= bdrv_getlength(extent->file->bs)) - { - fprintf(stderr, - "ERROR: cluster offset for sector %" - PRId64 " points after EOF\n", sector_num); - break; + if (ret == VMDK_OK) { + int64_t extent_len = bdrv_getlength(extent->file->bs); + if (extent_len < 0) { + fprintf(stderr, + "ERROR: could not get extent file length for sector %" + PRId64 "\n", sector_num); + ret = extent_len; + break; + } + if (cluster_offset >= extent_len) { + fprintf(stderr, + "ERROR: cluster offset for sector %" + PRId64 " points after EOF\n", sector_num); + ret = -EINVAL; + break; + } } sector_num += extent->cluster_sectors; } result->corruptions++; - return 0; + return ret; } -static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs) +static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs, + Error **errp) { int i; BDRVVmdkState *s = bs->opaque; @@ -2305,6 +2944,23 @@ static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) return 0; } +static void vmdk_gather_child_options(BlockDriverState *bs, QDict *target, + bool backing_overridden) +{ + /* No children but file and backing can be explicitly specified (TODO) */ + qdict_put(target, "file", + qobject_ref(bs->file->bs->full_open_options)); + + if (backing_overridden) { + if (bs->backing) { + qdict_put(target, "backing", + qobject_ref(bs->backing->bs->full_open_options)); + } else { + qdict_put_null(target, "backing"); + } + } +} + static QemuOptsList vmdk_create_opts = { .name = "vmdk-create-opts", .head = QTAILQ_HEAD_INITIALIZER(vmdk_create_opts.head), @@ -2359,7 +3015,7 @@ static BlockDriver bdrv_vmdk = { .instance_size = sizeof(BDRVVmdkState), .bdrv_probe = vmdk_probe, .bdrv_open = vmdk_open, - .bdrv_check = vmdk_check, + .bdrv_co_check = vmdk_co_check, .bdrv_reopen_prepare = vmdk_reopen_prepare, .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_preadv = vmdk_co_preadv, @@ -2367,14 +3023,16 @@ static BlockDriver bdrv_vmdk = { .bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed, .bdrv_co_pwrite_zeroes = vmdk_co_pwrite_zeroes, .bdrv_close = vmdk_close, - .bdrv_create = vmdk_create, + .bdrv_co_create_opts = vmdk_co_create_opts, + .bdrv_co_create = vmdk_co_create, .bdrv_co_flush_to_disk = vmdk_co_flush, - .bdrv_co_get_block_status = vmdk_co_get_block_status, + .bdrv_co_block_status = vmdk_co_block_status, .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, .bdrv_has_zero_init = vmdk_has_zero_init, .bdrv_get_specific_info = vmdk_get_specific_info, .bdrv_refresh_limits = vmdk_refresh_limits, .bdrv_get_info = vmdk_get_info, + .bdrv_gather_child_options = vmdk_gather_child_options, .supports_backing = true, .create_opts = &vmdk_create_opts,