X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/444dbc381b94f5b54da521df61e751f28b00ce88..34b5d2c68eb4082c288e70fb99c61af8f7b96fde:/block/vmdk.c diff --git a/block/vmdk.c b/block/vmdk.c index f2e861b074..96ef1b534e 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -24,19 +24,33 @@ */ #include "qemu-common.h" -#include "block_int.h" -#include "module.h" -#include "migration.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "migration/migration.h" #include #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D') #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V') #define VMDK4_COMPRESSION_DEFLATE 1 +#define VMDK4_FLAG_NL_DETECT (1 << 0) #define VMDK4_FLAG_RGD (1 << 1) +/* Zeroed-grain enable bit */ +#define VMDK4_FLAG_ZERO_GRAIN (1 << 2) #define VMDK4_FLAG_COMPRESS (1 << 16) #define VMDK4_FLAG_MARKER (1 << 17) #define VMDK4_GD_AT_END 0xffffffffffffffffULL +#define VMDK_GTE_ZEROED 0x1 + +/* VMDK internal error codes */ +#define VMDK_OK 0 +#define VMDK_ERROR (-1) +/* Cluster not allocated */ +#define VMDK_UNALLOC (-2) +#define VMDK_ZEROED (-3) + +#define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain" + typedef struct { uint32_t version; uint32_t flags; @@ -48,19 +62,20 @@ typedef struct { uint32_t cylinders; uint32_t heads; uint32_t sectors_per_track; -} VMDK3Header; +} QEMU_PACKED VMDK3Header; typedef struct { uint32_t version; uint32_t flags; - int64_t capacity; - int64_t granularity; - int64_t desc_offset; - int64_t desc_size; - int32_t num_gtes_per_gte; - int64_t rgd_offset; - int64_t gd_offset; - int64_t grain_offset; + uint64_t capacity; + uint64_t granularity; + uint64_t desc_offset; + uint64_t desc_size; + /* Number of GrainTableEntries per GrainTable */ + uint32_t num_gtes_per_gt; + uint64_t rgd_offset; + uint64_t gd_offset; + uint64_t grain_offset; char filler[1]; char check_bytes[4]; uint16_t compressAlgorithm; @@ -73,6 +88,8 @@ typedef struct VmdkExtent { bool flat; bool compressed; bool has_marker; + bool has_zero_grain; + int version; int64_t sectors; int64_t end_sector; int64_t flat_start_offset; @@ -93,7 +110,7 @@ typedef struct VmdkExtent { typedef struct BDRVVmdkState { CoMutex lock; - int desc_offset; + uint64_t desc_offset; bool cid_updated; uint32_t parent_cid; int num_extents; @@ -108,13 +125,14 @@ typedef struct VmdkMetaData { unsigned int l2_index; unsigned int l2_offset; int valid; + uint32_t *l2_cache_entry; } VmdkMetaData; typedef struct VmdkGrainMarker { uint64_t lba; uint32_t size; uint8_t data[0]; -} VmdkGrainMarker; +} QEMU_PACKED VmdkGrainMarker; enum { MARKER_END_OF_STREAM = 0, @@ -198,7 +216,7 @@ static void vmdk_free_extents(BlockDriverState *bs) g_free(e->l2_cache); g_free(e->l1_backup_table); if (e->file != bs->file) { - bdrv_delete(e->file); + bdrv_unref(e->file); } } g_free(s->extents); @@ -368,15 +386,30 @@ static int vmdk_parent_open(BlockDriverState *bs) /* Create and append extent to the extent array. Return the added VmdkExtent * address. return NULL if allocation failed. */ -static VmdkExtent *vmdk_add_extent(BlockDriverState *bs, +static int vmdk_add_extent(BlockDriverState *bs, BlockDriverState *file, bool flat, int64_t sectors, int64_t l1_offset, int64_t l1_backup_offset, uint32_t l1_size, - int l2_size, unsigned int cluster_sectors) + int l2_size, uint64_t cluster_sectors, + VmdkExtent **new_extent) { VmdkExtent *extent; BDRVVmdkState *s = bs->opaque; + if (cluster_sectors > 0x200000) { + /* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */ + error_report("invalid granularity, image may be corrupt"); + return -EINVAL; + } + if (l1_size > 512 * 1024 * 1024) { + /* Although with big capacity and small l1_entry_sectors, we can get a + * big l1_size, we don't want unbounded value to allocate the table. + * Limit it to 512M, which is 16PB for default cluster and L2 table + * size */ + error_report("L1 size too big"); + return -EFBIG; + } + s->extents = g_realloc(s->extents, (s->num_extents + 1) * sizeof(VmdkExtent)); extent = &s->extents[s->num_extents]; @@ -399,7 +432,10 @@ static VmdkExtent *vmdk_add_extent(BlockDriverState *bs, extent->end_sector = extent->sectors; } bs->total_sectors = extent->end_sector; - return extent; + if (new_extent) { + *new_extent = extent; + } + return 0; } static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent) @@ -445,9 +481,9 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent) return ret; } -static int vmdk_open_vmdk3(BlockDriverState *bs, - BlockDriverState *file, - int flags) +static int vmdk_open_vmfs_sparse(BlockDriverState *bs, + BlockDriverState *file, + int flags) { int ret; uint32_t magic; @@ -458,12 +494,17 @@ static int vmdk_open_vmdk3(BlockDriverState *bs, if (ret < 0) { return ret; } - extent = vmdk_add_extent(bs, - bs->file, false, - le32_to_cpu(header.disk_sectors), - le32_to_cpu(header.l1dir_offset) << 9, - 0, 1 << 6, 1 << 9, - le32_to_cpu(header.granularity)); + ret = vmdk_add_extent(bs, file, false, + le32_to_cpu(header.disk_sectors), + le32_to_cpu(header.l1dir_offset) << 9, + 0, + le32_to_cpu(header.l1dir_size), + 4096, + le32_to_cpu(header.granularity), + &extent); + if (ret < 0) { + return ret; + } ret = vmdk_init_tables(bs, extent); if (ret) { /* free extent allocated by vmdk_add_extent */ @@ -473,7 +514,7 @@ static int vmdk_open_vmdk3(BlockDriverState *bs, } static int vmdk_open_desc_file(BlockDriverState *bs, int flags, - int64_t desc_offset); + uint64_t desc_offset); static int vmdk_open_vmdk4(BlockDriverState *bs, BlockDriverState *file, @@ -490,8 +531,11 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, if (ret < 0) { return ret; } - if (header.capacity == 0 && header.desc_offset) { - return vmdk_open_desc_file(bs, flags, header.desc_offset << 9); + if (header.capacity == 0) { + uint64_t desc_offset = le64_to_cpu(header.desc_offset); + if (desc_offset) { + return vmdk_open_desc_file(bs, flags, desc_offset << 9); + } } if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) { @@ -541,7 +585,21 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, header = footer.header; } - l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte) + if (le32_to_cpu(header.version) >= 3) { + char buf[64]; + snprintf(buf, sizeof(buf), "VMDK version %d", + le32_to_cpu(header.version)); + qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, + bs->device_name, "vmdk", buf); + return -ENOTSUP; + } + + if (le32_to_cpu(header.num_gtes_per_gt) > 512) { + error_report("L2 table size too big"); + return -EINVAL; + } + + l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt) * le64_to_cpu(header.granularity); if (l1_entry_sectors == 0) { return -EINVAL; @@ -551,16 +609,22 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) { l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9; } - extent = vmdk_add_extent(bs, file, false, + ret = vmdk_add_extent(bs, file, false, le64_to_cpu(header.capacity), le64_to_cpu(header.gd_offset) << 9, l1_backup_offset, l1_size, - le32_to_cpu(header.num_gtes_per_gte), - le64_to_cpu(header.granularity)); + le32_to_cpu(header.num_gtes_per_gt), + le64_to_cpu(header.granularity), + &extent); + if (ret < 0) { + return ret; + } extent->compressed = le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE; extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER; + extent->version = le32_to_cpu(header.version); + extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN; ret = vmdk_init_tables(bs, extent); if (ret) { /* free extent allocated by vmdk_add_extent */ @@ -578,22 +642,22 @@ static int vmdk_parse_description(const char *desc, const char *opt_name, opt_pos = strstr(desc, opt_name); if (!opt_pos) { - return -1; + return VMDK_ERROR; } /* Skip "=\"" following opt_name */ opt_pos += strlen(opt_name) + 2; if (opt_pos >= end) { - return -1; + return VMDK_ERROR; } opt_end = opt_pos; while (opt_end < end && *opt_end != '"') { opt_end++; } if (opt_end == end || buf_size < opt_end - opt_pos + 1) { - return -1; + return VMDK_ERROR; } pstrcpy(buf, opt_end - opt_pos + 1, opt_pos); - return 0; + return VMDK_OK; } /* Open an extent file and append to bs array */ @@ -610,13 +674,13 @@ static int vmdk_open_sparse(BlockDriverState *bs, magic = be32_to_cpu(magic); switch (magic) { case VMDK3_MAGIC: - return vmdk_open_vmdk3(bs, file, flags); + return vmdk_open_vmfs_sparse(bs, file, flags); break; case VMDK4_MAGIC: return vmdk_open_vmdk4(bs, file, flags); break; default: - return -EINVAL; + return -EMEDIUMTYPE; break; } } @@ -633,6 +697,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, int64_t flat_offset; char extent_path[PATH_MAX]; BlockDriverState *extent_file; + Error *local_err = NULL; while (*p) { /* parse extent line: @@ -641,7 +706,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, * RW [size in sectors] SPARSE "file-name.vmdk" */ flat_offset = -1; - ret = sscanf(p, "%10s %" SCNd64 " %10s %511s %" SCNd64, + ret = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64, access, §ors, type, fname, &flat_offset); if (ret < 4 || strcmp(access, "RW")) { goto next_line; @@ -653,40 +718,39 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, return -EINVAL; } - /* trim the quotation marks around */ - if (fname[0] == '"') { - memmove(fname, fname + 1, strlen(fname)); - if (strlen(fname) <= 1 || fname[strlen(fname) - 1] != '"') { - return -EINVAL; - } - fname[strlen(fname) - 1] = '\0'; - } if (sectors <= 0 || - (strcmp(type, "FLAT") && strcmp(type, "SPARSE")) || + (strcmp(type, "FLAT") && strcmp(type, "SPARSE") && + strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) || (strcmp(access, "RW"))) { goto next_line; } path_combine(extent_path, sizeof(extent_path), desc_file_path, fname); - ret = bdrv_file_open(&extent_file, extent_path, bs->open_flags); + ret = bdrv_file_open(&extent_file, extent_path, NULL, bs->open_flags, + &local_err); if (ret) { + qerror_report_err(local_err); + error_free(local_err); return ret; } /* save to extents array */ - if (!strcmp(type, "FLAT")) { + if (!strcmp(type, "FLAT") || !strcmp(type, "VMFS")) { /* FLAT extent */ VmdkExtent *extent; - extent = vmdk_add_extent(bs, extent_file, true, sectors, - 0, 0, 0, 0, sectors); + ret = vmdk_add_extent(bs, extent_file, true, sectors, + 0, 0, 0, 0, sectors, &extent); + if (ret < 0) { + return ret; + } extent->flat_start_offset = flat_offset << 9; - } else if (!strcmp(type, "SPARSE")) { - /* SPARSE extent */ + } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) { + /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/ ret = vmdk_open_sparse(bs, extent_file, bs->open_flags); if (ret) { - bdrv_delete(extent_file); + bdrv_unref(extent_file); return ret; } } else { @@ -705,33 +769,49 @@ next_line: } static int vmdk_open_desc_file(BlockDriverState *bs, int flags, - int64_t desc_offset) + uint64_t desc_offset) { int ret; - char buf[2048]; + char *buf = NULL; char ct[128]; BDRVVmdkState *s = bs->opaque; + int64_t size; - ret = bdrv_pread(bs->file, desc_offset, buf, sizeof(buf)); + size = bdrv_getlength(bs->file); + if (size < 0) { + return -EINVAL; + } + + size = MIN(size, 1 << 20); /* avoid unbounded allocation */ + buf = g_malloc0(size + 1); + + ret = bdrv_pread(bs->file, desc_offset, buf, size); if (ret < 0) { - return ret; + goto exit; } - buf[2047] = '\0'; if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) { - return -EINVAL; + ret = -EMEDIUMTYPE; + goto exit; } if (strcmp(ct, "monolithicFlat") && + strcmp(ct, "vmfs") && + strcmp(ct, "vmfsSparse") && strcmp(ct, "twoGbMaxExtentSparse") && strcmp(ct, "twoGbMaxExtentFlat")) { fprintf(stderr, "VMDK: Not supported image type \"%s\""".\n", ct); - return -ENOTSUP; + ret = -ENOTSUP; + goto exit; } s->desc_offset = 0; - return vmdk_parse_extents(buf, bs, bs->file->filename); + ret = vmdk_parse_extents(buf, bs, bs->file->filename); +exit: + g_free(buf); + return ret; } -static int vmdk_open(BlockDriverState *bs, int flags) +static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) { int ret; BDRVVmdkState *s = bs->opaque; @@ -771,16 +851,17 @@ static int get_whole_cluster(BlockDriverState *bs, uint64_t offset, bool allocate) { - /* 128 sectors * 512 bytes each = grain size 64KB */ - uint8_t whole_grain[extent->cluster_sectors * 512]; + int ret = VMDK_OK; + uint8_t *whole_grain = NULL; /* we will be here if it's first write on non-exist grain(cluster). * try to read from parent image, if exist */ if (bs->backing_hd) { - int ret; - + whole_grain = + qemu_blockalign(bs, extent->cluster_sectors << BDRV_SECTOR_BITS); if (!vmdk_is_cid_valid(bs)) { - return -1; + ret = VMDK_ERROR; + goto exit; } /* floor offset to cluster */ @@ -788,30 +869,35 @@ static int get_whole_cluster(BlockDriverState *bs, ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain, extent->cluster_sectors); if (ret < 0) { - return -1; + ret = VMDK_ERROR; + goto exit; } /* Write grain only into the active image */ ret = bdrv_write(extent->file, cluster_offset, whole_grain, extent->cluster_sectors); if (ret < 0) { - return -1; + ret = VMDK_ERROR; + goto exit; } } - return 0; +exit: + qemu_vfree(whole_grain); + return ret; } static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data) { + uint32_t offset; + QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset)); + offset = cpu_to_le32(m_data->offset); /* update L2 table */ if (bdrv_pwrite_sync( extent->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), - &(m_data->offset), - sizeof(m_data->offset) - ) < 0) { - return -1; + &offset, sizeof(offset)) < 0) { + return VMDK_ERROR; } /* update backup L2 table */ if (extent->l1_backup_table_offset != 0) { @@ -820,13 +906,15 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data) extent->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), - &(m_data->offset), sizeof(m_data->offset) - ) < 0) { - return -1; + &offset, sizeof(offset)) < 0) { + return VMDK_ERROR; } } + if (m_data->l2_cache_entry) { + *m_data->l2_cache_entry = offset; + } - return 0; + return VMDK_OK; } static int get_cluster_offset(BlockDriverState *bs, @@ -838,24 +926,25 @@ static int get_cluster_offset(BlockDriverState *bs, { unsigned int l1_index, l2_offset, l2_index; int min_index, i, j; - uint32_t min_count, *l2_table, tmp = 0; + uint32_t min_count, *l2_table; + bool zeroed = false; if (m_data) { m_data->valid = 0; } if (extent->flat) { *cluster_offset = extent->flat_start_offset; - return 0; + return VMDK_OK; } offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE; l1_index = (offset >> 9) / extent->l1_entry_sectors; if (l1_index >= extent->l1_size) { - return -1; + return VMDK_ERROR; } l2_offset = extent->l1_table[l1_index]; if (!l2_offset) { - return -1; + return VMDK_UNALLOC; } for (i = 0; i < L2_CACHE_SIZE; i++) { if (l2_offset == extent->l2_cache_offsets[i]) { @@ -885,7 +974,7 @@ static int get_cluster_offset(BlockDriverState *bs, l2_table, extent->l2_size * sizeof(uint32_t) ) != extent->l2_size * sizeof(uint32_t)) { - return -1; + return VMDK_ERROR; } extent->l2_cache_offsets[min_index] = l2_offset; @@ -894,9 +983,21 @@ static int get_cluster_offset(BlockDriverState *bs, l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size; *cluster_offset = le32_to_cpu(l2_table[l2_index]); - if (!*cluster_offset) { + if (m_data) { + m_data->valid = 1; + m_data->l1_index = l1_index; + m_data->l2_index = l2_index; + m_data->offset = *cluster_offset; + m_data->l2_offset = l2_offset; + m_data->l2_cache_entry = &l2_table[l2_index]; + } + if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) { + zeroed = true; + } + + if (!*cluster_offset || zeroed) { if (!allocate) { - return -1; + return zeroed ? VMDK_ZEROED : VMDK_UNALLOC; } /* Avoid the L2 tables update for the images that have snapshots. */ @@ -909,8 +1010,7 @@ static int get_cluster_offset(BlockDriverState *bs, } *cluster_offset >>= 9; - tmp = cpu_to_le32(*cluster_offset); - l2_table[l2_index] = tmp; + l2_table[l2_index] = cpu_to_le32(*cluster_offset); /* First of all we write grain itself, to avoid race condition * that may to corrupt the image. @@ -919,19 +1019,15 @@ static int get_cluster_offset(BlockDriverState *bs, */ if (get_whole_cluster( bs, extent, *cluster_offset, offset, allocate) == -1) { - return -1; + return VMDK_ERROR; } if (m_data) { - m_data->offset = tmp; - m_data->l1_index = l1_index; - m_data->l2_index = l2_index; - m_data->l2_offset = l2_offset; - m_data->valid = 1; + m_data->offset = *cluster_offset; } } *cluster_offset <<= 9; - return 0; + return VMDK_OK; } static VmdkExtent *find_extent(BDRVVmdkState *s, @@ -951,7 +1047,7 @@ static VmdkExtent *find_extent(BDRVVmdkState *s, return NULL; } -static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs, +static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { BDRVVmdkState *s = bs->opaque; @@ -967,8 +1063,25 @@ static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs, ret = get_cluster_offset(bs, extent, NULL, sector_num * 512, 0, &offset); qemu_co_mutex_unlock(&s->lock); - /* get_cluster_offset returning 0 means success */ - ret = !ret; + + switch (ret) { + case VMDK_ERROR: + ret = -EIO; + break; + case VMDK_UNALLOC: + ret = 0; + break; + case VMDK_ZEROED: + ret = BDRV_BLOCK_ZERO; + break; + case VMDK_OK: + ret = BDRV_BLOCK_DATA; + if (extent->file == bs->file) { + ret |= BDRV_BLOCK_OFFSET_VALID | offset; + } + + break; + } index_in_cluster = sector_num % extent->cluster_sectors; n = extent->cluster_sectors - index_in_cluster; @@ -1092,6 +1205,7 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, BDRVVmdkState *s = bs->opaque; int ret; uint64_t n, index_in_cluster; + uint64_t extent_begin_sector, extent_relative_sector_num; VmdkExtent *extent = NULL; uint64_t cluster_offset; @@ -1103,14 +1217,16 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, ret = get_cluster_offset( bs, extent, NULL, sector_num << 9, 0, &cluster_offset); - index_in_cluster = sector_num % extent->cluster_sectors; + extent_begin_sector = extent->end_sector - extent->sectors; + extent_relative_sector_num = sector_num - extent_begin_sector; + index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; n = extent->cluster_sectors - index_in_cluster; if (n > nb_sectors) { n = nb_sectors; } - if (ret) { + if (ret != VMDK_OK) { /* if not allocated, try to read from parent image, if exist */ - if (bs->backing_hd) { + if (bs->backing_hd && ret != VMDK_ZEROED) { if (!vmdk_is_cid_valid(bs)) { return -EINVAL; } @@ -1147,13 +1263,25 @@ static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num, return ret; } +/** + * vmdk_write: + * @zeroed: buf is ignored (data is zero), use zeroed_grain GTE feature + * if possible, otherwise return -ENOTSUP. + * @zero_dry_run: used for zeroed == true only, don't update L2 table, just try + * with each cluster. By dry run we can find if the zero write + * is possible without modifying image data. + * + * Returns: error code with 0 for success. + */ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) + const uint8_t *buf, int nb_sectors, + bool zeroed, bool zero_dry_run) { BDRVVmdkState *s = bs->opaque; VmdkExtent *extent = NULL; int n, ret; int64_t index_in_cluster; + uint64_t extent_begin_sector, extent_relative_sector_num; uint64_t cluster_offset; VmdkMetaData m_data; @@ -1177,7 +1305,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, sector_num << 9, !extent->compressed, &cluster_offset); if (extent->compressed) { - if (ret == 0) { + if (ret == VMDK_OK) { /* Refuse write to allocated cluster for streamOptimized */ fprintf(stderr, "VMDK: can't write to allocated cluster" @@ -1193,25 +1321,44 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, &cluster_offset); } } - if (ret) { + if (ret == VMDK_ERROR) { return -EINVAL; } - index_in_cluster = sector_num % extent->cluster_sectors; + extent_begin_sector = extent->end_sector - extent->sectors; + extent_relative_sector_num = sector_num - extent_begin_sector; + index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; n = extent->cluster_sectors - index_in_cluster; if (n > nb_sectors) { n = nb_sectors; } - - ret = vmdk_write_extent(extent, - cluster_offset, index_in_cluster * 512, - buf, n, sector_num); - if (ret) { - return ret; - } - if (m_data.valid) { - /* update L2 tables */ - if (vmdk_L2update(extent, &m_data) == -1) { - return -EIO; + if (zeroed) { + /* Do zeroed write, buf is ignored */ + if (extent->has_zero_grain && + index_in_cluster == 0 && + n >= extent->cluster_sectors) { + n = extent->cluster_sectors; + if (!zero_dry_run) { + m_data.offset = VMDK_GTE_ZEROED; + /* update L2 tables */ + if (vmdk_L2update(extent, &m_data) != VMDK_OK) { + return -EIO; + } + } + } else { + return -ENOTSUP; + } + } else { + ret = vmdk_write_extent(extent, + cluster_offset, index_in_cluster * 512, + buf, n, sector_num); + if (ret) { + return ret; + } + if (m_data.valid) { + /* update L2 tables */ + if (vmdk_L2update(extent, &m_data) != VMDK_OK) { + return -EIO; + } } } nb_sectors -= n; @@ -1237,14 +1384,30 @@ static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num, int ret; BDRVVmdkState *s = bs->opaque; qemu_co_mutex_lock(&s->lock); - ret = vmdk_write(bs, sector_num, buf, nb_sectors); + ret = vmdk_write(bs, sector_num, buf, nb_sectors, false, false); qemu_co_mutex_unlock(&s->lock); return ret; } +static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors) +{ + int ret; + BDRVVmdkState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + /* write zeroes could fail if sectors not aligned to cluster, test it with + * dry_run == true before really updating image */ + ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true); + if (!ret) { + ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false); + } + qemu_co_mutex_unlock(&s->lock); + return ret; +} static int vmdk_create_extent(const char *filename, int64_t filesize, - bool flat, bool compress) + bool flat, bool compress, bool zeroed_grain) { int ret, i; int fd = 0; @@ -1266,18 +1429,19 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, } magic = cpu_to_be32(VMDK4_MAGIC); memset(&header, 0, sizeof(header)); - header.version = 1; - header.flags = - 3 | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0); + header.version = zeroed_grain ? 2 : 1; + header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT + | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0) + | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0); header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0; header.capacity = filesize / 512; header.granularity = 128; - header.num_gtes_per_gte = 512; + header.num_gtes_per_gt = 512; grains = (filesize / 512 + header.granularity - 1) / header.granularity; - gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9; + gt_size = ((header.num_gtes_per_gt * sizeof(uint32_t)) + 511) >> 9; gt_count = - (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte; + (grains + header.num_gtes_per_gt - 1) / header.num_gtes_per_gt; gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9; header.desc_offset = 1; @@ -1293,7 +1457,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, header.flags = cpu_to_le32(header.flags); header.capacity = cpu_to_le64(header.capacity); header.granularity = cpu_to_le64(header.granularity); - header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte); + header.num_gtes_per_gt = cpu_to_le32(header.num_gtes_per_gt); header.desc_offset = cpu_to_le64(header.desc_offset); header.desc_size = cpu_to_le64(header.desc_size); header.rgd_offset = cpu_to_le64(header.rgd_offset); @@ -1359,7 +1523,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix, if (filename == NULL || !strlen(filename)) { fprintf(stderr, "Vmdk: no filename provided.\n"); - return -1; + return VMDK_ERROR; } p = strrchr(filename, '/'); if (p == NULL) { @@ -1371,7 +1535,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix, if (p != NULL) { p++; if (p - filename >= buf_len) { - return -1; + return VMDK_ERROR; } pstrcpy(path, p - filename + 1, filename); } else { @@ -1384,59 +1548,21 @@ static int filename_decompose(const char *filename, char *path, char *prefix, postfix[0] = '\0'; } else { if (q - p >= buf_len) { - return -1; + return VMDK_ERROR; } pstrcpy(prefix, q - p + 1, p); pstrcpy(postfix, buf_len, q); } - return 0; + return VMDK_OK; } -static int relative_path(char *dest, int dest_size, - const char *base, const char *target) -{ - int i = 0; - int n = 0; - const char *p, *q; -#ifdef _WIN32 - const char *sep = "\\"; -#else - const char *sep = "/"; -#endif - - if (!(dest && base && target)) { - return -1; - } - if (path_is_absolute(target)) { - dest[dest_size - 1] = '\0'; - strncpy(dest, target, dest_size - 1); - return 0; - } - while (base[i] == target[i]) { - i++; - } - p = &base[i]; - q = &target[i]; - while (*p) { - if (*p == *sep) { - n++; - } - p++; - } - dest[0] = '\0'; - for (; n; n--) { - pstrcat(dest, dest_size, ".."); - pstrcat(dest, dest_size, sep); - } - pstrcat(dest, dest_size, q); - return 0; -} - -static int vmdk_create(const char *filename, QEMUOptionParameter *options) +static int vmdk_create(const char *filename, QEMUOptionParameter *options, + Error **errp) { int fd, idx = 0; char desc[BUF_SIZE]; int64_t total_size = 0, filesize; + const char *adapter_type = NULL; const char *backing_file = NULL; const char *fmt = NULL; int flags = 0; @@ -1448,6 +1574,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options) const char *desc_extent_line; char parent_desc_line[BUF_SIZE] = ""; uint32_t parent_cid = 0xffffffff; + uint32_t number_heads = 16; + bool zeroed_grain = false; const char desc_template[] = "# Disk DescriptorFile\n" "version=1\n" @@ -1464,9 +1592,10 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options) "\n" "ddb.virtualHWVersion = \"%d\"\n" "ddb.geometry.cylinders = \"%" PRId64 "\"\n" - "ddb.geometry.heads = \"16\"\n" + "ddb.geometry.heads = \"%d\"\n" "ddb.geometry.sectors = \"63\"\n" - "ddb.adapterType = \"ide\"\n"; + "ddb.adapterType = \"%s\"\n"; + Error *local_err = NULL; if (filename_decompose(filename, path, prefix, postfix, PATH_MAX)) { return -EINVAL; @@ -1475,15 +1604,33 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options) while (options && options->name) { if (!strcmp(options->name, BLOCK_OPT_SIZE)) { total_size = options->value.n; + } else if (!strcmp(options->name, BLOCK_OPT_ADAPTER_TYPE)) { + adapter_type = options->value.s; } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { backing_file = options->value.s; } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) { flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0; } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) { fmt = options->value.s; + } else if (!strcmp(options->name, BLOCK_OPT_ZEROED_GRAIN)) { + zeroed_grain |= options->value.n; } options++; } + if (!adapter_type) { + adapter_type = "ide"; + } else if (strcmp(adapter_type, "ide") && + strcmp(adapter_type, "buslogic") && + strcmp(adapter_type, "lsilogic") && + strcmp(adapter_type, "legacyESX")) { + fprintf(stderr, "VMDK: Unknown adapter type: '%s'.\n", adapter_type); + return -EINVAL; + } + if (strcmp(adapter_type, "ide") != 0) { + /* that's the number of heads with which vmware operates when + creating, exporting, etc. vmdk files with a non-ide adapter type */ + number_heads = 255; + } if (!fmt) { /* Default format to monolithicSparse */ fmt = "monolithicSparse"; @@ -1510,23 +1657,22 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options) return -ENOTSUP; } if (backing_file) { - char parent_filename[PATH_MAX]; BlockDriverState *bs = bdrv_new(""); - ret = bdrv_open(bs, backing_file, 0, NULL); + ret = bdrv_open(bs, backing_file, NULL, 0, NULL, &local_err); if (ret != 0) { - bdrv_delete(bs); + qerror_report_err(local_err); + error_free(local_err); + bdrv_unref(bs); return ret; } if (strcmp(bs->drv->format_name, "vmdk")) { - bdrv_delete(bs); + bdrv_unref(bs); return -EINVAL; } parent_cid = vmdk_read_cid(bs, 0); - bdrv_delete(bs); - relative_path(parent_filename, sizeof(parent_filename), - filename, backing_file); + bdrv_unref(bs); snprintf(parent_desc_line, sizeof(parent_desc_line), - "parentFileNameHint=\"%s\"", parent_filename); + "parentFileNameHint=\"%s\"", backing_file); } /* Create extents */ @@ -1553,7 +1699,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options) snprintf(ext_filename, sizeof(ext_filename), "%s%s", path, desc_filename); - if (vmdk_create_extent(ext_filename, size, flat, compress)) { + if (vmdk_create_extent(ext_filename, size, + flat, compress, zeroed_grain)) { return -EINVAL; } filesize -= size; @@ -1571,7 +1718,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options) parent_desc_line, ext_desc_lines, (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4), - total_size / (int64_t)(63 * 16 * 512)); + total_size / (int64_t)(63 * number_heads * 512), number_heads, + adapter_type); if (split || flat) { fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, @@ -1649,12 +1797,35 @@ static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs) return ret; } +static int vmdk_has_zero_init(BlockDriverState *bs) +{ + int i; + BDRVVmdkState *s = bs->opaque; + + /* If has a flat extent and its underlying storage doesn't have zero init, + * return 0. */ + for (i = 0; i < s->num_extents; i++) { + if (s->extents[i].flat) { + if (!bdrv_has_zero_init(s->extents[i].file)) { + return 0; + } + } + } + return 1; +} + static QEMUOptionParameter vmdk_create_options[] = { { .name = BLOCK_OPT_SIZE, .type = OPT_SIZE, .help = "Virtual disk size" }, + { + .name = BLOCK_OPT_ADAPTER_TYPE, + .type = OPT_STRING, + .help = "Virtual adapter type, can be one of " + "ide (default), lsilogic, buslogic or legacyESX" + }, { .name = BLOCK_OPT_BACKING_FILE, .type = OPT_STRING, @@ -1672,24 +1843,31 @@ static QEMUOptionParameter vmdk_create_options[] = { "VMDK flat extent format, can be one of " "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} " }, + { + .name = BLOCK_OPT_ZEROED_GRAIN, + .type = OPT_FLAG, + .help = "Enable efficient zero writes using the zeroed-grain GTE feature" + }, { NULL } }; static BlockDriver bdrv_vmdk = { - .format_name = "vmdk", - .instance_size = sizeof(BDRVVmdkState), - .bdrv_probe = vmdk_probe, - .bdrv_open = vmdk_open, - .bdrv_reopen_prepare = vmdk_reopen_prepare, - .bdrv_read = vmdk_co_read, - .bdrv_write = vmdk_co_write, - .bdrv_close = vmdk_close, - .bdrv_create = vmdk_create, - .bdrv_co_flush_to_disk = vmdk_co_flush, - .bdrv_co_is_allocated = vmdk_co_is_allocated, - .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, - - .create_options = vmdk_create_options, + .format_name = "vmdk", + .instance_size = sizeof(BDRVVmdkState), + .bdrv_probe = vmdk_probe, + .bdrv_open = vmdk_open, + .bdrv_reopen_prepare = vmdk_reopen_prepare, + .bdrv_read = vmdk_co_read, + .bdrv_write = vmdk_co_write, + .bdrv_co_write_zeroes = vmdk_co_write_zeroes, + .bdrv_close = vmdk_close, + .bdrv_create = vmdk_create, + .bdrv_co_flush_to_disk = vmdk_co_flush, + .bdrv_co_get_block_status = vmdk_co_get_block_status, + .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, + .bdrv_has_zero_init = vmdk_has_zero_init, + + .create_options = vmdk_create_options, }; static void bdrv_vmdk_init(void)