uint32_t cylinders;
uint32_t heads;
uint32_t sectors_per_track;
-} VMDK3Header;
+} QEMU_PACKED VMDK3Header;
typedef struct {
uint32_t version;
uint32_t flags;
- int64_t capacity;
- int64_t granularity;
- int64_t desc_offset;
- int64_t desc_size;
- int32_t num_gtes_per_gte;
- int64_t rgd_offset;
- int64_t gd_offset;
- int64_t grain_offset;
+ uint64_t capacity;
+ uint64_t granularity;
+ uint64_t desc_offset;
+ uint64_t desc_size;
+ /* Number of GrainTableEntries per GrainTable */
+ uint32_t num_gtes_per_gt;
+ uint64_t rgd_offset;
+ uint64_t gd_offset;
+ uint64_t grain_offset;
char filler[1];
char check_bytes[4];
uint16_t compressAlgorithm;
typedef struct BDRVVmdkState {
CoMutex lock;
- int desc_offset;
+ uint64_t desc_offset;
bool cid_updated;
uint32_t parent_cid;
int num_extents;
uint64_t lba;
uint32_t size;
uint8_t data[0];
-} VmdkGrainMarker;
+} QEMU_PACKED VmdkGrainMarker;
enum {
MARKER_END_OF_STREAM = 0,
g_free(e->l2_cache);
g_free(e->l1_backup_table);
if (e->file != bs->file) {
- bdrv_delete(e->file);
+ bdrv_unref(e->file);
}
}
g_free(s->extents);
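/* Create and append extent to the extent array. On success return 0 and, if
* new_extent is non-NULL, store the added VmdkExtent address in *new_extent.
* Return a negative errno value if the extent parameters are rejected. */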
-static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
+static int vmdk_add_extent(BlockDriverState *bs,
BlockDriverState *file, bool flat, int64_t sectors,
int64_t l1_offset, int64_t l1_backup_offset,
uint32_t l1_size,
- int l2_size, unsigned int cluster_sectors)
+ int l2_size, uint64_t cluster_sectors,
+ VmdkExtent **new_extent)
{
VmdkExtent *extent;
BDRVVmdkState *s = bs->opaque;
+ if (cluster_sectors > 0x200000) {
+ /* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */
+ error_report("invalid granularity, image may be corrupt");
+ return -EINVAL;
+ }
+ if (l1_size > 512 * 1024 * 1024) {
+ /* Although with big capacity and small l1_entry_sectors, we can get a
+ * big l1_size, we don't want unbounded value to allocate the table.
+ * Limit it to 512M, which is 16PB for default cluster and L2 table
+ * size */
+ error_report("L1 size too big");
+ return -EFBIG;
+ }
+
s->extents = g_realloc(s->extents,
(s->num_extents + 1) * sizeof(VmdkExtent));
extent = &s->extents[s->num_extents];
extent->end_sector = extent->sectors;
}
bs->total_sectors = extent->end_sector;
- return extent;
+ if (new_extent) {
+ *new_extent = extent;
+ }
+ return 0;
}
static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent)
return ret;
}
-static int vmdk_open_vmdk3(BlockDriverState *bs,
- BlockDriverState *file,
- int flags)
+static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
+ BlockDriverState *file,
+ int flags)
{
int ret;
uint32_t magic;
if (ret < 0) {
return ret;
}
- extent = vmdk_add_extent(bs,
- bs->file, false,
- le32_to_cpu(header.disk_sectors),
- le32_to_cpu(header.l1dir_offset) << 9,
- 0, 1 << 6, 1 << 9,
- le32_to_cpu(header.granularity));
+ ret = vmdk_add_extent(bs, file, false,
+ le32_to_cpu(header.disk_sectors),
+ le32_to_cpu(header.l1dir_offset) << 9,
+ 0,
+ le32_to_cpu(header.l1dir_size),
+ 4096,
+ le32_to_cpu(header.granularity),
+ &extent);
+ if (ret < 0) {
+ return ret;
+ }
ret = vmdk_init_tables(bs, extent);
if (ret) {
/* free extent allocated by vmdk_add_extent */
}
static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
- int64_t desc_offset);
+ uint64_t desc_offset);
static int vmdk_open_vmdk4(BlockDriverState *bs,
BlockDriverState *file,
if (ret < 0) {
return ret;
}
- if (header.capacity == 0 && header.desc_offset) {
- return vmdk_open_desc_file(bs, flags, header.desc_offset << 9);
+ if (header.capacity == 0) {
+ uint64_t desc_offset = le64_to_cpu(header.desc_offset);
+ if (desc_offset) {
+ return vmdk_open_desc_file(bs, flags, desc_offset << 9);
+ }
}
if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) {
header = footer.header;
}
- l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
+ if (le32_to_cpu(header.version) >= 3) {
+ char buf[64];
+ snprintf(buf, sizeof(buf), "VMDK version %d",
+ le32_to_cpu(header.version));
+ qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+ bs->device_name, "vmdk", buf);
+ return -ENOTSUP;
+ }
+
+ if (le32_to_cpu(header.num_gtes_per_gt) > 512) {
+ error_report("L2 table size too big");
+ return -EINVAL;
+ }
+
+ l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt)
* le64_to_cpu(header.granularity);
if (l1_entry_sectors == 0) {
return -EINVAL;
if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
}
- extent = vmdk_add_extent(bs, file, false,
+ ret = vmdk_add_extent(bs, file, false,
le64_to_cpu(header.capacity),
le64_to_cpu(header.gd_offset) << 9,
l1_backup_offset,
l1_size,
- le32_to_cpu(header.num_gtes_per_gte),
- le64_to_cpu(header.granularity));
+ le32_to_cpu(header.num_gtes_per_gt),
+ le64_to_cpu(header.granularity),
+ &extent);
+ if (ret < 0) {
+ return ret;
+ }
extent->compressed =
le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
magic = be32_to_cpu(magic);
switch (magic) {
case VMDK3_MAGIC:
- return vmdk_open_vmdk3(bs, file, flags);
+ return vmdk_open_vmfs_sparse(bs, file, flags);
break;
case VMDK4_MAGIC:
return vmdk_open_vmdk4(bs, file, flags);
int64_t flat_offset;
char extent_path[PATH_MAX];
BlockDriverState *extent_file;
+ Error *local_err = NULL;
while (*p) {
/* parse extent line:
}
if (sectors <= 0 ||
- (strcmp(type, "FLAT") && strcmp(type, "SPARSE")) ||
+ (strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
+ strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) ||
(strcmp(access, "RW"))) {
goto next_line;
}
path_combine(extent_path, sizeof(extent_path),
desc_file_path, fname);
- ret = bdrv_file_open(&extent_file, extent_path, NULL, bs->open_flags);
+ ret = bdrv_file_open(&extent_file, extent_path, NULL, bs->open_flags,
+ &local_err);
if (ret) {
+ qerror_report_err(local_err);
+ error_free(local_err);
return ret;
}
/* save to extents array */
- if (!strcmp(type, "FLAT")) {
+ if (!strcmp(type, "FLAT") || !strcmp(type, "VMFS")) {
/* FLAT extent */
VmdkExtent *extent;
- extent = vmdk_add_extent(bs, extent_file, true, sectors,
- 0, 0, 0, 0, sectors);
+ ret = vmdk_add_extent(bs, extent_file, true, sectors,
+ 0, 0, 0, 0, sectors, &extent);
+ if (ret < 0) {
+ return ret;
+ }
extent->flat_start_offset = flat_offset << 9;
- } else if (!strcmp(type, "SPARSE")) {
- /* SPARSE extent */
+ } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
+ /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
ret = vmdk_open_sparse(bs, extent_file, bs->open_flags);
if (ret) {
- bdrv_delete(extent_file);
+ bdrv_unref(extent_file);
return ret;
}
} else {
}
/* Read the text descriptor from bs->file starting at desc_offset, check that
* its "createType" is one of the accepted values, and pass the text to
* vmdk_parse_extents().
*
* Returns the result of vmdk_parse_extents() when the descriptor is accepted;
* otherwise a negative value: EINVAL if the file length cannot be obtained,
* the bdrv_pread() error, EMEDIUMTYPE if vmdk_parse_description() reports
* failure for "createType", or ENOTSUP for a createType not in the list. */
static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
- int64_t desc_offset)
+ uint64_t desc_offset)
{
int ret;
- char buf[2048];
+ char *buf = NULL;
char ct[128];
BDRVVmdkState *s = bs->opaque;
+ int64_t size;
+
+ size = bdrv_getlength(bs->file);
+ if (size < 0) {
+ return -EINVAL; /* NOTE(review): bdrv_getlength()'s own error code is not propagated */
+ }
+
+ size = MIN(size, 1 << 20); /* cap the read at 1 MiB to avoid unbounded allocation */
+ buf = g_malloc0(size + 1); /* zero-filled, so the spare byte NUL-terminates the text */
- ret = bdrv_pread(bs->file, desc_offset, buf, sizeof(buf));
+ ret = bdrv_pread(bs->file, desc_offset, buf, size); /* NOTE(review): size is the capped file length, not reduced by desc_offset; confirm */
if (ret < 0) {
- return ret;
+ goto exit;
}
- buf[2047] = '\0';
if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
- return -EMEDIUMTYPE;
+ ret = -EMEDIUMTYPE;
+ goto exit;
}
if (strcmp(ct, "monolithicFlat") &&
+ strcmp(ct, "vmfs") &&
+ strcmp(ct, "vmfsSparse") &&
strcmp(ct, "twoGbMaxExtentSparse") &&
strcmp(ct, "twoGbMaxExtentFlat")) {
fprintf(stderr,
"VMDK: Not supported image type \"%s\""".\n", ct);
- return -ENOTSUP;
+ ret = -ENOTSUP;
+ goto exit;
}
s->desc_offset = 0;
- return vmdk_parse_extents(buf, bs, bs->file->filename);
+ ret = vmdk_parse_extents(buf, bs, bs->file->filename);
+exit: /* every path after the allocation leaves through here so buf is freed */
+ g_free(buf);
+ return ret;
}
-static int vmdk_open(BlockDriverState *bs, QDict *options, int flags)
+static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
{
int ret;
BDRVVmdkState *s = bs->opaque;
uint64_t offset,
bool allocate)
{
- /* 128 sectors * 512 bytes each = grain size 64KB */
- uint8_t whole_grain[extent->cluster_sectors * 512];
+ int ret = VMDK_OK;
+ uint8_t *whole_grain = NULL;
/* we will be here if it's first write on non-exist grain(cluster).
* try to read from parent image, if exist */
if (bs->backing_hd) {
- int ret;
-
+ whole_grain =
+ qemu_blockalign(bs, extent->cluster_sectors << BDRV_SECTOR_BITS);
if (!vmdk_is_cid_valid(bs)) {
- return VMDK_ERROR;
+ ret = VMDK_ERROR;
+ goto exit;
}
/* floor offset to cluster */
ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
extent->cluster_sectors);
if (ret < 0) {
- return VMDK_ERROR;
+ ret = VMDK_ERROR;
+ goto exit;
}
/* Write grain only into the active image */
ret = bdrv_write(extent->file, cluster_offset, whole_grain,
extent->cluster_sectors);
if (ret < 0) {
- return VMDK_ERROR;
+ ret = VMDK_ERROR;
+ goto exit;
}
}
- return VMDK_OK;
+exit:
+ qemu_vfree(whole_grain);
+ return ret;
}
static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
return NULL;
}
-static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs,
+static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
BDRVVmdkState *s = bs->opaque;
sector_num * 512, 0, &offset);
qemu_co_mutex_unlock(&s->lock);
- ret = (ret == VMDK_OK || ret == VMDK_ZEROED);
+ switch (ret) {
+ case VMDK_ERROR:
+ ret = -EIO;
+ break;
+ case VMDK_UNALLOC:
+ ret = 0;
+ break;
+ case VMDK_ZEROED:
+ ret = BDRV_BLOCK_ZERO;
+ break;
+ case VMDK_OK:
+ ret = BDRV_BLOCK_DATA;
+ if (extent->file == bs->file) {
+ ret |= BDRV_BLOCK_OFFSET_VALID | offset;
+ }
+
+ break;
+ }
index_in_cluster = sector_num % extent->cluster_sectors;
n = extent->cluster_sectors - index_in_cluster;
/**
* vmdk_write:
* @zeroed: buf is ignored (data is zero), use zeroed_grain GTE feature
- * if possible, otherwise return -ENOTSUP.
- * @zero_dry_run: used for zeroed == true only, don't update L2 table, just
+ * if possible, otherwise return -ENOTSUP.
+ * @zero_dry_run: used for zeroed == true only, don't update L2 table, just try
+ * with each cluster. By dry run we can find if the zero write
+ * is possible without modifying image data.
*
* Returns: error code with 0 for success.
*/
int ret;
BDRVVmdkState *s = bs->opaque;
qemu_co_mutex_lock(&s->lock);
+ /* write zeroes could fail if sectors not aligned to cluster, test it with
+ * dry_run == true before really updating image */
ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true);
if (!ret) {
ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false);
return ret;
}
-
static int vmdk_create_extent(const char *filename, int64_t filesize,
bool flat, bool compress, bool zeroed_grain)
{
header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
header.capacity = filesize / 512;
header.granularity = 128;
- header.num_gtes_per_gte = 512;
+ header.num_gtes_per_gt = 512;
grains = (filesize / 512 + header.granularity - 1) / header.granularity;
- gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
+ gt_size = ((header.num_gtes_per_gt * sizeof(uint32_t)) + 511) >> 9;
gt_count =
- (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
+ (grains + header.num_gtes_per_gt - 1) / header.num_gtes_per_gt;
gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
header.desc_offset = 1;
header.flags = cpu_to_le32(header.flags);
header.capacity = cpu_to_le64(header.capacity);
header.granularity = cpu_to_le64(header.granularity);
- header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte);
+ header.num_gtes_per_gt = cpu_to_le32(header.num_gtes_per_gt);
header.desc_offset = cpu_to_le64(header.desc_offset);
header.desc_size = cpu_to_le64(header.desc_size);
header.rgd_offset = cpu_to_le64(header.rgd_offset);
return VMDK_OK;
}
-static int relative_path(char *dest, int dest_size,
- const char *base, const char *target)
-{
- int i = 0;
- int n = 0;
- const char *p, *q;
-#ifdef _WIN32
- const char *sep = "\\";
-#else
- const char *sep = "/";
-#endif
-
- if (!(dest && base && target)) {
- return VMDK_ERROR;
- }
- if (path_is_absolute(target)) {
- pstrcpy(dest, dest_size, target);
- return VMDK_OK;
- }
- while (base[i] == target[i]) {
- i++;
- }
- p = &base[i];
- q = &target[i];
- while (*p) {
- if (*p == *sep) {
- n++;
- }
- p++;
- }
- dest[0] = '\0';
- for (; n; n--) {
- pstrcat(dest, dest_size, "..");
- pstrcat(dest, dest_size, sep);
- }
- pstrcat(dest, dest_size, q);
- return VMDK_OK;
-}
-
-static int vmdk_create(const char *filename, QEMUOptionParameter *options)
+static int vmdk_create(const char *filename, QEMUOptionParameter *options,
+ Error **errp)
{
int fd, idx = 0;
char desc[BUF_SIZE];
"ddb.geometry.heads = \"%d\"\n"
"ddb.geometry.sectors = \"63\"\n"
"ddb.adapterType = \"%s\"\n";
+ Error *local_err = NULL;
if (filename_decompose(filename, path, prefix, postfix, PATH_MAX)) {
return -EINVAL;
return -ENOTSUP;
}
if (backing_file) {
- char parent_filename[PATH_MAX];
BlockDriverState *bs = bdrv_new("");
- ret = bdrv_open(bs, backing_file, NULL, 0, NULL);
+ ret = bdrv_open(bs, backing_file, NULL, 0, NULL, &local_err);
if (ret != 0) {
- bdrv_delete(bs);
+ qerror_report_err(local_err);
+ error_free(local_err);
+ bdrv_unref(bs);
return ret;
}
if (strcmp(bs->drv->format_name, "vmdk")) {
- bdrv_delete(bs);
+ bdrv_unref(bs);
return -EINVAL;
}
parent_cid = vmdk_read_cid(bs, 0);
- bdrv_delete(bs);
- relative_path(parent_filename, sizeof(parent_filename),
- filename, backing_file);
+ bdrv_unref(bs);
snprintf(parent_desc_line, sizeof(parent_desc_line),
- "parentFileNameHint=\"%s\"", parent_filename);
+ "parentFileNameHint=\"%s\"", backing_file);
}
/* Create extents */
return ret;
}
+static int vmdk_has_zero_init(BlockDriverState *bs)
+{
+ int i;
+ BDRVVmdkState *s = bs->opaque;
+
+ /* Report whether the image can be treated as zero-initialized. Walk all
+ * extents; only flat extents are checked. If any flat extent's file
+ * reports no zero init via bdrv_has_zero_init(), return 0 for the whole
+ * image. Non-flat extents are not queried. */
+ for (i = 0; i < s->num_extents; i++) {
+ if (s->extents[i].flat) {
+ if (!bdrv_has_zero_init(s->extents[i].file)) {
+ return 0;
+ }
+ }
+ }
+ return 1; /* no flat extent objected, or there are no flat extents */
+}
+
static QEMUOptionParameter vmdk_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
};
/* BlockDriver description for the "vmdk" format: the per-instance state size,
* the vmdk_* callbacks implementing each operation, and the creation options. */
static BlockDriver bdrv_vmdk = {
- .format_name = "vmdk",
- .instance_size = sizeof(BDRVVmdkState),
- .bdrv_probe = vmdk_probe,
- .bdrv_open = vmdk_open,
- .bdrv_reopen_prepare = vmdk_reopen_prepare,
- .bdrv_read = vmdk_co_read,
- .bdrv_write = vmdk_co_write,
- .bdrv_co_write_zeroes = vmdk_co_write_zeroes,
- .bdrv_close = vmdk_close,
- .bdrv_create = vmdk_create,
- .bdrv_co_flush_to_disk = vmdk_co_flush,
- .bdrv_co_is_allocated = vmdk_co_is_allocated,
- .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
-
- .create_options = vmdk_create_options,
+ .format_name = "vmdk",
+ .instance_size = sizeof(BDRVVmdkState),
+ .bdrv_probe = vmdk_probe,
+ .bdrv_open = vmdk_open,
+ .bdrv_reopen_prepare = vmdk_reopen_prepare,
+ .bdrv_read = vmdk_co_read,
+ .bdrv_write = vmdk_co_write,
+ .bdrv_co_write_zeroes = vmdk_co_write_zeroes,
+ .bdrv_close = vmdk_close,
+ .bdrv_create = vmdk_create,
+ .bdrv_co_flush_to_disk = vmdk_co_flush,
+ .bdrv_co_get_block_status = vmdk_co_get_block_status,
+ .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
+ .bdrv_has_zero_init = vmdk_has_zero_init,
+
+ .create_options = vmdk_create_options,
};
static void bdrv_vmdk_init(void)