#define L2_CACHE_SIZE 16
-typedef struct BDRVVmdkState {
+typedef struct VmdkExtent {
+ BlockDriverState *file;
+ bool flat;
+ int64_t sectors;
+ int64_t end_sector;
int64_t l1_table_offset;
int64_t l1_backup_table_offset;
uint32_t *l1_table;
uint32_t l2_cache_counts[L2_CACHE_SIZE];
unsigned int cluster_sectors;
+} VmdkExtent;
+
+typedef struct BDRVVmdkState {
uint32_t parent_cid;
+ int num_extents;
+ /* Extent array with num_extents entries, ascend ordered by address */
+ VmdkExtent *extents;
} BDRVVmdkState;
typedef struct VmdkMetaData {
return 0;
magic = be32_to_cpu(*(uint32_t *)buf);
if (magic == VMDK3_MAGIC ||
- magic == VMDK4_MAGIC)
+ magic == VMDK4_MAGIC) {
return 100;
- else
+ } else {
+ const char *p = (const char *)buf;
+ const char *end = p + buf_size;
+ while (p < end) {
+ if (*p == '#') {
+ /* skip comment line */
+ while (p < end && *p != '\n') {
+ p++;
+ }
+ p++;
+ continue;
+ }
+ if (*p == ' ') {
+ while (p < end && *p == ' ') {
+ p++;
+ }
+ /* skip '\r' if windows line endings used. */
+ if (p < end && *p == '\r') {
+ p++;
+ }
+ /* only accept blank lines before 'version=' line */
+ if (p == end || *p != '\n') {
+ return 0;
+ }
+ p++;
+ continue;
+ }
+ if (end - p >= strlen("version=X\n")) {
+ if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 ||
+ strncmp("version=2\n", p, strlen("version=2\n")) == 0) {
+ return 100;
+ }
+ }
+ if (end - p >= strlen("version=X\r\n")) {
+ if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 ||
+ strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) {
+ return 100;
+ }
+ }
+ return 0;
+ }
return 0;
+ }
}
#define CHECK_CID 1
#define DESC_SIZE 20*SECTOR_SIZE // 20 sectors of 512 bytes each
#define HEADER_SIZE 512 // first sector of 512 bytes
+static void vmdk_free_extents(BlockDriverState *bs)
+{
+ int i;
+ BDRVVmdkState *s = bs->opaque;
+
+ for (i = 0; i < s->num_extents; i++) {
+ qemu_free(s->extents[i].l1_table);
+ qemu_free(s->extents[i].l2_cache);
+ qemu_free(s->extents[i].l1_backup_table);
+ }
+ qemu_free(s->extents);
+}
+
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
{
char desc[DESC_SIZE];
return 0;
}
-static int vmdk_open(BlockDriverState *bs, int flags)
+/* Create and append extent to the extent array. Return the added VmdkExtent
+ * address. return NULL if allocation failed. */
+static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
+ BlockDriverState *file, bool flat, int64_t sectors,
+ int64_t l1_offset, int64_t l1_backup_offset,
+ uint32_t l1_size,
+ int l2_size, unsigned int cluster_sectors)
{
+ VmdkExtent *extent;
BDRVVmdkState *s = bs->opaque;
- uint32_t magic;
- int l1_size, i;
-
- if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic))
- goto fail;
- magic = be32_to_cpu(magic);
- if (magic == VMDK3_MAGIC) {
- VMDK3Header header;
-
- if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
- goto fail;
- s->cluster_sectors = le32_to_cpu(header.granularity);
- s->l2_size = 1 << 9;
- s->l1_size = 1 << 6;
- bs->total_sectors = le32_to_cpu(header.disk_sectors);
- s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
- s->l1_backup_table_offset = 0;
- s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
- } else if (magic == VMDK4_MAGIC) {
- VMDK4Header header;
-
- if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
- goto fail;
- bs->total_sectors = le64_to_cpu(header.capacity);
- s->cluster_sectors = le64_to_cpu(header.granularity);
- s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
- s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
- if (s->l1_entry_sectors <= 0)
- goto fail;
- s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
- / s->l1_entry_sectors;
- s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
- s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
-
- // try to open parent images, if exist
- if (vmdk_parent_open(bs) != 0)
- goto fail;
- // write the CID once after the image creation
- s->parent_cid = vmdk_read_cid(bs,1);
+ s->extents = qemu_realloc(s->extents,
+ (s->num_extents + 1) * sizeof(VmdkExtent));
+ extent = &s->extents[s->num_extents];
+ s->num_extents++;
+
+ memset(extent, 0, sizeof(VmdkExtent));
+ extent->file = file;
+ extent->flat = flat;
+ extent->sectors = sectors;
+ extent->l1_table_offset = l1_offset;
+ extent->l1_backup_table_offset = l1_backup_offset;
+ extent->l1_size = l1_size;
+ extent->l1_entry_sectors = l2_size * cluster_sectors;
+ extent->l2_size = l2_size;
+ extent->cluster_sectors = cluster_sectors;
+
+ if (s->num_extents > 1) {
+ extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
} else {
- goto fail;
+ extent->end_sector = extent->sectors;
}
+ bs->total_sectors = extent->end_sector;
+ return extent;
+}
+
+static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent)
+{
+ int ret;
+ int l1_size, i;
/* read the L1 table */
- l1_size = s->l1_size * sizeof(uint32_t);
- s->l1_table = qemu_malloc(l1_size);
- if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, l1_size) != l1_size)
- goto fail;
- for(i = 0; i < s->l1_size; i++) {
- le32_to_cpus(&s->l1_table[i]);
+ l1_size = extent->l1_size * sizeof(uint32_t);
+ extent->l1_table = qemu_malloc(l1_size);
+ ret = bdrv_pread(extent->file,
+ extent->l1_table_offset,
+ extent->l1_table,
+ l1_size);
+ if (ret < 0) {
+ goto fail_l1;
+ }
+ for (i = 0; i < extent->l1_size; i++) {
+ le32_to_cpus(&extent->l1_table[i]);
}
- if (s->l1_backup_table_offset) {
- s->l1_backup_table = qemu_malloc(l1_size);
- if (bdrv_pread(bs->file, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size)
- goto fail;
- for(i = 0; i < s->l1_size; i++) {
- le32_to_cpus(&s->l1_backup_table[i]);
+ if (extent->l1_backup_table_offset) {
+ extent->l1_backup_table = qemu_malloc(l1_size);
+ ret = bdrv_pread(extent->file,
+ extent->l1_backup_table_offset,
+ extent->l1_backup_table,
+ l1_size);
+ if (ret < 0) {
+ goto fail_l1b;
+ }
+ for (i = 0; i < extent->l1_size; i++) {
+ le32_to_cpus(&extent->l1_backup_table[i]);
}
}
- s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
+ extent->l2_cache =
+ qemu_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
return 0;
- fail:
- qemu_free(s->l1_backup_table);
- qemu_free(s->l1_table);
- qemu_free(s->l2_cache);
- return -1;
+ fail_l1b:
+ qemu_free(extent->l1_backup_table);
+ fail_l1:
+ qemu_free(extent->l1_table);
+ return ret;
}
-static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
- uint64_t offset, int allocate);
+static int vmdk_open_vmdk3(BlockDriverState *bs, int flags)
+{
+ int ret;
+ uint32_t magic;
+ VMDK3Header header;
+ VmdkExtent *extent;
+
+ ret = bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header));
+ if (ret < 0) {
+ goto fail;
+ }
+ extent = vmdk_add_extent(bs,
+ bs->file, false,
+ le32_to_cpu(header.disk_sectors),
+ le32_to_cpu(header.l1dir_offset) << 9,
+ 0, 1 << 6, 1 << 9,
+ le32_to_cpu(header.granularity));
+ ret = vmdk_init_tables(bs, extent);
+ if (ret) {
+ /* vmdk_init_tables cleans up on fail, so only free allocation of
+ * vmdk_add_extent here. */
+ goto fail;
+ }
+ return 0;
+ fail:
+ vmdk_free_extents(bs);
+ return ret;
+}
-static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
- uint64_t offset, int allocate)
+static int vmdk_open_vmdk4(BlockDriverState *bs, int flags)
{
+ int ret;
+ uint32_t magic;
+ uint32_t l1_size, l1_entry_sectors;
+ VMDK4Header header;
BDRVVmdkState *s = bs->opaque;
- uint8_t whole_grain[s->cluster_sectors*512]; // 128 sectors * 512 bytes each = grain size 64KB
+ VmdkExtent *extent;
- // we will be here if it's first write on non-exist grain(cluster).
- // try to read from parent image, if exist
+ ret = bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header));
+ if (ret < 0) {
+ goto fail;
+ }
+ l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
+ * le64_to_cpu(header.granularity);
+ l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
+ / l1_entry_sectors;
+ extent = vmdk_add_extent(bs, bs->file, false,
+ le64_to_cpu(header.capacity),
+ le64_to_cpu(header.gd_offset) << 9,
+ le64_to_cpu(header.rgd_offset) << 9,
+ l1_size,
+ le32_to_cpu(header.num_gtes_per_gte),
+ le64_to_cpu(header.granularity));
+ if (extent->l1_entry_sectors <= 0) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ /* try to open parent images, if exist */
+ ret = vmdk_parent_open(bs);
+ if (ret) {
+ goto fail;
+ }
+ s->parent_cid = vmdk_read_cid(bs, 1);
+ ret = vmdk_init_tables(bs, extent);
+ if (ret) {
+ goto fail;
+ }
+ return 0;
+ fail:
+ vmdk_free_extents(bs);
+ return ret;
+}
+
+static int vmdk_open(BlockDriverState *bs, int flags)
+{
+ uint32_t magic;
+
+ if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic)) {
+ return -EIO;
+ }
+
+ magic = be32_to_cpu(magic);
+ if (magic == VMDK3_MAGIC) {
+ return vmdk_open_vmdk3(bs, flags);
+ } else if (magic == VMDK4_MAGIC) {
+ return vmdk_open_vmdk4(bs, flags);
+ } else {
+ return -EINVAL;
+ }
+}
+
+static int get_whole_cluster(BlockDriverState *bs,
+ VmdkExtent *extent,
+ uint64_t cluster_offset,
+ uint64_t offset,
+ bool allocate)
+{
+ /* 128 sectors * 512 bytes each = grain size 64KB */
+ uint8_t whole_grain[extent->cluster_sectors * 512];
+
+ /* we will be here if it's first write on non-exist grain(cluster).
+ * try to read from parent image, if exist */
if (bs->backing_hd) {
int ret;
if (!vmdk_is_cid_valid(bs))
return -1;
+ /* floor offset to cluster */
+ offset -= offset % (extent->cluster_sectors * 512);
ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
- s->cluster_sectors);
+ extent->cluster_sectors);
if (ret < 0) {
return -1;
}
- //Write grain only into the active image
- ret = bdrv_write(bs->file, cluster_offset, whole_grain,
- s->cluster_sectors);
+ /* Write grain only into the active image */
+ ret = bdrv_write(extent->file, cluster_offset, whole_grain,
+ extent->cluster_sectors);
if (ret < 0) {
return -1;
}
return 0;
}
-static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
+static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
{
- BDRVVmdkState *s = bs->opaque;
-
/* update L2 table */
- if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
- &(m_data->offset), sizeof(m_data->offset)) < 0)
+ if (bdrv_pwrite_sync(
+ extent->file,
+ ((int64_t)m_data->l2_offset * 512)
+ + (m_data->l2_index * sizeof(m_data->offset)),
+ &(m_data->offset),
+ sizeof(m_data->offset)
+ ) < 0) {
return -1;
+ }
/* update backup L2 table */
- if (s->l1_backup_table_offset != 0) {
- m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
- if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
- &(m_data->offset), sizeof(m_data->offset)) < 0)
+ if (extent->l1_backup_table_offset != 0) {
+ m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
+ if (bdrv_pwrite_sync(
+ extent->file,
+ ((int64_t)m_data->l2_offset * 512)
+ + (m_data->l2_index * sizeof(m_data->offset)),
+ &(m_data->offset), sizeof(m_data->offset)
+ ) < 0) {
return -1;
+ }
}
return 0;
}
-static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
- uint64_t offset, int allocate)
+static uint64_t get_cluster_offset(BlockDriverState *bs,
+ VmdkExtent *extent,
+ VmdkMetaData *m_data,
+ uint64_t offset, int allocate)
{
- BDRVVmdkState *s = bs->opaque;
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
uint32_t min_count, *l2_table, tmp = 0;
if (m_data)
m_data->valid = 0;
- l1_index = (offset >> 9) / s->l1_entry_sectors;
- if (l1_index >= s->l1_size)
+ l1_index = (offset >> 9) / extent->l1_entry_sectors;
+ if (l1_index >= extent->l1_size) {
return 0;
- l2_offset = s->l1_table[l1_index];
- if (!l2_offset)
+ }
+ l2_offset = extent->l1_table[l1_index];
+ if (!l2_offset) {
return 0;
- for(i = 0; i < L2_CACHE_SIZE; i++) {
- if (l2_offset == s->l2_cache_offsets[i]) {
+ }
+ for (i = 0; i < L2_CACHE_SIZE; i++) {
+ if (l2_offset == extent->l2_cache_offsets[i]) {
/* increment the hit count */
- if (++s->l2_cache_counts[i] == 0xffffffff) {
- for(j = 0; j < L2_CACHE_SIZE; j++) {
- s->l2_cache_counts[j] >>= 1;
+ if (++extent->l2_cache_counts[i] == 0xffffffff) {
+ for (j = 0; j < L2_CACHE_SIZE; j++) {
+ extent->l2_cache_counts[j] >>= 1;
}
}
- l2_table = s->l2_cache + (i * s->l2_size);
+ l2_table = extent->l2_cache + (i * extent->l2_size);
goto found;
}
}
/* not found: load a new entry in the least used one */
min_index = 0;
min_count = 0xffffffff;
- for(i = 0; i < L2_CACHE_SIZE; i++) {
- if (s->l2_cache_counts[i] < min_count) {
- min_count = s->l2_cache_counts[i];
+ for (i = 0; i < L2_CACHE_SIZE; i++) {
+ if (extent->l2_cache_counts[i] < min_count) {
+ min_count = extent->l2_cache_counts[i];
min_index = i;
}
}
- l2_table = s->l2_cache + (min_index * s->l2_size);
- if (bdrv_pread(bs->file, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) !=
- s->l2_size * sizeof(uint32_t))
+ l2_table = extent->l2_cache + (min_index * extent->l2_size);
+ if (bdrv_pread(
+ extent->file,
+ (int64_t)l2_offset * 512,
+ l2_table,
+ extent->l2_size * sizeof(uint32_t)
+ ) != extent->l2_size * sizeof(uint32_t)) {
return 0;
+ }
- s->l2_cache_offsets[min_index] = l2_offset;
- s->l2_cache_counts[min_index] = 1;
+ extent->l2_cache_offsets[min_index] = l2_offset;
+ extent->l2_cache_counts[min_index] = 1;
found:
- l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
+ l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
cluster_offset = le32_to_cpu(l2_table[l2_index]);
if (!cluster_offset) {
return 0;
// Avoid the L2 tables update for the images that have snapshots.
- cluster_offset = bdrv_getlength(bs->file);
- bdrv_truncate(bs->file, cluster_offset + (s->cluster_sectors << 9));
+ cluster_offset = bdrv_getlength(extent->file);
+ bdrv_truncate(
+ extent->file,
+ cluster_offset + (extent->cluster_sectors << 9)
+ );
cluster_offset >>= 9;
tmp = cpu_to_le32(cluster_offset);
* This problem may occur because of insufficient space on host disk
* or inappropriate VM shutdown.
*/
- if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
+ if (get_whole_cluster(
+ bs, extent, cluster_offset, offset, allocate) == -1)
return 0;
if (m_data) {
return cluster_offset;
}
+static VmdkExtent *find_extent(BDRVVmdkState *s,
+ int64_t sector_num, VmdkExtent *start_hint)
+{
+ VmdkExtent *extent = start_hint;
+
+ if (!extent) {
+ extent = &s->extents[0];
+ }
+ while (extent < &s->extents[s->num_extents]) {
+ if (sector_num < extent->end_sector) {
+ return extent;
+ }
+ extent++;
+ }
+ return NULL;
+}
+
static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum)
{
BDRVVmdkState *s = bs->opaque;
- int index_in_cluster, n;
- uint64_t cluster_offset;
- cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
- index_in_cluster = sector_num % s->cluster_sectors;
- n = s->cluster_sectors - index_in_cluster;
+ int64_t index_in_cluster, n, ret;
+ uint64_t offset;
+ VmdkExtent *extent;
+
+ extent = find_extent(s, sector_num, NULL);
+ if (!extent) {
+ return 0;
+ }
+ if (extent->flat) {
+ n = extent->end_sector - sector_num;
+ ret = 1;
+ } else {
+ offset = get_cluster_offset(bs, extent, NULL, sector_num * 512, 0);
+ index_in_cluster = sector_num % extent->cluster_sectors;
+ n = extent->cluster_sectors - index_in_cluster;
+ ret = offset ? 1 : 0;
+ }
if (n > nb_sectors)
n = nb_sectors;
*pnum = n;
- return (cluster_offset != 0);
+ return ret;
}
static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors)
{
BDRVVmdkState *s = bs->opaque;
- int index_in_cluster, n, ret;
+ int ret;
+ uint64_t n, index_in_cluster;
+ VmdkExtent *extent = NULL;
uint64_t cluster_offset;
while (nb_sectors > 0) {
- cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
- index_in_cluster = sector_num % s->cluster_sectors;
- n = s->cluster_sectors - index_in_cluster;
+ extent = find_extent(s, sector_num, extent);
+ if (!extent) {
+ return -EIO;
+ }
+ cluster_offset = get_cluster_offset(
+ bs, extent, NULL, sector_num << 9, 0);
+ index_in_cluster = sector_num % extent->cluster_sectors;
+ n = extent->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
n = nb_sectors;
if (!cluster_offset) {
const uint8_t *buf, int nb_sectors)
{
BDRVVmdkState *s = bs->opaque;
- VmdkMetaData m_data;
- int index_in_cluster, n;
+ VmdkExtent *extent = NULL;
+ int n;
+ int64_t index_in_cluster;
uint64_t cluster_offset;
static int cid_update = 0;
+ VmdkMetaData m_data;
if (sector_num > bs->total_sectors) {
fprintf(stderr,
}
while (nb_sectors > 0) {
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- n = s->cluster_sectors - index_in_cluster;
- if (n > nb_sectors)
- n = nb_sectors;
- cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
- if (!cluster_offset)
+ extent = find_extent(s, sector_num, extent);
+ if (!extent) {
+ return -EIO;
+ }
+ cluster_offset = get_cluster_offset(
+ bs,
+ extent,
+ &m_data,
+ sector_num << 9, 1);
+ if (!cluster_offset) {
return -1;
+ }
+ index_in_cluster = sector_num % extent->cluster_sectors;
+ n = extent->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors) {
+ n = nb_sectors;
+ }
- if (bdrv_pwrite(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
+ if (bdrv_pwrite(bs->file,
+ cluster_offset + index_in_cluster * 512,
+ buf, n * 512)
+ != n * 512) {
return -1;
+ }
if (m_data.valid) {
/* update L2 tables */
- if (vmdk_L2update(bs, &m_data) == -1)
+ if (vmdk_L2update(extent, &m_data) == -1) {
return -1;
+ }
}
nb_sectors -= n;
sector_num += n;
static void vmdk_close(BlockDriverState *bs)
{
- BDRVVmdkState *s = bs->opaque;
-
- qemu_free(s->l1_table);
- qemu_free(s->l2_cache);
+ vmdk_free_extents(bs);
}
static int vmdk_flush(BlockDriverState *bs)