BDRVQcowState *s = bs->opaque;
int new_l1_size2, ret, i;
uint64_t *new_l1_table;
+ int64_t old_l1_table_offset, old_l1_size;
int64_t new_l1_table_offset, new_l1_size;
uint8_t data[12];
if (min_size <= s->l1_size)
return 0;
+ /* Do a sanity check on min_size before trying to calculate new_l1_size
+ * (this prevents overflows during the while loop for the calculation of
+ * new_l1_size) */
+ if (min_size > INT_MAX / sizeof(uint64_t)) {
+ return -EFBIG;
+ }
+
if (exact_size) {
new_l1_size = min_size;
} else {
/* Bump size up to reduce the number of times we have to grow */
new_l1_size = s->l1_size;
if (new_l1_size == 0) {
new_l1_size = 1;
}
while (min_size > new_l1_size) {
new_l1_size = (new_l1_size * 3 + 1) / 2;
}
}
- if (new_l1_size > INT_MAX) {
+ if (new_l1_size > INT_MAX / sizeof(uint64_t)) {
return -EFBIG;
}
#ifdef DEBUG_ALLOC2
fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
s->l1_size, new_l1_size);
#endif
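+ /* new_l1_size is no greater than INT_MAX / sizeof(uint64_t) here, so the
+  * multiplication below cannot overflow the int new_l1_size2 */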
new_l1_size2 = sizeof(uint64_t) * new_l1_size;
- new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
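+ /* qemu_try_blockalign() returns memory aligned for I/O on bs->file and
+  * NULL on failure (g_malloc0() would abort instead); it does not zero the
+  * buffer, hence the explicit memset() below */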
+ new_l1_table = qemu_try_blockalign(bs->file,
+ align_offset(new_l1_size2, 512));
+ if (new_l1_table == NULL) {
+ return -ENOMEM;
+ }
+ memset(new_l1_table, 0, align_offset(new_l1_size2, 512));
+
memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
/* write new table (align to cluster) */
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
if (new_l1_table_offset < 0) {
- g_free(new_l1_table);
+ qemu_vfree(new_l1_table);
return new_l1_table_offset;
}
/* the L1 position has not yet been updated, so these clusters must
* indeed be completely free */
- ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
- new_l1_table_offset, new_l1_size2);
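+ /* the second argument is now a mask of overlap checks to skip; passing 0
+  * means every configured check is performed */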
+ ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset,
+ new_l1_size2);
if (ret < 0) {
goto fail;
}
/* set new table */
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
cpu_to_be32w((uint32_t*)data, new_l1_size);
- cpu_to_be64wu((uint64_t*)(data + 4), new_l1_table_offset);
+ stq_be_p(data + 4, new_l1_table_offset);
ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size),
data, sizeof(data));
if (ret < 0) {
goto fail;
}
- g_free(s->l1_table);
- qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t),
- QCOW2_DISCARD_OTHER);
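+ /* switch the in-memory state over to the new table before freeing the
+  * old one, so the clusters being freed are no longer referenced as the
+  * active L1 table */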
+ qemu_vfree(s->l1_table);
+ old_l1_table_offset = s->l1_table_offset;
s->l1_table_offset = new_l1_table_offset;
s->l1_table = new_l1_table;
+ old_l1_size = s->l1_size;
s->l1_size = new_l1_size;
+ qcow2_free_clusters(bs, old_l1_table_offset, old_l1_size * sizeof(uint64_t),
+ QCOW2_DISCARD_OTHER);
return 0;
fail:
- g_free(new_l1_table);
+ qemu_vfree(new_l1_table);
qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
QCOW2_DISCARD_OTHER);
return ret;
buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
}
- ret = qcow2_pre_write_overlap_check(bs,
- QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L1,
+ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
s->l1_table_offset + 8 * l1_start_index, sizeof(buf));
if (ret < 0) {
return ret;
}
static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
{
BDRVQcowState *s = bs->opaque;
uint64_t old_l2_offset;
- uint64_t *l2_table;
+ uint64_t *l2_table = NULL;
int64_t l2_offset;
int ret;
l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
if (l2_offset < 0) {
- return l2_offset;
+ ret = l2_offset;
+ goto fail;
}
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
trace_qcow2_l2_allocate_get_empty(bs, l1_index);
ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
if (ret < 0) {
- return ret;
+ goto fail;
}
l2_table = *table;
fail:
trace_qcow2_l2_allocate_done(bs, l1_index, ret);
- qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
+ if (l2_table != NULL) {
+ qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
+ }
s->l1_table[l1_index] = old_l2_offset;
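+ /* a positive l2_offset means the cluster allocation above succeeded and
+  * must be undone; QCOW2_DISCARD_ALWAYS frees it unconditionally since it
+  * was never in use */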
+ if (l2_offset > 0) {
+ qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
+ QCOW2_DISCARD_ALWAYS);
+ }
return ret;
}
* cluster which may require a different handling)
*/
static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
- uint64_t *l2_table, uint64_t start, uint64_t stop_flags)
+ uint64_t *l2_table, uint64_t stop_flags)
{
int i;
- uint64_t mask = stop_flags | L2E_OFFSET_MASK;
- uint64_t offset = be64_to_cpu(l2_table[0]) & mask;
+ uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
+ uint64_t first_entry = be64_to_cpu(l2_table[0]);
+ uint64_t offset = first_entry & mask;
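+ /* with QCOW_OFLAG_COMPRESSED in the mask, a compressed entry can never
+  * equal offset + i * cluster_size in the loop below, so it ends the run */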
if (!offset)
return 0;
- for (i = start; i < start + nb_clusters; i++) {
+ assert(qcow2_get_cluster_type(first_entry) != QCOW2_CLUSTER_COMPRESSED);
+
+ for (i = 0; i < nb_clusters; i++) {
uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
if (offset + (uint64_t) i * cluster_size != l2_entry) {
break;
}
}
- return (i - start);
+ return i;
}
static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
static int coroutine_fn copy_sectors(BlockDriverState *bs, uint64_t start_sect,
uint64_t cluster_offset, int n_start, int n_end)
{
BDRVQcowState *s = bs->opaque;
QEMUIOVector qiov;
struct iovec iov;
int n, ret;
- /*
- * If this is the last cluster and it is only partially used, we must only
- * copy until the end of the image, or bdrv_check_request will fail for the
- * bdrv_read/write calls below.
- */
- if (start_sect + n_end > bs->total_sectors) {
- n_end = bs->total_sectors - start_sect;
- }
-
n = n_end - n_start;
if (n <= 0) {
return 0;
}
iov.iov_len = n * BDRV_SECTOR_SIZE;
- iov.iov_base = qemu_blockalign(bs, iov.iov_len);
+ iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
+ if (iov.iov_base == NULL) {
+ return -ENOMEM;
+ }
qemu_iovec_init_external(&qiov, &iov, 1);
BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
+ if (!bs->drv) {
+ ret = -ENOMEDIUM;
+ goto out;
+ }
+
/* Call .bdrv_co_readv() directly instead of using the public block-layer
* interface. This avoids double I/O throttling and request tracking,
* which can lead to deadlock when block layer copy-on-read is enabled.
&s->aes_encrypt_key);
}
- ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+ ret = qcow2_pre_write_overlap_check(bs, 0,
cluster_offset + n_start * BDRV_SECTOR_SIZE, n * BDRV_SECTOR_SIZE);
if (ret < 0) {
goto out;
break;
case QCOW2_CLUSTER_ZERO:
if (s->qcow_version < 3) {
+ qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
return -EIO;
}
c = count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], 0,
- QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
+ &l2_table[l2_index], QCOW_OFLAG_ZERO);
*cluster_offset = 0;
break;
case QCOW2_CLUSTER_UNALLOCATED:
case QCOW2_CLUSTER_NORMAL:
/* how many allocated clusters ? */
c = count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], 0,
- QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
+ &l2_table[l2_index], QCOW_OFLAG_ZERO);
*cluster_offset &= L2E_OFFSET_MASK;
break;
default:
trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
assert(m->nb_clusters > 0);
- old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
+ old_cluster = g_try_malloc(m->nb_clusters * sizeof(uint64_t));
+ if (old_cluster == NULL) {
+ ret = -ENOMEM;
+ goto err;
+ }
/* copy content of unmodified sectors */
ret = perform_cow(bs, m, &m->cow_start);
}
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ assert(l2_index + m->nb_clusters <= s->l2_size);
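+ /* the loop below writes m->nb_clusters entries into this one L2 table,
+  * so the range must not cross the end of the table */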
for (i = 0; i < m->nb_clusters; i++) {
/* if two concurrent writes happen to the same unallocated cluster
* each write allocates separate cluster and writes data concurrently.
/* We keep all QCOW_OFLAG_COPIED clusters */
keep_clusters =
count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], 0,
+ &l2_table[l2_index],
QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
assert(keep_clusters <= nb_clusters);
* Return 0 on success and -errno in error cases
*/
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m)
+ int *num, uint64_t *host_offset, QCowL2Meta **m)
{
BDRVQcowState *s = bs->opaque;
uint64_t start, remaining;
uint64_t cur_bytes;
int ret;
- trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
- n_start, n_end);
+ trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *num);
- assert(n_start * BDRV_SECTOR_SIZE == offset_into_cluster(s, offset));
- offset = start_of_cluster(s, offset);
+ assert((offset & ~BDRV_SECTOR_MASK) == 0);
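+ /* the offset must be sector-aligned because *num counts sectors */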
again:
- start = offset + (n_start << BDRV_SECTOR_BITS);
- remaining = (n_end - n_start) << BDRV_SECTOR_BITS;
+ start = offset;
+ remaining = *num << BDRV_SECTOR_BITS;
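+ /* *num is an in/out parameter: on input the requested number of sectors,
+  * on output the number of sectors actually handled */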
cluster_offset = 0;
*host_offset = 0;
cur_bytes = 0;
}
}
- *num = (n_end - n_start) - (remaining >> BDRV_SECTOR_BITS);
+ *num -= remaining >> BDRV_SECTOR_BITS;
assert(*num > 0);
assert(*host_offset != 0);
nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
for (i = 0; i < nb_clusters; i++) {
- uint64_t old_offset;
+ uint64_t old_l2_entry;
- old_offset = be64_to_cpu(l2_table[l2_index + i]);
- if ((old_offset & L2E_OFFSET_MASK) == 0) {
- continue;
+ old_l2_entry = be64_to_cpu(l2_table[l2_index + i]);
+
+ /*
+ * Make sure that a discarded area reads back as zeroes for v3 images
+ * (we cannot do it for v2 without actually writing a zero-filled
+ * buffer). We can skip the operation if the cluster is already marked
+ * as zero, or if it's unallocated and we don't have a backing file.
+ *
+ * TODO We might want to use bdrv_get_block_status(bs) here, but we're
+ * holding s->lock, so that doesn't work today.
+ */
+ switch (qcow2_get_cluster_type(old_l2_entry)) {
+ case QCOW2_CLUSTER_UNALLOCATED:
+ if (!bs->backing_hd) {
+ continue;
+ }
+ break;
+
+ case QCOW2_CLUSTER_ZERO:
+ continue;
+
+ case QCOW2_CLUSTER_NORMAL:
+ case QCOW2_CLUSTER_COMPRESSED:
+ break;
+
+ default:
+ abort();
}
/* First remove L2 entries */
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- l2_table[l2_index + i] = cpu_to_be64(0);
+ if (s->qcow_version >= 3) {
+ l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
+ } else {
+ l2_table[l2_index + i] = cpu_to_be64(0);
+ }
/* Then decrease the refcount */
- qcow2_free_any_clusters(bs, old_offset, 1, type);
+ qcow2_free_any_clusters(bs, old_l2_entry, 1, type);
}
ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
/* Round start up and end down */
offset = align_offset(offset, s->cluster_size);
- end_offset &= ~(s->cluster_size - 1);
+ end_offset = start_of_cluster(s, end_offset);
if (offset > end_offset) {
return 0;
* i.e., the number of bits in expanded_clusters.
*/
static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
- int l1_size, uint8_t *expanded_clusters,
- uint64_t nb_clusters)
+ int l1_size, uint8_t **expanded_clusters,
+ uint64_t *nb_clusters)
{
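+ /* expanded_clusters and nb_clusters are passed by reference so that the
+  * bitmap can be grown here and the new size reported back to the caller */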
BDRVQcowState *s = bs->opaque;
bool is_active_l1 = (l1_table == s->l1_table);
if (!is_active_l1) {
/* inactive L2 tables require a buffer to be stored in when loading
* them from disk */
- l2_table = qemu_blockalign(bs, s->cluster_size);
+ l2_table = qemu_try_blockalign(bs->file, s->cluster_size);
+ if (l2_table == NULL) {
+ return -ENOMEM;
+ }
}
for (i = 0; i < l1_size; i++) {
uint64_t l2_entry = be64_to_cpu(l2_table[j]);
int64_t offset = l2_entry & L2E_OFFSET_MASK, cluster_index;
int cluster_type = qcow2_get_cluster_type(l2_entry);
+ bool preallocated = offset != 0;
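+ /* a zero cluster with a non-zero offset is preallocated; it was not
+  * allocated by this function and must not be freed on error below */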
if (cluster_type == QCOW2_CLUSTER_NORMAL) {
cluster_index = offset >> s->cluster_bits;
- assert((cluster_index >= 0) && (cluster_index < nb_clusters));
- if (expanded_clusters[cluster_index / 8] &
+ assert((cluster_index >= 0) && (cluster_index < *nb_clusters));
+ if ((*expanded_clusters)[cluster_index / 8] &
(1 << (cluster_index % 8))) {
/* Probably a shared L2 table; this cluster was a zero
* cluster which has been expanded, its refcount
continue;
}
- if (!offset) {
- /* not preallocated */
+ if (!preallocated) {
if (!bs->backing_hd) {
/* not backed; therefore we can simply deallocate the
* cluster */
}
}
- ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
- offset, s->cluster_size);
+ ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
if (ret < 0) {
- qcow2_free_clusters(bs, offset, s->cluster_size,
- QCOW2_DISCARD_ALWAYS);
+ if (!preallocated) {
+ qcow2_free_clusters(bs, offset, s->cluster_size,
+ QCOW2_DISCARD_ALWAYS);
+ }
goto fail;
}
ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE,
- s->cluster_sectors);
+ s->cluster_sectors, 0);
if (ret < 0) {
- qcow2_free_clusters(bs, offset, s->cluster_size,
- QCOW2_DISCARD_ALWAYS);
+ if (!preallocated) {
+ qcow2_free_clusters(bs, offset, s->cluster_size,
+ QCOW2_DISCARD_ALWAYS);
+ }
goto fail;
}
l2_dirty = true;
cluster_index = offset >> s->cluster_bits;
- assert((cluster_index >= 0) && (cluster_index < nb_clusters));
- expanded_clusters[cluster_index / 8] |= 1 << (cluster_index % 8);
+
+ if (cluster_index >= *nb_clusters) {
+ uint64_t old_bitmap_size = (*nb_clusters + 7) / 8;
+ uint64_t new_bitmap_size;
+ /* The offset may lie beyond the old end of the underlying image
+ * file for growable files only */
+ assert(bs->file->growable);
+ *nb_clusters = size_to_clusters(s, bs->file->total_sectors *
+ BDRV_SECTOR_SIZE);
+ new_bitmap_size = (*nb_clusters + 7) / 8;
+ *expanded_clusters = g_realloc(*expanded_clusters,
+ new_bitmap_size);
+ /* clear the newly allocated space */
+ memset(&(*expanded_clusters)[old_bitmap_size], 0,
+ new_bitmap_size - old_bitmap_size);
+ }
+
+ assert((cluster_index >= 0) && (cluster_index < *nb_clusters));
+ (*expanded_clusters)[cluster_index / 8] |= 1 << (cluster_index % 8);
}
if (is_active_l1) {
}
} else {
if (l2_dirty) {
- ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT &
- ~(QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2), l2_offset,
+ ret = qcow2_pre_write_overlap_check(bs,
+ QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, l2_offset,
s->cluster_size);
if (ret < 0) {
goto fail;
}
int qcow2_expand_zero_clusters(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
uint64_t *l1_table = NULL;
- int cluster_to_sector_bits = s->cluster_bits - BDRV_SECTOR_BITS;
uint64_t nb_clusters;
uint8_t *expanded_clusters;
int ret;
int i, j;
- nb_clusters = (bs->total_sectors + (1 << cluster_to_sector_bits) - 1)
- >> cluster_to_sector_bits;
- expanded_clusters = g_malloc0((nb_clusters + 7) / 8);
+ nb_clusters = size_to_clusters(s, bs->file->total_sectors *
+ BDRV_SECTOR_SIZE);
+ expanded_clusters = g_try_malloc0((nb_clusters + 7) / 8);
+ if (expanded_clusters == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
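+ /* one bit per host cluster; g_try_malloc0() returns NULL on failure
+  * instead of aborting, so we can fail gracefully with -ENOMEM */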
ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
- expanded_clusters, nb_clusters);
+ &expanded_clusters, &nb_clusters);
if (ret < 0) {
goto fail;
}
}
ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size,
- expanded_clusters, nb_clusters);
+ &expanded_clusters, &nb_clusters);
if (ret < 0) {
goto fail;
}