#include "qemu/osdep.h"
#include "qapi/error.h"
-#include "qemu-common.h"
-#include "block/block_int.h"
#include "qcow2.h"
#include "qemu/range.h"
#include "qemu/bswap.h"
#include "qemu/cutils.h"
+#include "trace.h"
-static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size);
+static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
+ uint64_t max);
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
int64_t offset, int64_t length, uint64_t addend,
bool decrease, enum qcow2_discard_type type);
}
/* Allocate the refcount block itself and mark it as used */
- int64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
+ int64_t new_block = alloc_clusters_noref(bs, s->cluster_size, INT64_MAX);
if (new_block < 0) {
return new_block;
}
+ /* The offset must fit in the offset field of the refcount table entry */
+ assert((new_block & REFT_OFFSET_MASK) == new_block);
+
/* If we're allocating the block at offset 0 then something is wrong */
if (new_block == 0) {
qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid "
/* Discard is optional, ignore the return value */
if (ret >= 0) {
- bdrv_pdiscard(bs->file->bs, d->offset, d->bytes);
+ int r2 = bdrv_pdiscard(bs->file, d->offset, d->bytes);
+ if (r2 < 0) {
+ trace_qcow2_process_discards_failed_region(d->offset, d->bytes,
+ r2);
+ }
}
g_free(d);
/* return < 0 if error */
-static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size)
+static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
+ uint64_t max)
{
BDRVQcow2State *s = bs->opaque;
uint64_t i, nb_clusters, refcount;
}
/* Make sure that all offsets in the "allocated" range are representable
- * in an int64_t */
+ * in the requested max */
if (s->free_cluster_index > 0 &&
- s->free_cluster_index - 1 > (INT64_MAX >> s->cluster_bits))
+ s->free_cluster_index - 1 > (max >> s->cluster_bits))
{
return -EFBIG;
}
BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
do {
- offset = alloc_clusters_noref(bs, size);
+ offset = alloc_clusters_noref(bs, size, QCOW_MAX_CLUSTER_OFFSET);
if (offset < 0) {
return offset;
}
free_in_cluster = s->cluster_size - offset_into_cluster(s, offset);
do {
if (!offset || free_in_cluster < size) {
- int64_t new_cluster = alloc_clusters_noref(bs, s->cluster_size);
+ int64_t new_cluster;
+
+ new_cluster = alloc_clusters_noref(bs, s->cluster_size,
+ MIN(s->cluster_offset_mask,
+ QCOW_MAX_CLUSTER_OFFSET));
if (new_cluster < 0) {
return new_cluster;
}
int nb_clusters, enum qcow2_discard_type type)
{
BDRVQcow2State *s = bs->opaque;
+ QCow2ClusterType ctype = qcow2_get_cluster_type(bs, l2_entry);
+
+ if (has_data_file(bs)) {
+ if (s->discard_passthrough[type] &&
+ (ctype == QCOW2_CLUSTER_NORMAL ||
+ ctype == QCOW2_CLUSTER_ZERO_ALLOC))
+ {
+ bdrv_pdiscard(s->data_file, l2_entry & L2E_OFFSET_MASK,
+ nb_clusters << s->cluster_bits);
+ }
+ return;
+ }
- switch (qcow2_get_cluster_type(l2_entry)) {
+ switch (ctype) {
case QCOW2_CLUSTER_COMPRESSED:
{
- int nb_csectors;
- nb_csectors = ((l2_entry >> s->csize_shift) &
- s->csize_mask) + 1;
- qcow2_free_clusters(bs,
- (l2_entry & s->cluster_offset_mask) & ~511,
- nb_csectors * 512, type);
+ int64_t offset = (l2_entry & s->cluster_offset_mask)
+ & QCOW2_COMPRESSED_SECTOR_MASK;
+ int size = QCOW2_COMPRESSED_SECTOR_SIZE *
+ (((l2_entry >> s->csize_shift) & s->csize_mask) + 1);
+ qcow2_free_clusters(bs, offset, size, type);
}
break;
case QCOW2_CLUSTER_NORMAL:
entry &= ~QCOW_OFLAG_COPIED;
offset = entry & L2E_OFFSET_MASK;
- switch (qcow2_get_cluster_type(entry)) {
+ switch (qcow2_get_cluster_type(bs, entry)) {
case QCOW2_CLUSTER_COMPRESSED:
nb_csectors = ((entry >> s->csize_shift) &
s->csize_mask) + 1;
if (addend != 0) {
+ uint64_t coffset = (entry & s->cluster_offset_mask)
+ & QCOW2_COMPRESSED_SECTOR_MASK;
ret = update_refcount(
- bs, (entry & s->cluster_offset_mask) & ~511,
- nb_csectors * 512, abs(addend), addend < 0,
+ bs, coffset,
+ nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE,
+ abs(addend), addend < 0,
QCOW2_DISCARD_SNAPSHOT);
if (ret < 0) {
goto fail;
{
BDRVQcow2State *s = bs->opaque;
uint64_t start, last, cluster_offset, k, refcount;
+ int64_t file_len;
int ret;
if (size <= 0) {
return 0;
}
+ file_len = bdrv_getlength(bs->file->bs);
+ if (file_len < 0) {
+ return file_len;
+ }
+
+ /*
+ * Last cluster of qcow2 image may be semi-allocated, so it may be OK to
+ * reference some space after file end but it should be less than one
+ * cluster.
+ */
+ if (offset + size - file_len >= s->cluster_size) {
+ fprintf(stderr, "ERROR: counting reference for region exceeding the "
+ "end of the file by one cluster or more: offset 0x%" PRIx64
+ " size 0x%" PRIx64 "\n", offset, size);
+ res->corruptions++;
+ return 0;
+ }
+
start = start_of_cluster(s, offset);
last = start_of_cluster(s, offset + size - 1);
for(cluster_offset = start; cluster_offset <= last;
static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
void **refcount_table,
int64_t *refcount_table_size, int64_t l2_offset,
- int flags, BdrvCheckMode fix)
+ int flags, BdrvCheckMode fix, bool active)
{
BDRVQcow2State *s = bs->opaque;
uint64_t *l2_table, l2_entry;
for(i = 0; i < s->l2_size; i++) {
l2_entry = be64_to_cpu(l2_table[i]);
- switch (qcow2_get_cluster_type(l2_entry)) {
+ switch (qcow2_get_cluster_type(bs, l2_entry)) {
case QCOW2_CLUSTER_COMPRESSED:
/* Compressed clusters don't have QCOW_OFLAG_COPIED */
if (l2_entry & QCOW_OFLAG_COPIED) {
res->corruptions++;
}
+ if (has_data_file(bs)) {
+ fprintf(stderr, "ERROR compressed cluster %d with data file, "
+ "entry=0x%" PRIx64 "\n", i, l2_entry);
+ res->corruptions++;
+ break;
+ }
+
/* Mark cluster as used */
nb_csectors = ((l2_entry >> s->csize_shift) &
s->csize_mask) + 1;
l2_entry &= s->cluster_offset_mask;
- ret = qcow2_inc_refcounts_imrt(bs, res,
- refcount_table, refcount_table_size,
- l2_entry & ~511, nb_csectors * 512);
+ ret = qcow2_inc_refcounts_imrt(
+ bs, res, refcount_table, refcount_table_size,
+ l2_entry & QCOW2_COMPRESSED_SECTOR_MASK,
+ nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE);
if (ret < 0) {
goto fail;
}
{
uint64_t offset = l2_entry & L2E_OFFSET_MASK;
- if (flags & CHECK_FRAG_INFO) {
- res->bfi.allocated_clusters++;
- if (next_contiguous_offset &&
- offset != next_contiguous_offset) {
- res->bfi.fragmented_clusters++;
- }
- next_contiguous_offset = offset + s->cluster_size;
- }
-
/* Correct offsets are cluster aligned */
if (offset_into_cluster(s, offset)) {
- if (qcow2_get_cluster_type(l2_entry) ==
+ res->corruptions++;
+
+ if (qcow2_get_cluster_type(bs, l2_entry) ==
QCOW2_CLUSTER_ZERO_ALLOC)
{
fprintf(stderr, "%s offset=%" PRIx64 ": Preallocated zero "
if (fix & BDRV_FIX_ERRORS) {
uint64_t l2e_offset =
l2_offset + (uint64_t)i * sizeof(uint64_t);
+ int ign = active ? QCOW2_OL_ACTIVE_L2 :
+ QCOW2_OL_INACTIVE_L2;
l2_entry = QCOW_OFLAG_ZERO;
l2_table[i] = cpu_to_be64(l2_entry);
- ret = qcow2_pre_write_overlap_check(bs,
- QCOW2_OL_ACTIVE_L2 | QCOW2_OL_INACTIVE_L2,
- l2e_offset, sizeof(uint64_t));
+ ret = qcow2_pre_write_overlap_check(bs, ign,
+ l2e_offset, sizeof(uint64_t), false);
if (ret < 0) {
fprintf(stderr, "ERROR: Overlap check failed\n");
res->check_errors++;
/* Do not abort, continue checking the rest of this
* L2 table's entries */
} else {
+ res->corruptions--;
res->corruptions_fixed++;
/* Skip marking the cluster as used
* (it is unused now) */
continue;
}
- } else {
- res->corruptions++;
}
} else {
fprintf(stderr, "ERROR offset=%" PRIx64 ": Data cluster is "
"not properly aligned; L2 entry corrupted.\n", offset);
- res->corruptions++;
}
}
+ if (flags & CHECK_FRAG_INFO) {
+ res->bfi.allocated_clusters++;
+ if (next_contiguous_offset &&
+ offset != next_contiguous_offset) {
+ res->bfi.fragmented_clusters++;
+ }
+ next_contiguous_offset = offset + s->cluster_size;
+ }
+
/* Mark cluster as used */
- ret = qcow2_inc_refcounts_imrt(bs, res,
- refcount_table, refcount_table_size,
- offset, s->cluster_size);
- if (ret < 0) {
- goto fail;
+ if (!has_data_file(bs)) {
+ ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table,
+ refcount_table_size,
+ offset, s->cluster_size);
+ if (ret < 0) {
+ goto fail;
+ }
}
break;
}
void **refcount_table,
int64_t *refcount_table_size,
int64_t l1_table_offset, int l1_size,
- int flags, BdrvCheckMode fix)
+ int flags, BdrvCheckMode fix, bool active)
{
BDRVQcow2State *s = bs->opaque;
uint64_t *l1_table = NULL, l2_offset, l1_size2;
/* Process and check L2 entries */
ret = check_refcounts_l2(bs, res, refcount_table,
refcount_table_size, l2_offset, flags,
- fix);
+ fix, active);
if (ret < 0) {
goto fail;
}
for (i = 0; i < s->l1_size; i++) {
uint64_t l1_entry = s->l1_table[i];
uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK;
- bool l2_dirty = false;
+ int l2_dirty = 0;
if (!l2_offset) {
continue;
continue;
}
if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) {
+ res->corruptions++;
fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_index=%d "
"l1_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
repair ? "Repairing" : "ERROR", i, l1_entry, refcount);
res->check_errors++;
goto fail;
}
+ res->corruptions--;
res->corruptions_fixed++;
- } else {
- res->corruptions++;
}
}
for (j = 0; j < s->l2_size; j++) {
uint64_t l2_entry = be64_to_cpu(l2_table[j]);
uint64_t data_offset = l2_entry & L2E_OFFSET_MASK;
- QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);
+ QCow2ClusterType cluster_type = qcow2_get_cluster_type(bs, l2_entry);
if (cluster_type == QCOW2_CLUSTER_NORMAL ||
cluster_type == QCOW2_CLUSTER_ZERO_ALLOC) {
- ret = qcow2_get_refcount(bs,
- data_offset >> s->cluster_bits,
- &refcount);
- if (ret < 0) {
- /* don't print message nor increment check_errors */
- continue;
+ if (has_data_file(bs)) {
+ refcount = 1;
+ } else {
+ ret = qcow2_get_refcount(bs,
+ data_offset >> s->cluster_bits,
+ &refcount);
+ if (ret < 0) {
+ /* don't print message nor increment check_errors */
+ continue;
+ }
}
if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
+ res->corruptions++;
fprintf(stderr, "%s OFLAG_COPIED data cluster: "
"l2_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
repair ? "Repairing" : "ERROR", l2_entry, refcount);
l2_table[j] = cpu_to_be64(refcount == 1
? l2_entry | QCOW_OFLAG_COPIED
: l2_entry & ~QCOW_OFLAG_COPIED);
- l2_dirty = true;
- res->corruptions_fixed++;
- } else {
- res->corruptions++;
+ l2_dirty++;
}
}
}
}
- if (l2_dirty) {
+ if (l2_dirty > 0) {
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
- l2_offset, s->cluster_size);
+ l2_offset, s->cluster_size,
+ false);
if (ret < 0) {
fprintf(stderr, "ERROR: Could not write L2 table; metadata "
"overlap check failed: %s\n", strerror(-ret));
res->check_errors++;
goto fail;
}
+ res->corruptions -= l2_dirty;
+ res->corruptions_fixed += l2_dirty;
}
}
}
if (cluster >= *nb_clusters) {
+ res->corruptions++;
fprintf(stderr, "%s refcount block %" PRId64 " is outside image\n",
fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
goto resize_fail;
}
- ret = bdrv_truncate(bs->file, offset + s->cluster_size,
+ ret = bdrv_truncate(bs->file, offset + s->cluster_size, false,
PREALLOC_MODE_OFF, &local_err);
if (ret < 0) {
error_report_err(local_err);
goto resize_fail;
}
+ res->corruptions--;
res->corruptions_fixed++;
ret = qcow2_inc_refcounts_imrt(bs, res,
refcount_table, nb_clusters,
continue;
resize_fail:
- res->corruptions++;
*rebuild = true;
fprintf(stderr, "ERROR could not resize image: %s\n",
strerror(-ret));
- } else {
- res->corruptions++;
}
continue;
}
/* current L1 table */
ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
s->l1_table_offset, s->l1_size, CHECK_FRAG_INFO,
- fix);
+ fix, true);
if (ret < 0) {
return ret;
}
/* snapshots */
+ if (has_data_file(bs) && s->nb_snapshots) {
+ fprintf(stderr, "ERROR %d snapshots in image with data file\n",
+ s->nb_snapshots);
+ res->corruptions++;
+ }
+
for (i = 0; i < s->nb_snapshots; i++) {
sn = s->snapshots + i;
if (offset_into_cluster(s, sn->l1_table_offset)) {
continue;
}
ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
- sn->l1_table_offset, sn->l1_size, 0, fix);
+ sn->l1_table_offset, sn->l1_size, 0, fix,
+ false);
if (ret < 0) {
return ret;
}
}
ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
- s->cluster_size);
+ s->cluster_size, false);
if (ret < 0) {
fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
goto fail;
on_disk_refblock = (void *)((char *) *refcount_table +
refblock_index * s->cluster_size);
- ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE,
- on_disk_refblock, s->cluster_sectors);
+ ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock,
+ s->cluster_size);
if (ret < 0) {
fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
goto fail;
}
ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset,
- reftable_size * sizeof(uint64_t));
+ reftable_size * sizeof(uint64_t),
+ false);
if (ret < 0) {
fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
goto fail;
}
}
+ if ((chk & QCOW2_OL_BITMAP_DIRECTORY) &&
+ (s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS))
+ {
+ if (overlaps_with(s->bitmap_directory_offset,
+ s->bitmap_directory_size))
+ {
+ return QCOW2_OL_BITMAP_DIRECTORY;
+ }
+ }
+
return 0;
}
static const char *metadata_ol_names[] = {
- [QCOW2_OL_MAIN_HEADER_BITNR] = "qcow2_header",
- [QCOW2_OL_ACTIVE_L1_BITNR] = "active L1 table",
- [QCOW2_OL_ACTIVE_L2_BITNR] = "active L2 table",
- [QCOW2_OL_REFCOUNT_TABLE_BITNR] = "refcount table",
- [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = "refcount block",
- [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = "snapshot table",
- [QCOW2_OL_INACTIVE_L1_BITNR] = "inactive L1 table",
- [QCOW2_OL_INACTIVE_L2_BITNR] = "inactive L2 table",
+ [QCOW2_OL_MAIN_HEADER_BITNR] = "qcow2_header",
+ [QCOW2_OL_ACTIVE_L1_BITNR] = "active L1 table",
+ [QCOW2_OL_ACTIVE_L2_BITNR] = "active L2 table",
+ [QCOW2_OL_REFCOUNT_TABLE_BITNR] = "refcount table",
+ [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = "refcount block",
+ [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = "snapshot table",
+ [QCOW2_OL_INACTIVE_L1_BITNR] = "inactive L1 table",
+ [QCOW2_OL_INACTIVE_L2_BITNR] = "inactive L2 table",
+ [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = "bitmap directory",
};
+QEMU_BUILD_BUG_ON(QCOW2_OL_MAX_BITNR != ARRAY_SIZE(metadata_ol_names)); /*
+ * metadata_ol_names[] holds one descriptive string per QCOW2_OL_..._BITNR
+ * index; this revision adds the "bitmap directory" entry. The check on the
+ * line above compares the number of table entries with QCOW2_OL_MAX_BITNR
+ * at build time (QEMU_BUILD_BUG_ON), so the two have to be kept in step.
+ */
/*
* First performs a check for metadata overlaps (through
* overlaps; or a negative value (-errno) on error.
*/
int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
- int64_t size)
+ int64_t size, bool data_file)
{
- int ret = qcow2_check_metadata_overlap(bs, ign, offset, size);
+ int ret;
+
+ if (data_file && has_data_file(bs)) {
+ return 0;
+ }
+ ret = qcow2_check_metadata_overlap(bs, ign, offset, size);
if (ret < 0) {
return ret;
} else if (ret > 0) {
if (reftable_index < *reftable_size && (*reftable)[reftable_index]) {
offset = (*reftable)[reftable_index];
- ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
+ ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size,
+ false);
if (ret < 0) {
error_setg_errno(errp, -ret, "Overlap check failed");
return ret;
/* Write the new reftable */
ret = qcow2_pre_write_overlap_check(bs, 0, new_reftable_offset,
- new_reftable_size * sizeof(uint64_t));
+ new_reftable_size * sizeof(uint64_t),
+ false);
if (ret < 0) {
error_setg_errno(errp, -ret, "Overlap check failed");
goto done;
"There are no references in the refcount table.");
return -EIO;
}
+/*
+ * Compares the number of clusters that have a non-zero refcount with the
+ * amount of space reported by bdrv_get_allocated_file_size() for the
+ * underlying file (bs->file->bs).
+ *
+ * Returns 1 if the number of referenced clusters reaches the threshold
+ * computed below, 0 if it does not, and a negative value if querying the
+ * file length, the allocated size or a refcount fails (presumably -errno;
+ * the value is passed through unchanged from the failing call).
+ *
+ * NOTE(review): judging by the function name, a result of 1 is taken to
+ * mean that metadata was preallocated -- confirm against the callers.
+ *
+ * s->lock is asserted to be held on entry.
+ */
+int qcow2_detect_metadata_preallocation(BlockDriverState *bs)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int64_t i, end_cluster, cluster_count = 0, threshold;
+ int64_t file_length, real_allocation, real_clusters;
+
+ qemu_co_mutex_assert_locked(&s->lock); /* s->lock must already be held */
+
+ file_length = bdrv_getlength(bs->file->bs);
+ if (file_length < 0) {
+ return file_length;
+ }
+
+ real_allocation = bdrv_get_allocated_file_size(bs->file->bs);
+ if (real_allocation < 0) {
+ return real_allocation;
+ }
+ /*
+ * Express the allocated size in whole clusters (integer division), then
+ * pick the larger of real_clusters * 10 / 9 and real_clusters + 2 as the
+ * threshold. Presumably the margin keeps small differences between the
+ * two figures from producing a positive result -- TODO confirm intent.
+ */
+ real_clusters = real_allocation / s->cluster_size;
+ threshold = MAX(real_clusters * 10 / 9, real_clusters + 2);
+ /*
+ * Walk cluster indexes from 0 up to end_cluster (derived from the file
+ * length via size_to_clusters()) and count those whose refcount is
+ * non-zero. The loop stops early once the count reaches the threshold,
+ * because the result can no longer change after that point.
+ */
+ end_cluster = size_to_clusters(s, file_length);
+ for (i = 0; i < end_cluster && cluster_count < threshold; i++) {
+ uint64_t refcount;
+ int ret = qcow2_get_refcount(bs, i, &refcount);
+ if (ret < 0) {
+ return ret;
+ }
+ cluster_count += !!refcount; /* adds 1 for any non-zero refcount */
+ }
+
+ return cluster_count >= threshold;
+}