X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/32b6444d23d0ff618d73e5b766600cd258066169..36f5db59cff03fd289541da104249f00356a19a7:/block/qcow2-refcount.c diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 4264148142..9507aef847 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -28,7 +28,7 @@ #include "qemu/range.h" #include "qapi/qmp/types.h" -static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size); +static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size); static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, int64_t offset, int64_t length, int addend, enum qcow2_discard_type type); @@ -40,8 +40,10 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, int qcow2_refcount_init(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; - int ret, refcount_table_size2, i; + unsigned int refcount_table_size2, i; + int ret; + assert(s->refcount_table_size <= INT_MAX / sizeof(uint64_t)); refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t); s->refcount_table = g_malloc(refcount_table_size2); if (s->refcount_table_size > 0) { @@ -87,7 +89,7 @@ static int load_refcount_block(BlockDriverState *bs, static int get_refcount(BlockDriverState *bs, int64_t cluster_index) { BDRVQcowState *s = bs->opaque; - int refcount_table_index, block_index; + uint64_t refcount_table_index, block_index; int64_t refcount_block_offset; int ret; uint16_t *refcount_block; @@ -96,7 +98,8 @@ static int get_refcount(BlockDriverState *bs, int64_t cluster_index) refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT); if (refcount_table_index >= s->refcount_table_size) return 0; - refcount_block_offset = s->refcount_table[refcount_table_index]; + refcount_block_offset = + s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK; if (!refcount_block_offset) return 0; @@ -191,10 +194,11 @@ static int alloc_refcount_block(BlockDriverState *bs, * they can describe them themselves. * * - We need to consider that at this point we are inside update_refcounts - * and doing the initial refcount increase. This means that some clusters - * have already been allocated by the caller, but their refcount isn't - * accurate yet. free_cluster_index tells us where this allocation ends - * as long as we don't overwrite it by freeing clusters. + * and potentially doing an initial refcount increase. This means that + * some clusters have already been allocated by the caller, but their + * refcount isn't accurate yet. If we allocate clusters for metadata, we + * need to return -EAGAIN to signal the caller that it needs to restart + * the search for free clusters. * * - alloc_clusters_noref and qcow2_free_clusters may load a different * refcount block into the cache @@ -279,7 +283,10 @@ static int alloc_refcount_block(BlockDriverState *bs, } s->refcount_table[refcount_table_index] = new_block; - return 0; + + /* The new refcount block may be where the caller intended to put its + * data, so let it restart the search. */ + return -EAGAIN; } ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block); @@ -302,8 +309,11 @@ static int alloc_refcount_block(BlockDriverState *bs, /* Calculate the number of refcount blocks needed so far */ uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT); - uint64_t blocks_used = (s->free_cluster_index + - refcount_block_clusters - 1) / refcount_block_clusters; + uint64_t blocks_used = DIV_ROUND_UP(cluster_index, refcount_block_clusters); + + if (blocks_used > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) { + return -EFBIG; + } /* And now we need at least one block more for the new metadata */ uint64_t table_size = next_refcount_table_size(s, blocks_used + 1); @@ -336,8 +346,6 @@ static int alloc_refcount_block(BlockDriverState *bs, uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size); uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t)); - assert(meta_offset >= (s->free_cluster_index * s->cluster_size)); - /* Fill the new refcount table */ memcpy(new_table, s->refcount_table, s->refcount_table_size * sizeof(uint64_t)); @@ -400,18 +408,19 @@ static int alloc_refcount_block(BlockDriverState *bs, s->refcount_table_size = table_size; s->refcount_table_offset = table_offset; - /* Free old table. Remember, we must not change free_cluster_index */ - uint64_t old_free_cluster_index = s->free_cluster_index; + /* Free old table. */ qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t), QCOW2_DISCARD_OTHER); - s->free_cluster_index = old_free_cluster_index; ret = load_refcount_block(bs, new_block, (void**) refcount_block); if (ret < 0) { return ret; } - return 0; + /* If we were trying to do the initial refcount update for some cluster + * allocation, we might have used the same clusters to store newly + * allocated metadata. Make the caller search some new space. */ + return -EAGAIN; fail_table: g_free(new_table); @@ -515,8 +524,8 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, s->l2_table_cache); } - start = offset & ~(s->cluster_size - 1); - last = (offset + length - 1) & ~(s->cluster_size - 1); + start = start_of_cluster(s, offset); + last = start_of_cluster(s, offset + length - 1); for(cluster_offset = start; cluster_offset <= last; cluster_offset += s->cluster_size) { @@ -626,15 +635,16 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs, /* return < 0 if error */ -static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size) +static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size) { BDRVQcowState *s = bs->opaque; - int i, nb_clusters, refcount; + uint64_t i, nb_clusters; + int refcount; nb_clusters = size_to_clusters(s, size); retry: for(i = 0; i < nb_clusters; i++) { - int64_t next_cluster_index = s->free_cluster_index++; + uint64_t next_cluster_index = s->free_cluster_index++; refcount = get_refcount(bs, next_cluster_index); if (refcount < 0) { @@ -643,6 +653,15 @@ retry: goto retry; } } + + /* Make sure that all offsets in the "allocated" range are representable + * in an int64_t */ + if (s->free_cluster_index > 0 && + s->free_cluster_index - 1 > (INT64_MAX >> s->cluster_bits)) + { + return -EFBIG; + } + #ifdef DEBUG_ALLOC2 fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n", size, @@ -651,18 +670,21 @@ retry: return (s->free_cluster_index - nb_clusters) << s->cluster_bits; } -int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size) +int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size) { int64_t offset; int ret; BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC); - offset = alloc_clusters_noref(bs, size); - if (offset < 0) { - return offset; - } + do { + offset = alloc_clusters_noref(bs, size); + if (offset < 0) { + return offset; + } + + ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER); + } while (ret == -EAGAIN); - ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER); if (ret < 0) { return ret; } @@ -675,33 +697,36 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, { BDRVQcowState *s = bs->opaque; uint64_t cluster_index; - uint64_t old_free_cluster_index; - int i, refcount, ret; + uint64_t i; + int refcount, ret; - /* Check how many clusters there are free */ - cluster_index = offset >> s->cluster_bits; - for(i = 0; i < nb_clusters; i++) { - refcount = get_refcount(bs, cluster_index++); + assert(nb_clusters >= 0); + if (nb_clusters == 0) { + return 0; + } - if (refcount < 0) { - return refcount; - } else if (refcount != 0) { - break; + do { + /* Check how many clusters there are free */ + cluster_index = offset >> s->cluster_bits; + for(i = 0; i < nb_clusters; i++) { + refcount = get_refcount(bs, cluster_index++); + + if (refcount < 0) { + return refcount; + } else if (refcount != 0) { + break; + } } - } - /* And then allocate them */ - old_free_cluster_index = s->free_cluster_index; - s->free_cluster_index = cluster_index + i; + /* And then allocate them */ + ret = update_refcount(bs, offset, i << s->cluster_bits, 1, + QCOW2_DISCARD_NEVER); + } while (ret == -EAGAIN); - ret = update_refcount(bs, offset, i << s->cluster_bits, 1, - QCOW2_DISCARD_NEVER); if (ret < 0) { return ret; } - s->free_cluster_index = old_free_cluster_index; - return i; } @@ -724,7 +749,7 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) } redo: free_in_cluster = s->cluster_size - - (s->free_byte_offset & (s->cluster_size - 1)); + offset_into_cluster(s, s->free_byte_offset); if (size <= free_in_cluster) { /* enough space in current cluster */ offset = s->free_byte_offset; @@ -732,7 +757,7 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) free_in_cluster -= size; if (free_in_cluster == 0) s->free_byte_offset = 0; - if ((offset & (s->cluster_size - 1)) != 0) + if (offset_into_cluster(s, offset) != 0) qcow2_update_cluster_refcount(bs, offset >> s->cluster_bits, 1, QCOW2_DISCARD_NEVER); } else { @@ -740,7 +765,7 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) if (offset < 0) { return offset; } - cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1); + cluster_offset = start_of_cluster(s, s->free_byte_offset); if ((cluster_offset + s->cluster_size) == offset) { /* we are lucky: contiguous data */ offset = s->free_byte_offset; @@ -796,11 +821,13 @@ void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry, } break; case QCOW2_CLUSTER_NORMAL: - qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK, - nb_clusters << s->cluster_bits, type); + case QCOW2_CLUSTER_ZERO: + if (l2_entry & L2E_OFFSET_MASK) { + qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK, + nb_clusters << s->cluster_bits, type); + } break; case QCOW2_CLUSTER_UNALLOCATED: - case QCOW2_CLUSTER_ZERO: break; default: abort(); @@ -874,7 +901,6 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, nb_csectors = ((offset >> s->csize_shift) & s->csize_mask) + 1; if (addend != 0) { - int ret; ret = update_refcount(bs, (offset & s->cluster_offset_mask) & ~511, nb_csectors * 512, addend, @@ -1003,22 +1029,17 @@ static void inc_refcounts(BlockDriverState *bs, int64_t offset, int64_t size) { BDRVQcowState *s = bs->opaque; - int64_t start, last, cluster_offset; - int k; + uint64_t start, last, cluster_offset, k; if (size <= 0) return; - start = offset & ~(s->cluster_size - 1); - last = (offset + size - 1) & ~(s->cluster_size - 1); + start = start_of_cluster(s, offset); + last = start_of_cluster(s, offset + size - 1); for(cluster_offset = start; cluster_offset <= last; cluster_offset += s->cluster_size) { k = cluster_offset >> s->cluster_bits; - if (k < 0) { - fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n", - cluster_offset); - res->corruptions++; - } else if (k >= refcount_table_size) { + if (k >= refcount_table_size) { fprintf(stderr, "Warning: cluster offset=0x%" PRIx64 " is after " "the end of the image file, can't properly check refcounts.\n", cluster_offset); @@ -1035,7 +1056,6 @@ static void inc_refcounts(BlockDriverState *bs, /* Flags for check_refcounts_l1() and check_refcounts_l2() */ enum { - CHECK_OFLAG_COPIED = 0x1, /* check QCOW_OFLAG_COPIED matches refcount */ CHECK_FRAG_INFO = 0x2, /* update BlockFragInfo counters */ }; @@ -1122,7 +1142,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, offset, s->cluster_size); /* Correct offsets are cluster aligned */ - if (offset & (s->cluster_size - 1)) { + if (offset_into_cluster(s, offset)) { fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not " "properly aligned; L2 entry corrupted.\n", offset); res->corruptions++; @@ -1194,7 +1214,7 @@ static int check_refcounts_l1(BlockDriverState *bs, l2_offset, s->cluster_size); /* L2 tables are cluster aligned */ - if (l2_offset & (s->cluster_size - 1)) { + if (offset_into_cluster(s, l2_offset)) { fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not " "cluster aligned; L1 entry corrupted\n", l2_offset); res->corruptions++; @@ -1311,9 +1331,8 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, } if (l2_dirty) { - ret = qcow2_pre_write_overlap_check(bs, - QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L2, l2_offset, - s->cluster_size); + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2, + l2_offset, s->cluster_size); if (ret < 0) { fprintf(stderr, "ERROR: Could not write L2 table; metadata " "overlap check failed: %s\n", strerror(-ret)); @@ -1354,8 +1373,7 @@ static int write_reftable_entry(BlockDriverState *bs, int rt_index) buf[i] = cpu_to_be64(s->refcount_table[rt_start_index + i]); } - ret = qcow2_pre_write_overlap_check(bs, - QCOW2_OL_DEFAULT & ~QCOW2_OL_REFCOUNT_TABLE, + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_TABLE, s->refcount_table_offset + rt_start_index * sizeof(uint64_t), sizeof(buf)); if (ret < 0) { @@ -1378,7 +1396,7 @@ static int write_reftable_entry(BlockDriverState *bs, int rt_index) * does _not_ decrement the reference count for the currently occupied cluster. * * This function prints an informative message to stderr on error (and returns - * -errno); on success, 0 is returned. + * -errno); on success, the offset of the newly allocated cluster is returned. */ static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index, uint64_t offset) @@ -1394,27 +1412,25 @@ static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index, fprintf(stderr, "Could not allocate new cluster: %s\n", strerror(-new_offset)); ret = new_offset; - goto fail; + goto done; } /* fetch current refcount block content */ ret = qcow2_cache_get(bs, s->refcount_block_cache, offset, &refcount_block); if (ret < 0) { fprintf(stderr, "Could not fetch refcount block: %s\n", strerror(-ret)); - goto fail; + goto fail_free_cluster; } /* new block has not yet been entered into refcount table, therefore it is * no refcount block yet (regarding this check) */ - ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, new_offset, - s->cluster_size); + ret = qcow2_pre_write_overlap_check(bs, 0, new_offset, s->cluster_size); if (ret < 0) { fprintf(stderr, "Could not write refcount block; metadata overlap " "check failed: %s\n", strerror(-ret)); /* the image will be marked corrupt, so don't even attempt on freeing * the cluster */ - new_offset = 0; - goto fail; + goto done; } /* write to new block */ @@ -1422,34 +1438,37 @@ static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index, s->cluster_sectors); if (ret < 0) { fprintf(stderr, "Could not write refcount block: %s\n", strerror(-ret)); - goto fail; + goto fail_free_cluster; } /* update refcount table */ - assert(!(new_offset & (s->cluster_size - 1))); + assert(!offset_into_cluster(s, new_offset)); s->refcount_table[reftable_index] = new_offset; ret = write_reftable_entry(bs, reftable_index); if (ret < 0) { fprintf(stderr, "Could not update refcount table: %s\n", strerror(-ret)); - goto fail; + goto fail_free_cluster; } -fail: - if (new_offset && (ret < 0)) { - qcow2_free_clusters(bs, new_offset, s->cluster_size, - QCOW2_DISCARD_ALWAYS); - } + goto done; + +fail_free_cluster: + qcow2_free_clusters(bs, new_offset, s->cluster_size, QCOW2_DISCARD_OTHER); + +done: if (refcount_block) { - if (ret < 0) { - qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); - } else { - ret = qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); - } + /* This should never fail, as it would only do so if the given refcount + * block cannot be found in the cache. As this is impossible as long as + * there are no bugs, assert the success. */ + int tmp = qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); + assert(tmp == 0); } + if (ret < 0) { return ret; } + return new_offset; } @@ -1463,14 +1482,24 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { BDRVQcowState *s = bs->opaque; - int64_t size, i, highest_cluster; - int nb_clusters, refcount1, refcount2; + int64_t size, i, highest_cluster, nb_clusters; + int refcount1, refcount2; QCowSnapshot *sn; uint16_t *refcount_table; int ret; size = bdrv_getlength(bs->file); + if (size < 0) { + res->check_errors++; + return size; + } + nb_clusters = size_to_clusters(s, size); + if (nb_clusters > INT_MAX) { + res->check_errors++; + return -EFBIG; + } + refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t)); res->bfi.total_clusters = @@ -1482,8 +1511,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, /* current L1 table */ ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters, - s->l1_table_offset, s->l1_size, - CHECK_OFLAG_COPIED | CHECK_FRAG_INFO); + s->l1_table_offset, s->l1_size, CHECK_FRAG_INFO); if (ret < 0) { goto fail; } @@ -1511,7 +1539,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, cluster = offset >> s->cluster_bits; /* Refcount blocks are cluster aligned */ - if (offset & (s->cluster_size - 1)) { + if (offset_into_cluster(s, offset)) { fprintf(stderr, "ERROR refcount block %" PRId64 " is not " "cluster aligned; refcount table entry corrupted\n", i); res->corruptions++; @@ -1640,8 +1668,8 @@ fail: * looking for overlaps with important metadata sections (L1/L2 tables etc.), * i.e. a sanity check without relying on the refcount tables. * - * The chk parameter specifies exactly what checks to perform (being a bitmask - * of QCow2MetadataOverlap values). + * The ign parameter specifies what checks not to perform (being a bitmask of + * QCow2MetadataOverlap values), i.e., what sections to ignore. * * Returns: * - 0 if writing to this offset will not affect the mentioned metadata @@ -1649,10 +1677,11 @@ fail: * - a negative value (-errno) indicating an error while performing a check, * e.g. when bdrv_read failed on QCOW2_OL_INACTIVE_L2 */ -int qcow2_check_metadata_overlap(BlockDriverState *bs, int chk, int64_t offset, +int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, int64_t size) { BDRVQcowState *s = bs->opaque; + int chk = s->overlap_check & ~ign; int i, j; if (!size) { @@ -1722,20 +1751,19 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int chk, int64_t offset, for (i = 0; i < s->nb_snapshots; i++) { uint64_t l1_ofs = s->snapshots[i].l1_table_offset; uint32_t l1_sz = s->snapshots[i].l1_size; - uint64_t *l1 = g_malloc(l1_sz * sizeof(uint64_t)); + uint64_t l1_sz2 = l1_sz * sizeof(uint64_t); + uint64_t *l1 = g_malloc(l1_sz2); int ret; - ret = bdrv_read(bs->file, l1_ofs / BDRV_SECTOR_SIZE, (uint8_t *)l1, - l1_sz * sizeof(uint64_t) / BDRV_SECTOR_SIZE); - + ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2); if (ret < 0) { g_free(l1); return ret; } for (j = 0; j < l1_sz; j++) { - if ((l1[j] & L1E_OFFSET_MASK) && - overlaps_with(l1[j] & L1E_OFFSET_MASK, s->cluster_size)) { + uint64_t l2_ofs = be64_to_cpu(l1[j]) & L1E_OFFSET_MASK; + if (l2_ofs && overlaps_with(l2_ofs, s->cluster_size)) { g_free(l1); return QCOW2_OL_INACTIVE_L2; } @@ -1769,10 +1797,10 @@ static const char *metadata_ol_names[] = { * Returns 0 if there were neither overlaps nor errors while checking for * overlaps; or a negative value (-errno) on error. */ -int qcow2_pre_write_overlap_check(BlockDriverState *bs, int chk, int64_t offset, +int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset, int64_t size) { - int ret = qcow2_check_metadata_overlap(bs, chk, offset, size); + int ret = qcow2_check_metadata_overlap(bs, ign, offset, size); if (ret < 0) { return ret;