2 * Block driver for the QCOW version 2 format
4 * Copyright (c) 2004-2006 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "qemu/osdep.h"
26 #include "qapi/error.h"
28 #include "qemu/bswap.h"
29 #include "qemu/error-report.h"
30 #include "qemu/cutils.h"
/*
 * Free the per-snapshot strings of the in-memory snapshot list of @bs.
 * Walks all s->nb_snapshots entries and releases each entry's name and
 * ID string with g_free().
 */
32 void qcow2_free_snapshots(BlockDriverState *bs)
34 BDRVQcow2State *s = bs->opaque;
37 for(i = 0; i < s->nb_snapshots; i++) {
38 g_free(s->snapshots[i].name);
39 g_free(s->snapshots[i].id_str);
/*
 * Load the snapshot table of @bs from the image file into s->snapshots.
 *
 * Reading starts at s->snapshots_offset.  Every entry consists of a
 * statically sized header, a variable amount of extra data, the snapshot ID
 * string and the snapshot name.  Multi-byte fields are stored big-endian on
 * disk and are converted to host byte order here.  On completion
 * s->snapshots_size holds the number of bytes the table occupies.
 */
46 int qcow2_read_snapshots(BlockDriverState *bs)
48 BDRVQcow2State *s = bs->opaque;
50 QCowSnapshotExtraData extra;
52 int i, id_str_size, name_size;
54 uint32_t extra_data_size;
/* Nothing to read for an image without snapshots: the table is empty */
57 if (!s->nb_snapshots) {
59 s->snapshots_size = 0;
63 offset = s->snapshots_offset;
/* Zero-initialised array with one slot per snapshot */
64 s->snapshots = g_new0(QCowSnapshot, s->nb_snapshots);
66 for(i = 0; i < s->nb_snapshots; i++) {
67 /* Read statically sized part of the snapshot header */
/* Each table entry begins on an 8-byte boundary */
68 offset = ROUND_UP(offset, 8);
69 ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
75 sn = s->snapshots + i;
76 sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
77 sn->l1_size = be32_to_cpu(h.l1_size);
78 sn->vm_state_size = be32_to_cpu(h.vm_state_size);
79 sn->date_sec = be32_to_cpu(h.date_sec);
80 sn->date_nsec = be32_to_cpu(h.date_nsec);
81 sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
82 extra_data_size = be32_to_cpu(h.extra_data_size);
84 id_str_size = be16_to_cpu(h.id_str_size);
85 name_size = be16_to_cpu(h.name_size);
/*
 * Read no more extra data than the structure known to this code can hold;
 * the offset is nevertheless advanced by the full on-disk size below, so
 * any additional extra data is skipped.
 */
88 ret = bdrv_pread(bs->file, offset, &extra,
89 MIN(sizeof(extra), extra_data_size));
93 offset += extra_data_size;
/* With at least 8 bytes of extra data, the 64-bit VM state size replaces
 * the 32-bit value taken from the header */
95 if (extra_data_size >= 8) {
96 sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
/* With at least 16 bytes of extra data, the disk size is stored too */
99 if (extra_data_size >= 16) {
100 sn->disk_size = be64_to_cpu(extra.disk_size);
/* NOTE(review): presumably the fallback branch for entries without a stored
 * disk size (the branch keyword is not shown here): use the current image
 * size in bytes */
102 sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
105 /* Read snapshot ID */
/* One additional byte for the terminating NUL written after the read */
106 sn->id_str = g_malloc(id_str_size + 1);
107 ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
111 offset += id_str_size;
112 sn->id_str[id_str_size] = '\0';
114 /* Read snapshot name */
/* Same scheme as for the ID: name_size bytes plus a terminating NUL */
115 sn->name = g_malloc(name_size + 1);
116 ret = bdrv_pread(bs->file, offset, sn->name, name_size);
121 sn->name[name_size] = '\0';
/* Bound the number of table bytes consumed so far */
123 if (offset - s->snapshots_offset > QCOW_MAX_SNAPSHOTS_SIZE) {
/* The table length computed below must be representable as an int */
129 assert(offset - s->snapshots_offset <= INT_MAX);
130 s->snapshots_size = offset - s->snapshots_offset;
/* NOTE(review): presumably the error path - release what has been loaded */
134 qcow2_free_snapshots(bs);
138 /* add at the end of the file a new list of snapshots */
/*
 * Serialises s->snapshots into newly allocated clusters and then switches
 * the image header over to the new table:
 *   1. compute the size the table needs,
 *   2. allocate clusters for it and write every entry (header, extra data,
 *      ID string, name),
 *   3. flush, then update nb_snapshots/snapshots_offset in the image header,
 *   4. free the clusters of the previous table and update the in-memory
 *      table offset and size.
 */
139 static int qcow2_write_snapshots(BlockDriverState *bs)
141 BDRVQcow2State *s = bs->opaque;
143 QCowSnapshotHeader h;
144 QCowSnapshotExtraData extra;
145 int i, name_size, id_str_size, snapshots_size;
/* Packed pair of fields that is written into the image header with a single
 * write once the new table is on disk */
147 uint32_t nb_snapshots;
148 uint64_t snapshots_offset;
149 } QEMU_PACKED header_data;
150 int64_t offset, snapshots_offset = 0;
153 /* compute the size of the snapshots */
155 for(i = 0; i < s->nb_snapshots; i++) {
156 sn = s->snapshots + i;
/* Entries are 8-byte aligned, mirroring the alignment applied when the
 * entries are written further down */
157 offset = ROUND_UP(offset, 8);
159 offset += sizeof(extra);
160 offset += strlen(sn->id_str);
161 offset += strlen(sn->name);
/* Enforce the upper bound on the snapshot table size */
163 if (offset > QCOW_MAX_SNAPSHOTS_SIZE) {
/* snapshots_size is an int, so the total has to fit */
169 assert(offset <= INT_MAX);
170 snapshots_size = offset;
172 /* Allocate space for the new snapshot list */
173 snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
174 offset = snapshots_offset;
179 ret = bdrv_flush(bs);
184 /* The snapshot list position has not yet been updated, so these clusters
185 * must indeed be completely free */
186 ret = qcow2_pre_write_overlap_check(bs, 0, offset, snapshots_size, false);
192 /* Write all snapshots to the new list */
193 for(i = 0; i < s->nb_snapshots; i++) {
194 sn = s->snapshots + i;
/* Start from an all-zero header so that fields not set below are zero */
195 memset(&h, 0, sizeof(h));
196 h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
197 h.l1_size = cpu_to_be32(sn->l1_size);
198 /* If it doesn't fit in 32 bit, older implementations should treat it
199 * as a disk-only snapshot rather than truncate the VM state */
200 if (sn->vm_state_size <= 0xffffffff) {
201 h.vm_state_size = cpu_to_be32(sn->vm_state_size);
203 h.date_sec = cpu_to_be32(sn->date_sec);
204 h.date_nsec = cpu_to_be32(sn->date_nsec);
205 h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
206 h.extra_data_size = cpu_to_be32(sizeof(extra));
/* The extra data always carries the full 64-bit VM state size as well as
 * the disk size */
208 memset(&extra, 0, sizeof(extra));
209 extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
210 extra.disk_size = cpu_to_be64(sn->disk_size);
212 id_str_size = strlen(sn->id_str);
213 name_size = strlen(sn->name);
/* Both lengths are stored in 16-bit header fields */
214 assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX);
215 h.id_str_size = cpu_to_be16(id_str_size);
216 h.name_size = cpu_to_be16(name_size);
217 offset = ROUND_UP(offset, 8);
219 ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
225 ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
229 offset += sizeof(extra);
/* The strings are written without a terminating NUL; their lengths are
 * recorded in the header fields set above */
231 ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
235 offset += id_str_size;
237 ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
245 * Update the header to point to the new snapshot table. This requires the
246 * new table and its refcounts to be stable on disk.
248 ret = bdrv_flush(bs);
/* Build-time check that snapshots_offset directly follows nb_snapshots in
 * QCowHeader, which is what allows both to be written as one header_data
 * blob */
253 QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
254 offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));
256 header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots);
257 header_data.snapshots_offset = cpu_to_be64(snapshots_offset);
259 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
260 &header_data, sizeof(header_data));
265 /* free the old snapshot table */
266 qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
267 QCOW2_DISCARD_SNAPSHOT);
/* From here on the in-memory state refers to the new table */
268 s->snapshots_offset = snapshots_offset;
269 s->snapshots_size = snapshots_size;
/* NOTE(review): presumably the failure path - give back the clusters of the
 * new table if they had already been allocated */
273 if (snapshots_offset > 0) {
274 qcow2_free_clusters(bs, snapshots_offset, snapshots_size,
275 QCOW2_DISCARD_ALWAYS);
/*
 * Produce a fresh snapshot ID string for @bs.
 *
 * Every existing snapshot ID is parsed as a decimal number with strtoul().
 * The result, id_max + 1 formatted as a decimal number, is written to
 * @id_str, which can hold @id_str_size bytes (snprintf() truncates if
 * necessary).
 * NOTE(review): id_max is presumably the largest parsed ID; the statement
 * updating it is not shown here.
 */
280 static void find_new_snapshot_id(BlockDriverState *bs,
281 char *id_str, int id_str_size)
283 BDRVQcow2State *s = bs->opaque;
286 unsigned long id, id_max = 0;
288 for(i = 0; i < s->nb_snapshots; i++) {
289 sn = s->snapshots + i;
290 id = strtoul(sn->id_str, NULL, 10);
294 snprintf(id_str, id_str_size, "%lu", id_max + 1);
/*
 * Look up a snapshot of @bs by ID and/or name using exact string
 * comparison.  Three searches over the snapshot list are present: one
 * requiring both the ID and the name to match, one matching the ID only and
 * one matching the name only.
 * NOTE(review): presumably the search is selected by which of the two
 * strings is non-NULL (callers in this file pass NULL for one of them);
 * the selecting conditions are not shown here.
 */
297 static int find_snapshot_by_id_and_name(BlockDriverState *bs,
301 BDRVQcow2State *s = bs->opaque;
/* Search 1: both the ID and the name have to match */
305 for (i = 0; i < s->nb_snapshots; i++) {
306 if (!strcmp(s->snapshots[i].id_str, id) &&
307 !strcmp(s->snapshots[i].name, name)) {
/* Search 2: match on the ID alone */
312 for (i = 0; i < s->nb_snapshots; i++) {
313 if (!strcmp(s->snapshots[i].id_str, id)) {
/* Search 3: match on the name alone */
318 for (i = 0; i < s->nb_snapshots; i++) {
319 if (!strcmp(s->snapshots[i].name, name)) {
/*
 * Look up a snapshot of @bs from a single string that may be either an ID
 * or a name.  @id_or_name is first tried as an ID (with no name given);
 * the final lookup treats it as a name (with no ID given) and its result is
 * returned directly.
 */
328 static int find_snapshot_by_id_or_name(BlockDriverState *bs,
329 const char *id_or_name)
/* First attempt: interpret the string as a snapshot ID */
333 ret = find_snapshot_by_id_and_name(bs, id_or_name, NULL);
/* Second attempt: interpret the string as a snapshot name */
337 return find_snapshot_by_id_and_name(bs, NULL, id_or_name);
340 /* if no id is provided, a new one is constructed */
/*
 * Create a new internal snapshot of @bs described by @sn_info.
 *
 * The visible steps are: fill in a QCowSnapshot from @sn_info, allocate
 * clusters for a copy of the active L1 table and write that copy
 * (big-endian) to disk, increase the refcounts of everything referenced by
 * the active L1 table, append the snapshot to the in-memory list and write
 * the snapshot table, and finally discard the VM state area from the active
 * image.
 */
341 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
343 BDRVQcow2State *s = bs->opaque;
344 QCowSnapshot *new_snapshot_list = NULL;
345 QCowSnapshot *old_snapshot_list = NULL;
346 QCowSnapshot sn1, *sn = &sn1;
348 uint64_t *l1_table = NULL;
349 int64_t l1_table_offset;
/* Checked against the limit on the number of snapshots */
351 if (s->nb_snapshots >= QCOW_MAX_SNAPSHOTS) {
/* Images with an external data file get special treatment here
 * NOTE(review): presumably rejected; the branch body is not shown */
355 if (has_data_file(bs)) {
359 memset(sn, 0, sizeof(*sn));
/* The generated ID is stored into the caller's sn_info->id_str */
362 find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
364 /* Populate sn with passed data */
365 sn->id_str = g_strdup(sn_info->id_str);
366 sn->name = g_strdup(sn_info->name);
/* Record the current image size in bytes */
368 sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
369 sn->vm_state_size = sn_info->vm_state_size;
370 sn->date_sec = sn_info->date_sec;
371 sn->date_nsec = sn_info->date_nsec;
372 sn->vm_clock_nsec = sn_info->vm_clock_nsec;
374 /* Allocate the L1 table of the snapshot and copy the current one there. */
375 l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
/* A negative return value is an error code */
376 if (l1_table_offset < 0) {
377 ret = l1_table_offset;
381 sn->l1_table_offset = l1_table_offset;
382 sn->l1_size = s->l1_size;
/* A NULL result only counts as an allocation failure when the table is
 * non-empty */
384 l1_table = g_try_new(uint64_t, s->l1_size);
385 if (s->l1_size && l1_table == NULL) {
/* Build the on-disk (big-endian) representation of the active L1 table */
390 for(i = 0; i < s->l1_size; i++) {
391 l1_table[i] = cpu_to_be64(s->l1_table[i]);
/* Check the target area against existing metadata before writing to it */
394 ret = qcow2_pre_write_overlap_check(bs, 0, sn->l1_table_offset,
395 s->l1_size * sizeof(uint64_t), false);
400 ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
401 s->l1_size * sizeof(uint64_t));
410 * Increase the refcounts of all clusters and make sure everything is
411 * stable on disk before updating the snapshot table to contain a pointer
412 * to the new L1 table.
414 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
419 /* Append the new snapshot to the snapshot list */
420 new_snapshot_list = g_new(QCowSnapshot, s->nb_snapshots + 1);
422 memcpy(new_snapshot_list, s->snapshots,
423 s->nb_snapshots * sizeof(QCowSnapshot));
/* Keep the previous array so that it can be put back later if needed */
424 old_snapshot_list = s->snapshots;
426 s->snapshots = new_snapshot_list;
427 s->snapshots[s->nb_snapshots++] = *sn;
429 ret = qcow2_write_snapshots(bs);
/* NOTE(review): presumably executed only when writing the table failed -
 * drop the enlarged array and restore the previous one */
431 g_free(s->snapshots);
432 s->snapshots = old_snapshot_list;
/* The previous array is no longer needed */
437 g_free(old_snapshot_list);
439 /* The VM state isn't needed any more in the active L1 table; in fact, it
440 * hurts by causing expensive COW for the next snapshot. */
441 qcow2_cluster_discard(bs, qcow2_vm_state_offset(s),
442 ROUND_UP(sn->vm_state_size, s->cluster_size),
443 QCOW2_DISCARD_NEVER, false);
/* Run a refcount check; its result is collected in a local structure
 * NOTE(review): presumably compiled in for debugging only */
447 BdrvCheckResult result = {0};
448 qcow2_check_refcounts(bs, &result, 0);
461 /* copy the snapshot selected by 'snapshot_id' (matched as an ID or as a
 * name) into the current disk image */
/*
 * Visible steps: locate the snapshot, validate its L1 table, require that
 * its disk size equals the current image size, grow the active L1 table to
 * at least the snapshot's L1 size, read the snapshot's L1 table, update
 * refcounts, overwrite the active L1 table on disk and finally bring the
 * in-memory L1 table in sync with it.
 */
462 int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
464 BDRVQcow2State *s = bs->opaque;
466 Error *local_err = NULL;
467 int i, snapshot_index;
468 int cur_l1_bytes, sn_l1_bytes;
470 uint64_t *sn_l1_table = NULL;
/* Images with an external data file get special treatment here
 * NOTE(review): presumably rejected; the branch body is not shown */
472 if (has_data_file(bs)) {
476 /* Search the snapshot */
477 snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
/* A negative index means that no snapshot matched */
478 if (snapshot_index < 0) {
481 sn = &s->snapshots[snapshot_index];
/* Validate offset and size of the snapshot's L1 table before using them */
483 ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
484 sizeof(uint64_t), QCOW_MAX_L1_SIZE,
485 "Snapshot L1 table", &local_err);
/* This function has no errp parameter, so the error is reported directly */
487 error_report_err(local_err);
/* Only snapshots whose disk size equals the current image size can be
 * loaded */
491 if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
492 error_report("qcow2: Loading snapshots with different disk "
493 "size is not implemented");
499 * Make sure that the current L1 table is big enough to contain the whole
500 * L1 table of the snapshot. If the snapshot L1 table is smaller, the
501 * current one must be padded with zeros.
503 ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
508 cur_l1_bytes = s->l1_size * sizeof(uint64_t);
509 sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
512 * Copy the snapshot L1 table to the current L1 table.
514 * Before overwriting the old current L1 table on disk, make sure to
515 * increase all refcounts for the clusters referenced by the new one.
516 * Decrease the refcount referenced by the old one only when the L1
517 * table is overwritten.
/*
 * The buffer has the size of the current L1 table and is zero-filled; only
 * the snapshot's sn_l1_bytes are read into it, so any remaining entries
 * stay zero.
 */
519 sn_l1_table = g_try_malloc0(cur_l1_bytes);
/* A NULL result only counts as a failure for a non-zero allocation size */
520 if (cur_l1_bytes && sn_l1_table == NULL) {
525 ret = bdrv_pread(bs->file, sn->l1_table_offset,
526 sn_l1_table, sn_l1_bytes);
531 ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
/* The write targets the active L1 table, so that overlap type is passed */
537 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
538 s->l1_table_offset, cur_l1_bytes,
544 ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
551 * Decrease refcount of clusters of current L1 table.
553 * At this point, the in-memory s->l1_table points to the old L1 table,
554 * whereas on disk we already have the new one.
556 * qcow2_update_snapshot_refcount special cases the current L1 table to use
557 * the in-memory data instead of really using the offset to load a new one,
558 * which is why this works.
560 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
564 * Now update the in-memory L1 table to be in sync with the on-disk one. We
565 * need to do this even if updating refcounts failed.
/* The buffer holds big-endian entries; convert them to host byte order */
567 for(i = 0;i < s->l1_size; i++) {
568 s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
579 * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
580 * when we decreased the refcount of the old snapshot.
582 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
/* Run a refcount check; its result is collected in a local structure
 * NOTE(review): presumably compiled in for debugging only */
589 BdrvCheckResult result = {0};
590 qcow2_check_refcounts(bs, &result, 0);
/*
 * Delete the snapshot of @bs selected by @snapshot_id and/or name.
 *
 * Visible steps: locate the snapshot, validate its L1 table, remove the
 * entry from the in-memory list and write the snapshot table, decrease the
 * refcounts of the clusters referenced by the snapshot, free the clusters
 * of its L1 table and refresh the copied flags of the active L1 table.
 * Failures are reported through errp.
 */
600 int qcow2_snapshot_delete(BlockDriverState *bs,
601 const char *snapshot_id,
605 BDRVQcow2State *s = bs->opaque;
607 int snapshot_index, ret;
/* Images with an external data file get special treatment here
 * NOTE(review): presumably rejected; the branch body is not shown */
609 if (has_data_file(bs)) {
613 /* Search the snapshot */
614 snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
615 if (snapshot_index < 0) {
616 error_setg(errp, "Can't find the snapshot");
/* Take a copy of the entry: the slot in the array is overwritten by the
 * memmove() below, while the fields are still needed afterwards */
619 sn = s->snapshots[snapshot_index];
621 ret = qcow2_validate_table(bs, sn.l1_table_offset, sn.l1_size,
622 sizeof(uint64_t), QCOW_MAX_L1_SIZE,
623 "Snapshot L1 table", errp);
628 /* Remove it from the snapshot list */
/* Shift all following entries down by one; sn is a copy of one list entry,
 * so sizeof(sn) is the size of an entry */
629 memmove(s->snapshots + snapshot_index,
630 s->snapshots + snapshot_index + 1,
631 (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
633 ret = qcow2_write_snapshots(bs);
635 error_setg_errno(errp, -ret,
636 "Failed to remove snapshot from snapshot list");
641 * The snapshot is now unused, clean up. If we fail after this point, we
642 * won't recover but just leak clusters.
648 * Now decrease the refcounts of clusters referenced by the snapshot and
651 ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
654 error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table");
/* Release the clusters that held the snapshot's L1 table itself */
657 qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
658 QCOW2_DISCARD_SNAPSHOT);
660 /* must update the copied flag on the current cluster offsets */
661 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
663 error_setg_errno(errp, -ret,
664 "Failed to update snapshot status in disk");
/* Run a refcount check; its result is collected in a local structure
 * NOTE(review): presumably compiled in for debugging only */
670 BdrvCheckResult result = {0};
671 qcow2_check_refcounts(bs, &result, 0);
/*
 * Build an array of QEMUSnapshotInfo describing the snapshots of @bs.
 *
 * One zero-initialised element is allocated per snapshot and filled from
 * the in-memory snapshot list.  Returns the number of snapshots.
 * NOTE(review): presumably the array is handed to the caller through
 * @psn_tab; the assignment is not shown here.
 */
677 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
679 BDRVQcow2State *s = bs->opaque;
680 QEMUSnapshotInfo *sn_tab, *sn_info;
/* Images with an external data file get special treatment here; the branch
 * body is not shown */
684 if (has_data_file(bs)) {
/* Without snapshots the count (zero) is returned right away */
687 if (!s->nb_snapshots) {
689 return s->nb_snapshots;
692 sn_tab = g_new0(QEMUSnapshotInfo, s->nb_snapshots);
693 for(i = 0; i < s->nb_snapshots; i++) {
694 sn_info = sn_tab + i;
695 sn = s->snapshots + i;
/* The strings go into fixed-size buffers; pstrcpy() is given the buffer
 * size as its bound */
696 pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
698 pstrcpy(sn_info->name, sizeof(sn_info->name),
700 sn_info->vm_state_size = sn->vm_state_size;
701 sn_info->date_sec = sn->date_sec;
702 sn_info->date_nsec = sn->date_nsec;
703 sn_info->vm_clock_nsec = sn->vm_clock_nsec;
706 return s->nb_snapshots;
/*
 * Replace the in-memory L1 table of @bs by the L1 table of the snapshot
 * selected by @snapshot_id and/or name.
 *
 * The image must be read-only (asserted).  The snapshot's L1 table is
 * validated, read into a newly allocated buffer and converted to host byte
 * order; s->l1_size, s->l1_table_offset and s->l1_table are then switched
 * to the snapshot's values.  Failures are reported through errp.
 */
709 int qcow2_snapshot_load_tmp(BlockDriverState *bs,
710 const char *snapshot_id,
714 int i, snapshot_index;
715 BDRVQcow2State *s = bs->opaque;
717 uint64_t *new_l1_table;
721 assert(bs->read_only);
723 /* Search the snapshot */
724 snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
725 if (snapshot_index < 0) {
727 "Can't find snapshot");
730 sn = &s->snapshots[snapshot_index];
732 /* Allocate and read in the snapshot's L1 table */
733 ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
734 sizeof(uint64_t), QCOW_MAX_L1_SIZE,
735 "Snapshot L1 table", errp);
739 new_l1_bytes = sn->l1_size * sizeof(uint64_t);
/* The allocation size is rounded up to a multiple of 512 bytes; only
 * new_l1_bytes are read into the buffer below */
740 new_l1_table = qemu_try_blockalign(bs->file->bs,
741 ROUND_UP(new_l1_bytes, 512));
742 if (new_l1_table == NULL) {
746 ret = bdrv_pread(bs->file, sn->l1_table_offset,
747 new_l1_table, new_l1_bytes);
/* The buffer is released again when the read did not succeed */
749 error_setg(errp, "Failed to read l1 table for snapshot");
750 qemu_vfree(new_l1_table);
754 /* Switch the L1 table */
/* Release the previous table before installing the new one */
755 qemu_vfree(s->l1_table);
757 s->l1_size = sn->l1_size;
758 s->l1_table_offset = sn->l1_table_offset;
759 s->l1_table = new_l1_table;
/* Convert the big-endian on-disk entries to host byte order in place */
761 for(i = 0;i < s->l1_size; i++) {
762 be64_to_cpus(&s->l1_table[i]);