2 * Block driver for the QCOW version 2 format
4 * Copyright (c) 2004-2006 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "qemu-common.h"
26 #include "block_int.h"
27 #include "block/qcow2.h"
/*
 * Fixed-size part of one snapshot table entry as stored in the image file.
 * qcow2_read_snapshots() converts each field with be*_to_cpu() and
 * qcow2_write_snapshots() with cpu_to_be*(), i.e. the on-disk values are
 * big-endian. Entries start on an 8-byte boundary (align_offset(offset, 8)).
 */
29 typedef struct QEMU_PACKED QCowSnapshotHeader {
30 /* header is 8 byte aligned */
31 uint64_t l1_table_offset;
40 uint64_t vm_clock_nsec;
/* 32-bit VM state size; written as 0 when the real size exceeds 32 bits */
42 uint32_t vm_state_size;
/* Byte count of the extra data; readers skip all of it, known or not */
43 uint32_t extra_data_size; /* for extension */
44 /* extra data follows */
/*
 * Extra data that follows QCowSnapshotHeader in a snapshot table entry.
 * The reader only trusts a field when extra_data_size covers it: the first
 * 8 bytes override the 32-bit vm_state_size of the header.
 */
49 typedef struct QEMU_PACKED QCowSnapshotExtraData {
/* 64-bit VM state size, big-endian on disk */
50 uint64_t vm_state_size_large;
52 } QCowSnapshotExtraData;
/*
 * Release the in-memory snapshot list of @bs.
 *
 * For every entry of s->snapshots the heap-allocated name and ID strings are
 * freed. NOTE(review): freeing the array itself and resetting the counters
 * is not visible in this excerpt - confirm against the full function.
 */
54 void qcow2_free_snapshots(BlockDriverState *bs)
56 BDRVQcowState *s = bs->opaque;
59 for(i = 0; i < s->nb_snapshots; i++) {
60 g_free(s->snapshots[i].name);
61 g_free(s->snapshots[i].id_str);
/*
 * Load the snapshot table of @bs from the image file into s->snapshots.
 *
 * Starting at s->snapshots_offset, each of the s->nb_snapshots entries is
 * parsed as: 8-byte aligned QCowSnapshotHeader, extra data, ID string, name.
 * On success s->snapshots_size holds the number of bytes the table occupies.
 * A failure path calls qcow2_free_snapshots() to drop the partial list.
 */
68 int qcow2_read_snapshots(BlockDriverState *bs)
70 BDRVQcowState *s = bs->opaque;
72 QCowSnapshotExtraData extra;
74 int i, id_str_size, name_size;
76 uint32_t extra_data_size;
/* Nothing to read for an image without snapshots */
79 if (!s->nb_snapshots) {
81 s->snapshots_size = 0;
85 offset = s->snapshots_offset;
/*
 * Zero-filled so that name/id_str of entries not yet parsed are NULL.
 * NOTE(review): no upper bound on nb_snapshots is visible here, so the
 * multiplication is only as safe as the header validation - confirm.
 */
86 s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot));
88 for(i = 0; i < s->nb_snapshots; i++) {
89 /* Read statically sized part of the snapshot header */
90 offset = align_offset(offset, 8);
91 ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
/* Convert the big-endian on-disk fields to host byte order */
97 sn = s->snapshots + i;
98 sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
99 sn->l1_size = be32_to_cpu(h.l1_size);
100 sn->vm_state_size = be32_to_cpu(h.vm_state_size);
101 sn->date_sec = be32_to_cpu(h.date_sec);
102 sn->date_nsec = be32_to_cpu(h.date_nsec);
103 sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
104 extra_data_size = be32_to_cpu(h.extra_data_size);
106 id_str_size = be16_to_cpu(h.id_str_size);
107 name_size = be16_to_cpu(h.name_size);
109 /* Read extra data */
/* Only the part this implementation understands is read ... */
110 ret = bdrv_pread(bs->file, offset, &extra,
111 MIN(sizeof(extra), extra_data_size));
/* ... but the whole extra area is skipped, including unknown fields */
115 offset += extra_data_size;
/* The 64-bit VM state size replaces the 32-bit header value if present */
117 if (extra_data_size >= 8) {
118 sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
121 if (extra_data_size >= 16) {
122 sn->disk_size = be64_to_cpu(extra.disk_size);
/* Presumably the else branch: fall back to the current virtual disk size */
124 sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
127 /* Read snapshot ID */
/* One extra byte for the terminator; the on-disk string has none */
128 sn->id_str = g_malloc(id_str_size + 1);
129 ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
133 offset += id_str_size;
134 sn->id_str[id_str_size] = '\0';
136 /* Read snapshot name */
137 sn->name = g_malloc(name_size + 1);
138 ret = bdrv_pread(bs->file, offset, sn->name, name_size);
143 sn->name[name_size] = '\0';
/* Table length in bytes, later used to free the old table's clusters */
146 s->snapshots_size = offset - s->snapshots_offset;
150 qcow2_free_snapshots(bs);
154 /* add at the end of the file a new list of snapshots */
/*
 * Serialises s->snapshots into freshly allocated clusters, then switches the
 * image header (nb_snapshots and snapshots_offset) over to the new table and
 * frees the clusters of the previous one. The old table stays intact until
 * the header has been rewritten.
 */
155 static int qcow2_write_snapshots(BlockDriverState *bs)
157 BDRVQcowState *s = bs->opaque;
159 QCowSnapshotHeader h;
160 QCowSnapshotExtraData extra;
161 int i, name_size, id_str_size, snapshots_size;
163 uint32_t nb_snapshots;
164 uint64_t snapshots_offset;
165 } QEMU_PACKED header_data;
166 int64_t offset, snapshots_offset;
169 /* compute the size of the snapshots */
171 for(i = 0; i < s->nb_snapshots; i++) {
172 sn = s->snapshots + i;
173 offset = align_offset(offset, 8);
175 offset += sizeof(extra);
176 offset += strlen(sn->id_str);
177 offset += strlen(sn->name);
/* NOTE(review): int64_t offset is narrowed to the int snapshots_size */
179 snapshots_size = offset;
181 /* Allocate space for the new snapshot list */
182 snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
/* NOTE(review): the result of this flush is discarded */
183 bdrv_flush(bs->file);
184 offset = snapshots_offset;
189 /* Write all snapshots to the new list */
190 for(i = 0; i < s->nb_snapshots; i++) {
191 sn = s->snapshots + i;
/* Start from an all-zero header so unset fields are written as 0 */
192 memset(&h, 0, sizeof(h));
193 h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
194 h.l1_size = cpu_to_be32(sn->l1_size);
195 /* If it doesn't fit in 32 bit, older implementations should treat it
196 * as a disk-only snapshot rather than truncate the VM state */
197 if (sn->vm_state_size <= 0xffffffff) {
198 h.vm_state_size = cpu_to_be32(sn->vm_state_size);
200 h.date_sec = cpu_to_be32(sn->date_sec);
201 h.date_nsec = cpu_to_be32(sn->date_nsec);
202 h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
203 h.extra_data_size = cpu_to_be32(sizeof(extra));
/* The extra data always carries the full 64-bit size and the disk size */
205 memset(&extra, 0, sizeof(extra));
206 extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
207 extra.disk_size = cpu_to_be64(sn->disk_size);
/* Strings are stored without terminator; lengths go into 16-bit fields */
209 id_str_size = strlen(sn->id_str);
210 name_size = strlen(sn->name);
211 h.id_str_size = cpu_to_be16(id_str_size);
212 h.name_size = cpu_to_be16(name_size);
213 offset = align_offset(offset, 8);
215 ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
221 ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
225 offset += sizeof(extra);
227 ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
231 offset += id_str_size;
233 ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
241 * Update the header to point to the new snapshot table. This requires the
242 * new table and its refcounts to be stable on disk.
244 ret = bdrv_flush(bs);
/*
 * Build-time check that snapshots_offset directly follows nb_snapshots in
 * QCowHeader, so header_data can update both fields with a single write.
 */
249 QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
250 offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));
252 header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots);
253 header_data.snapshots_offset = cpu_to_be64(snapshots_offset);
255 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
256 &header_data, sizeof(header_data));
261 /* free the old snapshot table */
262 qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size);
/* From here on the in-memory state describes the new table */
263 s->snapshots_offset = snapshots_offset;
264 s->snapshots_size = snapshots_size;
/*
 * Write a new decimal snapshot ID into @id_str (capacity @id_str_size).
 *
 * The ID strings of all existing snapshots are parsed as base-10 numbers and
 * the result is formatted as id_max + 1. NOTE(review): the statement that
 * raises id_max is not visible in this excerpt; presumably it tracks the
 * largest parsed ID - confirm.
 */
271 static void find_new_snapshot_id(BlockDriverState *bs,
272 char *id_str, int id_str_size)
274 BDRVQcowState *s = bs->opaque;
276 int i, id, id_max = 0;
278 for(i = 0; i < s->nb_snapshots; i++) {
279 sn = s->snapshots + i;
/* NOTE(review): the unsigned long result is stored in an int */
280 id = strtoul(sn->id_str, NULL, 10);
284 snprintf(id_str, id_str_size, "%d", id_max + 1);
/*
 * Look up a snapshot of @bs whose ID string equals @id_str exactly.
 *
 * Callers in this file treat a result >= 0 as "found" and a negative result
 * as "not found"; the return statements themselves are not visible here.
 */
287 static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str)
289 BDRVQcowState *s = bs->opaque;
292 for(i = 0; i < s->nb_snapshots; i++) {
293 if (!strcmp(s->snapshots[i].id_str, id_str))
/*
 * Look up a snapshot of @bs by @name, which may be either an ID or a name.
 *
 * The ID lookup is tried first; the snapshot names are then compared with
 * strcmp(). Callers use the result as an index into s->snapshots and treat
 * a negative value as "not found".
 */
299 static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
301 BDRVQcowState *s = bs->opaque;
304 ret = find_snapshot_by_id(bs, name);
307 for(i = 0; i < s->nb_snapshots; i++) {
308 if (!strcmp(s->snapshots[i].name, name))
314 /* if no id is provided, a new one is constructed */
/*
 * Create a snapshot of @bs described by @sn_info.
 *
 * Steps visible here: pick or validate the ID, copy the active L1 table into
 * newly allocated clusters, raise the refcounts of everything the active L1
 * table references, flush, then append the entry to the snapshot list and
 * write the new snapshot table. @sn_info->id_str is filled in when empty.
 */
315 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
317 BDRVQcowState *s = bs->opaque;
318 QCowSnapshot *new_snapshot_list = NULL;
319 QCowSnapshot *old_snapshot_list = NULL;
320 QCowSnapshot sn1, *sn = &sn1;
322 uint64_t *l1_table = NULL;
323 int64_t l1_table_offset;
325 memset(sn, 0, sizeof(*sn));
327 /* Generate an ID if it wasn't passed */
328 if (sn_info->id_str[0] == '\0') {
329 find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
332 /* Check that the ID is unique */
333 if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) {
337 /* Populate sn with passed data */
/* The strings are duplicated, so the entry owns its own copies */
338 sn->id_str = g_strdup(sn_info->id_str);
339 sn->name = g_strdup(sn_info->name);
341 sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
342 sn->vm_state_size = sn_info->vm_state_size;
343 sn->date_sec = sn_info->date_sec;
344 sn->date_nsec = sn_info->date_nsec;
345 sn->vm_clock_nsec = sn_info->vm_clock_nsec;
347 /* Allocate the L1 table of the snapshot and copy the current one there. */
348 l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
/* A negative value is an error code and becomes the return value */
349 if (l1_table_offset < 0) {
350 ret = l1_table_offset;
354 sn->l1_table_offset = l1_table_offset;
355 sn->l1_size = s->l1_size;
/* Temporary big-endian copy of the in-memory (host order) L1 table */
357 l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
358 for(i = 0; i < s->l1_size; i++) {
359 l1_table[i] = cpu_to_be64(s->l1_table[i]);
362 ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
363 s->l1_size * sizeof(uint64_t));
372 * Increase the refcounts of all clusters and make sure everything is
373 * stable on disk before updating the snapshot table to contain a pointer
374 * to the new L1 table.
376 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
381 ret = bdrv_flush(bs);
386 /* Append the new snapshot to the snapshot list */
/* One slot more than before; existing entries are copied shallowly */
387 new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
389 memcpy(new_snapshot_list, s->snapshots,
390 s->nb_snapshots * sizeof(QCowSnapshot));
391 old_snapshot_list = s->snapshots;
393 s->snapshots = new_snapshot_list;
394 s->snapshots[s->nb_snapshots++] = *sn;
396 ret = qcow2_write_snapshots(bs);
/*
 * Presumably the failure path of the write: drop the new array and restore
 * the previous list (the enclosing condition is not visible here).
 */
398 g_free(s->snapshots);
399 s->snapshots = old_snapshot_list;
/* The old array is no longer referenced once the new table is in place */
403 g_free(old_snapshot_list);
/* Refcount consistency check; any enclosing conditional is not visible */
407 BdrvCheckResult result = {0};
408 qcow2_check_refcounts(bs, &result, 0);
421 /* copy the snapshot 'snapshot_name' into the current disk image */
/*
 * @snapshot_id may be a snapshot ID or a snapshot name. The snapshot's L1
 * table replaces the active one on disk and in memory, with refcounts
 * adjusted around the switch. Snapshots taken at a different virtual disk
 * size are rejected with an error report.
 */
422 int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
424 BDRVQcowState *s = bs->opaque;
426 int i, snapshot_index;
427 int cur_l1_bytes, sn_l1_bytes;
429 uint64_t *sn_l1_table = NULL;
431 /* Search the snapshot */
432 snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
433 if (snapshot_index < 0) {
436 sn = &s->snapshots[snapshot_index];
438 if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
439 error_report("qcow2: Loading snapshots with different disk "
440 "size is not implemented");
446 * Make sure that the current L1 table is big enough to contain the whole
447 * L1 table of the snapshot. If the snapshot L1 table is smaller, the
448 * current one must be padded with zeros.
450 ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
/* Byte sizes of both tables; these are held in plain int variables */
455 cur_l1_bytes = s->l1_size * sizeof(uint64_t);
456 sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
459 * Copy the snapshot L1 table to the current L1 table.
461 * Before overwriting the old current L1 table on disk, make sure to
462 * increase all refcounts for the clusters referenced by the new one.
463 * Decrease the refcount referenced by the old one only when the L1
464 * table is overwritten.
/*
 * Buffer has the size of the current table and is zero-filled, so the tail
 * beyond the snapshot's entries acts as the zero padding described above.
 */
466 sn_l1_table = g_malloc0(cur_l1_bytes);
468 ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
473 ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
479 ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
486 * Decrease refcount of clusters of current L1 table.
488 * At this point, the in-memory s->l1_table points to the old L1 table,
489 * whereas on disk we already have the new one.
491 * qcow2_update_snapshot_refcount special cases the current L1 table to use
492 * the in-memory data instead of really using the offset to load a new one,
493 * which is why this works.
495 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
499 * Now update the in-memory L1 table to be in sync with the on-disk one. We
500 * need to do this even if updating refcounts failed.
/* sn_l1_table holds big-endian data as read from disk; convert per entry */
502 for(i = 0;i < s->l1_size; i++) {
503 s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
514 * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
515 * when we decreased the refcount of the old snapshot.
517 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
/* Refcount consistency check; any enclosing conditional is not visible */
524 BdrvCheckResult result = {0};
525 qcow2_check_refcounts(bs, &result, 0);
/*
 * Delete the snapshot of @bs identified by @snapshot_id (ID or name).
 *
 * The entry is first removed from the in-memory list and the snapshot table
 * is rewritten; only afterwards are the snapshot's clusters released, so a
 * failure in the later steps leaks clusters instead of corrupting the table.
 */
535 int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
537 BDRVQcowState *s = bs->opaque;
539 int snapshot_index, ret;
541 /* Search the snapshot */
542 snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
543 if (snapshot_index < 0) {
/* Keep a by-value copy; the array slot is overwritten right below */
546 sn = s->snapshots[snapshot_index];
548 /* Remove it from the snapshot list */
/* Shift the following entries down by one; sizeof(sn) is one entry */
549 memmove(s->snapshots + snapshot_index,
550 s->snapshots + snapshot_index + 1,
551 (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
553 ret = qcow2_write_snapshots(bs);
559 * The snapshot is now unused, clean up. If we fail after this point, we
560 * won't recover but just leak clusters.
566 * Now decrease the refcounts of clusters referenced by the snapshot and
569 ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
/* Release the clusters that held the snapshot's own L1 table */
574 qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t));
576 /* must update the copied flag on the current cluster offsets */
577 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
/* Refcount consistency check; any enclosing conditional is not visible */
584 BdrvCheckResult result = {0};
585 qcow2_check_refcounts(bs, &result, 0);
/*
 * Build a QEMUSnapshotInfo array describing all snapshots of @bs.
 *
 * Returns the number of snapshots. The array is allocated with g_malloc0();
 * how it is handed over through @psn_tab is not visible in this excerpt -
 * presumably the caller owns and frees it (confirm).
 */
591 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
593 BDRVQcowState *s = bs->opaque;
594 QEMUSnapshotInfo *sn_tab, *sn_info;
/* No snapshots: nothing is allocated and the count (zero) is returned */
598 if (!s->nb_snapshots) {
600 return s->nb_snapshots;
603 sn_tab = g_malloc0(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
604 for(i = 0; i < s->nb_snapshots; i++) {
605 sn_info = sn_tab + i;
606 sn = s->snapshots + i;
/* Bounded copies into the fixed-size string fields of the info struct */
607 pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
609 pstrcpy(sn_info->name, sizeof(sn_info->name),
611 sn_info->vm_state_size = sn->vm_state_size;
612 sn_info->date_sec = sn->date_sec;
613 sn_info->date_nsec = sn->date_nsec;
614 sn_info->vm_clock_nsec = sn->vm_clock_nsec;
617 return s->nb_snapshots;
/*
 * Make the L1 table of the snapshot @snapshot_name (ID or name) the active
 * one in memory.
 *
 * @bs must be read-only (asserted). The snapshot's L1 table is read from the
 * image into a new buffer which then replaces s->l1_table; s->l1_size and
 * s->l1_table_offset are switched to the snapshot's values.
 */
620 int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
622 int i, snapshot_index;
623 BDRVQcowState *s = bs->opaque;
625 uint64_t *new_l1_table;
629 assert(bs->read_only);
631 /* Search the snapshot */
632 snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
633 if (snapshot_index < 0) {
636 sn = &s->snapshots[snapshot_index];
638 /* Allocate and read in the snapshot's L1 table */
/*
 * The size must come from the snapshot (sn->l1_size), not from the active
 * table: s->l1_size is set to sn->l1_size below and the conversion loop walks
 * that many entries of new_l1_table. Sizing by the old s->l1_size overruns
 * the buffer when the snapshot's L1 table is larger, and leaves entries
 * unread when it is smaller.
 */
639 new_l1_bytes = sn->l1_size * sizeof(uint64_t);
/* Allocation is rounded up to a multiple of 512 bytes and zero-filled */
640 new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
642 ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
/* Read failed: release the buffer, the active table stays untouched */
644 g_free(new_l1_table);
648 /* Switch the L1 table */
651 s->l1_size = sn->l1_size;
652 s->l1_table_offset = sn->l1_table_offset;
653 s->l1_table = new_l1_table;
/* Convert the big-endian on-disk entries to host byte order in place */
655 for(i = 0;i < s->l1_size; i++) {
656 be64_to_cpus(&s->l1_table[i]);