* See the COPYING.LIB file in the top-level directory.
*
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "qemu/error-report.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
#include "block/vhdx.h"
if (ret < 0) {
goto exit;
}
+ vhdx_log_entry_hdr_le_import(hdr);
exit:
return ret;
return ret;
}
+/* Writes num_sectors to the log (all log sectors are 4096 bytes),
+ * from buffer 'buffer'. Upon return, *sectors_written will contain
+ * the number of sectors successfully written.
+ *
+ * It is assumed that 'buffer' is at least 4096*num_sectors large.
+ *
+ * 0 is returned on success, -errno otherwise */
+static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
+ uint32_t *sectors_written, void *buffer,
+ uint32_t num_sectors)
+{
+ int ret = 0;
+ uint64_t offset;
+ uint32_t write;
+ void *buffer_tmp;
+ BDRVVHDXState *s = bs->opaque;
+
+ ret = vhdx_user_visible_write(bs, s);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ write = log->write;
+
+ buffer_tmp = buffer;
+ while (num_sectors) {
+
+ offset = log->offset + write;
+ write = vhdx_log_inc_idx(write, log->length);
+ if (write == log->read) {
+ /* full */
+ break;
+ }
+ ret = bdrv_pwrite(bs->file, offset, buffer_tmp,
+ VHDX_LOG_SECTOR_SIZE);
+ if (ret < 0) {
+ goto exit;
+ }
+ buffer_tmp += VHDX_LOG_SECTOR_SIZE;
+
+ log->write = write;
+ *sectors_written = *sectors_written + 1;
+ num_sectors--;
+ }
+
+exit:
+ return ret;
+}
+
+
/* Validates a log entry header */
static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
BDRVVHDXState *s)
{
int valid = false;
- if (memcmp(&hdr->signature, "loge", 4)) {
+ if (hdr->signature != VHDX_LOG_SIGNATURE) {
goto exit;
}
goto exit;
}
- if (!memcmp(&desc->signature, "zero", 4)) {
+ if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) {
/* valid */
ret = true;
}
- } else if (!memcmp(&desc->signature, "desc", 4)) {
+ } else if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
/* valid */
ret = true;
}
* passed into this function. Each descriptor will also be validated,
* and error returned if any are invalid. */
static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
- VHDXLogEntries *log, VHDXLogDescEntries **buffer)
+ VHDXLogEntries *log, VHDXLogDescEntries **buffer,
+ bool convert_endian)
{
int ret = 0;
uint32_t desc_sectors;
uint32_t sectors_read;
VHDXLogEntryHeader hdr;
VHDXLogDescEntries *desc_entries = NULL;
+ VHDXLogDescriptor desc;
int i;
assert(*buffer == NULL);
if (ret < 0) {
goto exit;
}
- vhdx_log_entry_hdr_le_import(&hdr);
+
if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
ret = -EINVAL;
goto exit;
}
desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
- desc_entries = qemu_blockalign(bs, desc_sectors * VHDX_LOG_SECTOR_SIZE);
+ desc_entries = qemu_try_blockalign(bs->file->bs,
+ desc_sectors * VHDX_LOG_SECTOR_SIZE);
+ if (desc_entries == NULL) {
+ ret = -ENOMEM;
+ goto exit;
+ }
ret = vhdx_log_read_sectors(bs, log, §ors_read, desc_entries,
desc_sectors, false);
/* put in proper endianness, and validate each desc */
for (i = 0; i < hdr.descriptor_count; i++) {
- vhdx_log_desc_le_import(&desc_entries->desc[i]);
- if (vhdx_log_desc_is_valid(&desc_entries->desc[i], &hdr) == false) {
+ desc = desc_entries->desc[i];
+ vhdx_log_desc_le_import(&desc);
+ if (convert_endian) {
+ desc_entries->desc[i] = desc;
+ }
+ if (vhdx_log_desc_is_valid(&desc, &hdr) == false) {
ret = -EINVAL;
goto free_and_exit;
}
}
+ if (convert_endian) {
+ desc_entries->hdr = hdr;
+ }
*buffer = desc_entries;
goto exit;
buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
- if (!memcmp(&desc->signature, "desc", 4)) {
+ if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
/* data sector */
if (data == NULL) {
ret = -EFAULT;
memcpy(buffer+offset, &desc->trailing_bytes, 4);
- } else if (!memcmp(&desc->signature, "zero", 4)) {
+ } else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
/* write 'count' sectors of sector */
memset(buffer, 0, VHDX_LOG_SECTOR_SIZE);
count = desc->zero_length / VHDX_LOG_SECTOR_SIZE;
+ } else {
+ error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32,
+ desc->signature);
+ ret = -EINVAL;
+ goto exit;
}
file_offset = desc->file_offset;
/* if the log shows a FlushedFileOffset larger than our current file
* size, then that means the file has been truncated / corrupted, and
* we must refused to open it / use it */
- if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file)) {
+ if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file->bs)) {
ret = -EINVAL;
goto exit;
}
- ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries);
+ ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true);
if (ret < 0) {
goto exit;
}
for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
- if (!memcmp(&desc_entries->desc[i].signature, "desc", 4)) {
+ if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) {
/* data sector, so read a sector to flush */
ret = vhdx_log_read_sectors(bs, &logs->log, §ors_read,
data, 1, false);
ret = -EINVAL;
goto exit;
}
+ vhdx_log_data_le_import(data);
}
ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
goto exit;
}
}
- if (bdrv_getlength(bs->file) < desc_entries->hdr.last_file_offset) {
+ if (bdrv_getlength(bs->file->bs) < desc_entries->hdr.last_file_offset) {
new_file_size = desc_entries->hdr.last_file_offset;
if (new_file_size % (1024*1024)) {
/* round up to nearest 1MB boundary */
new_file_size = ((new_file_size >> 20) + 1) << 20;
- bdrv_truncate(bs->file, new_file_size);
+ bdrv_truncate(bs->file, new_file_size, PREALLOC_MODE_OFF, NULL);
}
}
qemu_vfree(desc_entries);
goto inc_and_exit;
}
- vhdx_log_entry_hdr_le_import(&hdr);
-
-
if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
goto inc_and_exit;
}
desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
- /* Read desc sectors, and calculate log checksum */
+ /* Read all log sectors, and calculate log checksum */
total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE;
- /* read_desc() will incrememnt the read idx */
- ret = vhdx_log_read_desc(bs, s, log, &desc_buffer);
+ /* read_desc() will increment the read idx */
+ ret = vhdx_log_read_desc(bs, s, log, &desc_buffer, false);
if (ret < 0) {
goto free_and_exit;
}
}
}
crc ^= 0xffffffff;
- if (crc != desc_buffer->hdr.checksum) {
+ if (crc != hdr.checksum) {
goto free_and_exit;
}
*
* If read-only, we must replay the log in RAM (or refuse to open
* a dirty VHDX file read-only) */
-int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed)
+int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
+ Error **errp)
{
int ret = 0;
VHDXHeader *hdr;
}
if (logs.valid) {
+ if (bs->read_only) {
+ ret = -EPERM;
+ error_setg(errp,
+ "VHDX image file '%s' opened read-only, but "
+ "contains a log that needs to be replayed",
+ bs->filename);
+ error_append_hint(errp, "To replay the log, run:\n"
+ "qemu-img check -r all '%s'\n",
+ bs->filename);
+ goto exit;
+ }
/* now flush the log */
ret = vhdx_log_flush(bs, s, &logs);
if (ret < 0) {
}
+
+static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc,
+ VHDXLogDataSector *sector, void *data,
+ uint64_t seq)
+{
+ /* 8 + 4084 + 4 = 4096, 1 log sector */
+ memcpy(&desc->leading_bytes, data, 8);
+ data += 8;
+ cpu_to_le64s(&desc->leading_bytes);
+ memcpy(sector->data, data, 4084);
+ data += 4084;
+ memcpy(&desc->trailing_bytes, data, 4);
+ cpu_to_le32s(&desc->trailing_bytes);
+ data += 4;
+
+ sector->sequence_high = (uint32_t) (seq >> 32);
+ sector->sequence_low = (uint32_t) (seq & 0xffffffff);
+ sector->data_signature = VHDX_LOG_DATA_SIGNATURE;
+
+ vhdx_log_desc_le_export(desc);
+ vhdx_log_data_le_export(sector);
+}
+
+
+static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
+ void *data, uint32_t length, uint64_t offset)
+{
+ int ret = 0;
+ void *buffer = NULL;
+ void *merged_sector = NULL;
+ void *data_tmp, *sector_write;
+ unsigned int i;
+ int sector_offset;
+ uint32_t desc_sectors, sectors, total_length;
+ uint32_t sectors_written = 0;
+ uint32_t aligned_length;
+ uint32_t leading_length = 0;
+ uint32_t trailing_length = 0;
+ uint32_t partial_sectors = 0;
+ uint32_t bytes_written = 0;
+ uint64_t file_offset;
+ VHDXHeader *header;
+ VHDXLogEntryHeader new_hdr;
+ VHDXLogDescriptor *new_desc = NULL;
+ VHDXLogDataSector *data_sector = NULL;
+ MSGUID new_guid = { 0 };
+
+ header = s->headers[s->curr_header];
+
+ /* need to have offset read data, and be on 4096 byte boundary */
+
+ if (length > header->log_length) {
+ /* no log present. we could create a log here instead of failing */
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (guid_eq(header->log_guid, zero_guid)) {
+ vhdx_guid_generate(&new_guid);
+ vhdx_update_headers(bs, s, false, &new_guid);
+ } else {
+ /* currently, we require that the log be flushed after
+ * every write. */
+ ret = -ENOTSUP;
+ goto exit;
+ }
+
+ /* 0 is an invalid sequence number, but may also represent the first
+ * log write (or a wrapped seq) */
+ if (s->log.sequence == 0) {
+ s->log.sequence = 1;
+ }
+
+ sector_offset = offset % VHDX_LOG_SECTOR_SIZE;
+ file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE;
+
+ aligned_length = length;
+
+ /* add in the unaligned head and tail bytes */
+ if (sector_offset) {
+ leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset);
+ leading_length = leading_length > length ? length : leading_length;
+ aligned_length -= leading_length;
+ partial_sectors++;
+ }
+
+ sectors = aligned_length / VHDX_LOG_SECTOR_SIZE;
+ trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE);
+ if (trailing_length) {
+ partial_sectors++;
+ }
+
+ sectors += partial_sectors;
+
+ /* sectors is now how many sectors the data itself takes, not
+ * including the header and descriptor metadata */
+
+ new_hdr = (VHDXLogEntryHeader) {
+ .signature = VHDX_LOG_SIGNATURE,
+ .tail = s->log.tail,
+ .sequence_number = s->log.sequence,
+ .descriptor_count = sectors,
+ .reserved = 0,
+ .flushed_file_offset = bdrv_getlength(bs->file->bs),
+ .last_file_offset = bdrv_getlength(bs->file->bs),
+ };
+
+ new_hdr.log_guid = header->log_guid;
+
+ desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count);
+
+ total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE;
+ new_hdr.entry_length = total_length;
+
+ vhdx_log_entry_hdr_le_export(&new_hdr);
+
+ buffer = qemu_blockalign(bs, total_length);
+ memcpy(buffer, &new_hdr, sizeof(new_hdr));
+
+ new_desc = buffer + sizeof(new_hdr);
+ data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE);
+ data_tmp = data;
+
+ /* All log sectors are 4KB, so for any partial sectors we must
+ * merge the data with preexisting data from the final file
+ * destination */
+ merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
+
+ for (i = 0; i < sectors; i++) {
+ new_desc->signature = VHDX_LOG_DESC_SIGNATURE;
+ new_desc->sequence_number = s->log.sequence;
+ new_desc->file_offset = file_offset;
+
+ if (i == 0 && leading_length) {
+ /* partial sector at the front of the buffer */
+ ret = bdrv_pread(bs->file, file_offset, merged_sector,
+ VHDX_LOG_SECTOR_SIZE);
+ if (ret < 0) {
+ goto exit;
+ }
+ memcpy(merged_sector + sector_offset, data_tmp, leading_length);
+ bytes_written = leading_length;
+ sector_write = merged_sector;
+ } else if (i == sectors - 1 && trailing_length) {
+ /* partial sector at the end of the buffer */
+ ret = bdrv_pread(bs->file,
+ file_offset,
+ merged_sector + trailing_length,
+ VHDX_LOG_SECTOR_SIZE - trailing_length);
+ if (ret < 0) {
+ goto exit;
+ }
+ memcpy(merged_sector, data_tmp, trailing_length);
+ bytes_written = trailing_length;
+ sector_write = merged_sector;
+ } else {
+ bytes_written = VHDX_LOG_SECTOR_SIZE;
+ sector_write = data_tmp;
+ }
+
+ /* populate the raw sector data into the proper structures,
+ * as well as update the descriptor, and convert to proper
+ * endianness */
+ vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write,
+ s->log.sequence);
+
+ data_tmp += bytes_written;
+ data_sector++;
+ new_desc++;
+ file_offset += VHDX_LOG_SECTOR_SIZE;
+ }
+
+ /* checksum covers entire entry, from the log header through the
+ * last data sector */
+ vhdx_update_checksum(buffer, total_length,
+ offsetof(VHDXLogEntryHeader, checksum));
+
+ /* now write to the log */
+ ret = vhdx_log_write_sectors(bs, &s->log, §ors_written, buffer,
+ desc_sectors + sectors);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ if (sectors_written != desc_sectors + sectors) {
+ /* instead of failing, we could flush the log here */
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ s->log.sequence++;
+ /* write new tail */
+ s->log.tail = s->log.write;
+
+exit:
+ qemu_vfree(buffer);
+ qemu_vfree(merged_sector);
+ return ret;
+}
+
+/* Perform a log write, and then immediately flush the entire log */
+int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
+ void *data, uint32_t length, uint64_t offset)
+{
+ int ret = 0;
+ VHDXLogSequence logs = { .valid = true,
+ .count = 1,
+ .hdr = { 0 } };
+
+
+ /* Make sure data written (new and/or changed blocks) is stable
+ * on disk, before creating log entry */
+ bdrv_flush(bs);
+ ret = vhdx_log_write(bs, s, data, length, offset);
+ if (ret < 0) {
+ goto exit;
+ }
+ logs.log = s->log;
+
+ /* Make sure log is stable on disk */
+ bdrv_flush(bs);
+ ret = vhdx_log_flush(bs, s, &logs);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ s->log = logs.log;
+
+exit:
+ return ret;
+}
+