]> Git Repo - qemu.git/blobdiff - block/vhdx-log.c
block: Add PreallocMode to bdrv_truncate()
[qemu.git] / block / vhdx-log.c
index 0284729ddf81245a484db22c8accb89300629dd7..01278f3fc94c5992bbbbb9627f2ba021c25fd530 100644 (file)
  * See the COPYING.LIB file in the top-level directory.
  *
  */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
+#include "qemu/error-report.h"
 #include "qemu/module.h"
+#include "qemu/bswap.h"
 #include "block/vhdx.h"
 
 
@@ -84,6 +88,7 @@ static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
     if (ret < 0) {
         goto exit;
     }
+    vhdx_log_entry_hdr_le_import(hdr);
 
 exit:
     return ret;
@@ -156,13 +161,63 @@ exit:
     return ret;
 }
 
+/* Writes num_sectors to the log (all log sectors are 4096 bytes),
+ * from buffer 'buffer'.  Upon return, *sectors_written will contain
+ * the number of sectors successfully written.
+ *
+ * It is assumed that 'buffer' is at least 4096*num_sectors large.
+ *
+ * 0 is returned on success, -errno otherwise */
+static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
+                                  uint32_t *sectors_written, void *buffer,
+                                  uint32_t num_sectors)
+{
+    int ret = 0;
+    uint64_t offset;
+    uint32_t write;
+    void *buffer_tmp;
+    BDRVVHDXState *s = bs->opaque;
+
+    ret = vhdx_user_visible_write(bs, s);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    write = log->write;
+
+    buffer_tmp = buffer;
+    while (num_sectors) {
+
+        offset = log->offset + write;
+        write = vhdx_log_inc_idx(write, log->length);
+        if (write == log->read) {
+            /* full */
+            break;
+        }
+        ret = bdrv_pwrite(bs->file, offset, buffer_tmp,
+                          VHDX_LOG_SECTOR_SIZE);
+        if (ret < 0) {
+            goto exit;
+        }
+        buffer_tmp += VHDX_LOG_SECTOR_SIZE;
+
+        log->write = write;
+        *sectors_written = *sectors_written + 1;
+        num_sectors--;
+    }
+
+exit:
+    return ret;
+}
+
+
 /* Validates a log entry header */
 static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
                                   BDRVVHDXState *s)
 {
     int valid = false;
 
-    if (memcmp(&hdr->signature, "loge", 4)) {
+    if (hdr->signature != VHDX_LOG_SIGNATURE) {
         goto exit;
     }
 
@@ -226,12 +281,12 @@ static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc,
         goto exit;
     }
 
-    if (!memcmp(&desc->signature, "zero", 4)) {
+    if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
         if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) {
             /* valid */
             ret = true;
         }
-    } else if (!memcmp(&desc->signature, "desc", 4)) {
+    } else if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
             /* valid */
             ret = true;
     }
@@ -278,13 +333,15 @@ static int vhdx_compute_desc_sectors(uint32_t desc_cnt)
  * passed into this function. Each descriptor will also be validated,
  * and error returned if any are invalid. */
 static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
-                              VHDXLogEntries *log, VHDXLogDescEntries **buffer)
+                              VHDXLogEntries *log, VHDXLogDescEntries **buffer,
+                              bool convert_endian)
 {
     int ret = 0;
     uint32_t desc_sectors;
     uint32_t sectors_read;
     VHDXLogEntryHeader hdr;
     VHDXLogDescEntries *desc_entries = NULL;
+    VHDXLogDescriptor desc;
     int i;
 
     assert(*buffer == NULL);
@@ -293,14 +350,19 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
     if (ret < 0) {
         goto exit;
     }
-    vhdx_log_entry_hdr_le_import(&hdr);
+
     if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
         ret = -EINVAL;
         goto exit;
     }
 
     desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
-    desc_entries = qemu_blockalign(bs, desc_sectors * VHDX_LOG_SECTOR_SIZE);
+    desc_entries = qemu_try_blockalign(bs->file->bs,
+                                       desc_sectors * VHDX_LOG_SECTOR_SIZE);
+    if (desc_entries == NULL) {
+        ret = -ENOMEM;
+        goto exit;
+    }
 
     ret = vhdx_log_read_sectors(bs, log, &sectors_read, desc_entries,
                                 desc_sectors, false);
@@ -314,12 +376,19 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
 
     /* put in proper endianness, and validate each desc */
     for (i = 0; i < hdr.descriptor_count; i++) {
-        vhdx_log_desc_le_import(&desc_entries->desc[i]);
-        if (vhdx_log_desc_is_valid(&desc_entries->desc[i], &hdr) == false) {
+        desc = desc_entries->desc[i];
+        vhdx_log_desc_le_import(&desc);
+        if (convert_endian) {
+            desc_entries->desc[i] = desc;
+        }
+        if (vhdx_log_desc_is_valid(&desc, &hdr) == false) {
             ret = -EINVAL;
             goto free_and_exit;
         }
     }
+    if (convert_endian) {
+        desc_entries->hdr = hdr;
+    }
 
     *buffer = desc_entries;
     goto exit;
@@ -354,7 +423,7 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
 
     buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
 
-    if (!memcmp(&desc->signature, "desc", 4)) {
+    if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
         /* data sector */
         if (data == NULL) {
             ret = -EFAULT;
@@ -382,10 +451,15 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
 
         memcpy(buffer+offset, &desc->trailing_bytes, 4);
 
-    } else if (!memcmp(&desc->signature, "zero", 4)) {
+    } else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
         /* write 'count' sectors of sector */
         memset(buffer, 0, VHDX_LOG_SECTOR_SIZE);
         count = desc->zero_length / VHDX_LOG_SECTOR_SIZE;
+    } else {
+        error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32,
+                      desc->signature);
+        ret = -EINVAL;
+        goto exit;
     }
 
     file_offset = desc->file_offset;
@@ -439,18 +513,18 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
         /* if the log shows a FlushedFileOffset larger than our current file
          * size, then that means the file has been truncated / corrupted, and
          * we must refused to open it / use it */
-        if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file)) {
+        if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file->bs)) {
             ret = -EINVAL;
             goto exit;
         }
 
-        ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries);
+        ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true);
         if (ret < 0) {
             goto exit;
         }
 
         for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
-            if (!memcmp(&desc_entries->desc[i].signature, "desc", 4)) {
+            if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) {
                 /* data sector, so read a sector to flush */
                 ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read,
                                             data, 1, false);
@@ -461,6 +535,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
                     ret = -EINVAL;
                     goto exit;
                 }
+                vhdx_log_data_le_import(data);
             }
 
             ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
@@ -468,12 +543,12 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
                 goto exit;
             }
         }
-        if (bdrv_getlength(bs->file) < desc_entries->hdr.last_file_offset) {
+        if (bdrv_getlength(bs->file->bs) < desc_entries->hdr.last_file_offset) {
             new_file_size = desc_entries->hdr.last_file_offset;
             if (new_file_size % (1024*1024)) {
                 /* round up to nearest 1MB boundary */
                 new_file_size = ((new_file_size >> 20) + 1) << 20;
-                bdrv_truncate(bs->file, new_file_size);
+                bdrv_truncate(bs->file, new_file_size, PREALLOC_MODE_OFF, NULL);
             }
         }
         qemu_vfree(desc_entries);
@@ -509,9 +584,6 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
         goto inc_and_exit;
     }
 
-    vhdx_log_entry_hdr_le_import(&hdr);
-
-
     if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
         goto inc_and_exit;
     }
@@ -524,13 +596,13 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
 
     desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
 
-    /* Read desc sectors, and calculate log checksum */
+    /* Read all log sectors, and calculate log checksum */
 
     total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE;
 
 
-    /* read_desc() will incrememnt the read idx */
-    ret = vhdx_log_read_desc(bs, s, log, &desc_buffer);
+    /* read_desc() will increment the read idx */
+    ret = vhdx_log_read_desc(bs, s, log, &desc_buffer, false);
     if (ret < 0) {
         goto free_and_exit;
     }
@@ -553,7 +625,7 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
         }
     }
     crc ^= 0xffffffff;
-    if (crc != desc_buffer->hdr.checksum) {
+    if (crc != hdr.checksum) {
         goto free_and_exit;
     }
 
@@ -657,7 +729,8 @@ exit:
  *
  * If read-only, we must replay the log in RAM (or refuse to open
  * a dirty VHDX file read-only) */
-int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed)
+int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
+                   Error **errp)
 {
     int ret = 0;
     VHDXHeader *hdr;
@@ -712,6 +785,17 @@ int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed)
     }
 
     if (logs.valid) {
+        if (bs->read_only) {
+            ret = -EPERM;
+            error_setg(errp,
+                       "VHDX image file '%s' opened read-only, but "
+                       "contains a log that needs to be replayed",
+                       bs->filename);
+            error_append_hint(errp,  "To replay the log, run:\n"
+                              "qemu-img check -r all '%s'\n",
+                              bs->filename);
+            goto exit;
+        }
         /* now flush the log */
         ret = vhdx_log_flush(bs, s, &logs);
         if (ret < 0) {
@@ -726,3 +810,235 @@ exit:
 }
 
 
+
+static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc,
+                                      VHDXLogDataSector *sector, void *data,
+                                      uint64_t seq)
+{
+    /* 8 + 4084 + 4 = 4096, 1 log sector */
+    memcpy(&desc->leading_bytes, data, 8);
+    data += 8;
+    cpu_to_le64s(&desc->leading_bytes);
+    memcpy(sector->data, data, 4084);
+    data += 4084;
+    memcpy(&desc->trailing_bytes, data, 4);
+    cpu_to_le32s(&desc->trailing_bytes);
+    data += 4;
+
+    sector->sequence_high  = (uint32_t) (seq >> 32);
+    sector->sequence_low   = (uint32_t) (seq & 0xffffffff);
+    sector->data_signature = VHDX_LOG_DATA_SIGNATURE;
+
+    vhdx_log_desc_le_export(desc);
+    vhdx_log_data_le_export(sector);
+}
+
+
+static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
+                          void *data, uint32_t length, uint64_t offset)
+{
+    int ret = 0;
+    void *buffer = NULL;
+    void *merged_sector = NULL;
+    void *data_tmp, *sector_write;
+    unsigned int i;
+    int sector_offset;
+    uint32_t desc_sectors, sectors, total_length;
+    uint32_t sectors_written = 0;
+    uint32_t aligned_length;
+    uint32_t leading_length = 0;
+    uint32_t trailing_length = 0;
+    uint32_t partial_sectors = 0;
+    uint32_t bytes_written = 0;
+    uint64_t file_offset;
+    VHDXHeader *header;
+    VHDXLogEntryHeader new_hdr;
+    VHDXLogDescriptor *new_desc = NULL;
+    VHDXLogDataSector *data_sector = NULL;
+    MSGUID new_guid = { 0 };
+
+    header = s->headers[s->curr_header];
+
+    /* need to have offset read data, and be on 4096 byte boundary */
+
+    if (length > header->log_length) {
+        /* no log present.  we could create a log here instead of failing */
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    if (guid_eq(header->log_guid, zero_guid)) {
+        vhdx_guid_generate(&new_guid);
+        vhdx_update_headers(bs, s, false, &new_guid);
+    } else {
+        /* currently, we require that the log be flushed after
+         * every write. */
+        ret = -ENOTSUP;
+        goto exit;
+    }
+
+    /* 0 is an invalid sequence number, but may also represent the first
+     * log write (or a wrapped seq) */
+    if (s->log.sequence == 0) {
+        s->log.sequence = 1;
+    }
+
+    sector_offset = offset % VHDX_LOG_SECTOR_SIZE;
+    file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE;
+
+    aligned_length = length;
+
+    /* add in the unaligned head and tail bytes */
+    if (sector_offset) {
+        leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset);
+        leading_length = leading_length > length ? length : leading_length;
+        aligned_length -= leading_length;
+        partial_sectors++;
+    }
+
+    sectors = aligned_length / VHDX_LOG_SECTOR_SIZE;
+    trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE);
+    if (trailing_length) {
+        partial_sectors++;
+    }
+
+    sectors += partial_sectors;
+
+    /* sectors is now how many sectors the data itself takes, not
+     * including the header and descriptor metadata */
+
+    new_hdr = (VHDXLogEntryHeader) {
+                .signature           = VHDX_LOG_SIGNATURE,
+                .tail                = s->log.tail,
+                .sequence_number     = s->log.sequence,
+                .descriptor_count    = sectors,
+                .reserved            = 0,
+                .flushed_file_offset = bdrv_getlength(bs->file->bs),
+                .last_file_offset    = bdrv_getlength(bs->file->bs),
+              };
+
+    new_hdr.log_guid = header->log_guid;
+
+    desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count);
+
+    total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE;
+    new_hdr.entry_length = total_length;
+
+    vhdx_log_entry_hdr_le_export(&new_hdr);
+
+    buffer = qemu_blockalign(bs, total_length);
+    memcpy(buffer, &new_hdr, sizeof(new_hdr));
+
+    new_desc = buffer + sizeof(new_hdr);
+    data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE);
+    data_tmp = data;
+
+    /* All log sectors are 4KB, so for any partial sectors we must
+     * merge the data with preexisting data from the final file
+     * destination */
+    merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
+
+    for (i = 0; i < sectors; i++) {
+        new_desc->signature       = VHDX_LOG_DESC_SIGNATURE;
+        new_desc->sequence_number = s->log.sequence;
+        new_desc->file_offset     = file_offset;
+
+        if (i == 0 && leading_length) {
+            /* partial sector at the front of the buffer */
+            ret = bdrv_pread(bs->file, file_offset, merged_sector,
+                             VHDX_LOG_SECTOR_SIZE);
+            if (ret < 0) {
+                goto exit;
+            }
+            memcpy(merged_sector + sector_offset, data_tmp, leading_length);
+            bytes_written = leading_length;
+            sector_write = merged_sector;
+        } else if (i == sectors - 1 && trailing_length) {
+            /* partial sector at the end of the buffer */
+            ret = bdrv_pread(bs->file,
+                            file_offset,
+                            merged_sector + trailing_length,
+                            VHDX_LOG_SECTOR_SIZE - trailing_length);
+            if (ret < 0) {
+                goto exit;
+            }
+            memcpy(merged_sector, data_tmp, trailing_length);
+            bytes_written = trailing_length;
+            sector_write = merged_sector;
+        } else {
+            bytes_written = VHDX_LOG_SECTOR_SIZE;
+            sector_write = data_tmp;
+        }
+
+        /* populate the raw sector data into the proper structures,
+         * as well as update the descriptor, and convert to proper
+         * endianness */
+        vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write,
+                                  s->log.sequence);
+
+        data_tmp += bytes_written;
+        data_sector++;
+        new_desc++;
+        file_offset += VHDX_LOG_SECTOR_SIZE;
+    }
+
+    /* checksum covers entire entry, from the log header through the
+     * last data sector */
+    vhdx_update_checksum(buffer, total_length,
+                         offsetof(VHDXLogEntryHeader, checksum));
+
+    /* now write to the log */
+    ret = vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
+                                 desc_sectors + sectors);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    if (sectors_written != desc_sectors + sectors) {
+        /* instead of failing, we could flush the log here */
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    s->log.sequence++;
+    /* write new tail */
+    s->log.tail = s->log.write;
+
+exit:
+    qemu_vfree(buffer);
+    qemu_vfree(merged_sector);
+    return ret;
+}
+
+/* Perform a log write, and then immediately flush the entire log */
+int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
+                             void *data, uint32_t length, uint64_t offset)
+{
+    int ret = 0;
+    VHDXLogSequence logs = { .valid = true,
+                             .count = 1,
+                             .hdr = { 0 } };
+
+
+    /* Make sure data written (new and/or changed blocks) is stable
+     * on disk, before creating log entry */
+    bdrv_flush(bs);
+    ret = vhdx_log_write(bs, s, data, length, offset);
+    if (ret < 0) {
+        goto exit;
+    }
+    logs.log = s->log;
+
+    /* Make sure log is stable on disk */
+    bdrv_flush(bs);
+    ret = vhdx_log_flush(bs, s, &logs);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    s->log = logs.log;
+
+exit:
+    return ret;
+}
+
This page took 0.037199 seconds and 4 git commands to generate.