X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/ebf53fcdaeabd51b0fa66d65366c188dd974d4da..3f5075ae63b6dc1de9428d028a4d28fc98e7fdff:/block.c diff --git a/block.c b/block.c index 1ef4ae5f3e..115e5913eb 100644 --- a/block.c +++ b/block.c @@ -22,19 +22,17 @@ * THE SOFTWARE. */ #include "config-host.h" -#ifdef HOST_BSD -/* include native header before sys-queue.h */ -#include -#endif - #include "qemu-common.h" #include "monitor.h" #include "block_int.h" +#include "module.h" +#include "qemu-objects.h" -#ifdef HOST_BSD +#ifdef CONFIG_BSD #include #include #include +#include #ifndef __DragonFly__ #include #endif @@ -44,22 +42,14 @@ #include #endif -#define SECTOR_BITS 9 -#define SECTOR_SIZE (1 << SECTOR_BITS) - -typedef struct BlockDriverAIOCBSync { - BlockDriverAIOCB common; - QEMUBH *bh; - int ret; -} BlockDriverAIOCBSync; - -static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs, - int64_t sector_num, uint8_t *buf, int nb_sectors, +static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque); +static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); -static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs, - int64_t sector_num, const uint8_t *buf, int nb_sectors, +static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque); -static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb); static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors); static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, @@ -69,6 +59,9 @@ BlockDriverState *bdrv_first; static BlockDriver *first_drv; +/* If non-zero, use only whitelisted block drivers */ +static int use_bdrv_whitelist; + int path_is_absolute(const char *path) { const char *p; @@ -133,20 +126,21 @@ void path_combine(char *dest, int dest_size, } } - -static void bdrv_register(BlockDriver *bdrv) +void bdrv_register(BlockDriver *bdrv) { - if (!bdrv->bdrv_aio_read) { + if (!bdrv->bdrv_aio_readv) { /* add AIO emulation layer */ - bdrv->bdrv_aio_read = bdrv_aio_read_em; - bdrv->bdrv_aio_write = bdrv_aio_write_em; - bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em; - bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync); - } else if (!bdrv->bdrv_read && !bdrv->bdrv_pread) { + bdrv->bdrv_aio_readv = bdrv_aio_readv_em; + bdrv->bdrv_aio_writev = bdrv_aio_writev_em; + } else if (!bdrv->bdrv_read) { /* add synchronous IO emulation layer */ bdrv->bdrv_read = bdrv_read_em; bdrv->bdrv_write = bdrv_write_em; } + + if (!bdrv->bdrv_aio_flush) + bdrv->bdrv_aio_flush = bdrv_aio_flush_em; + bdrv->next = first_drv; first_drv = bdrv; } @@ -178,13 +172,37 @@ BlockDriver *bdrv_find_format(const char *format_name) return NULL; } -int bdrv_create(BlockDriver *drv, - const char *filename, int64_t size_in_sectors, - const char *backing_file, int flags) +static int bdrv_is_whitelisted(BlockDriver *drv) +{ + static const char *whitelist[] = { + CONFIG_BDRV_WHITELIST + }; + const char **p; + + if (!whitelist[0]) + return 1; /* no whitelist, anything goes */ + + for (p = whitelist; *p; p++) { + if (!strcmp(drv->format_name, *p)) { + return 1; + } + } + return 0; +} + +BlockDriver *bdrv_find_whitelisted_format(const char *format_name) +{ + BlockDriver *drv = bdrv_find_format(format_name); + return drv && bdrv_is_whitelisted(drv) ? drv : NULL; +} + +int bdrv_create(BlockDriver *drv, const char* filename, + QEMUOptionParameter *options) { if (!drv->bdrv_create) return -ENOTSUP; - return drv->bdrv_create(filename, size_in_sectors, backing_file, flags); + + return drv->bdrv_create(filename, options); } #ifdef _WIN32 @@ -218,7 +236,7 @@ static int is_windows_drive_prefix(const char *filename) filename[1] == ':'); } -static int is_windows_drive(const char *filename) +int is_windows_drive(const char *filename) { if (is_windows_drive_prefix(filename) && filename[2] == '\0') @@ -240,11 +258,11 @@ static BlockDriver *find_protocol(const char *filename) #ifdef _WIN32 if (is_windows_drive(filename) || is_windows_drive_prefix(filename)) - return &bdrv_raw; + return bdrv_find_format("raw"); #endif p = strchr(filename, ':'); if (!p) - return &bdrv_raw; + return bdrv_find_format("raw"); len = p - filename; if (len > sizeof(protocol) - 1) len = sizeof(protocol) - 1; @@ -258,8 +276,28 @@ static BlockDriver *find_protocol(const char *filename) return NULL; } -/* XXX: force raw format if block or character device ? It would - simplify the BSD case */ +/* + * Detect host devices. By convention, /dev/cdrom[N] is always + * recognized as a host CDROM. + */ +static BlockDriver *find_hdev_driver(const char *filename) +{ + int score_max = 0, score; + BlockDriver *drv = NULL, *d; + + for (d = first_drv; d; d = d->next) { + if (d->bdrv_probe_device) { + score = d->bdrv_probe_device(filename); + if (score > score_max) { + score_max = score; + drv = d; + } + } + } + + return drv; +} + static BlockDriver *find_image_format(const char *filename) { int ret, score, score_max; @@ -267,26 +305,9 @@ static BlockDriver *find_image_format(const char *filename) uint8_t buf[2048]; BlockDriverState *bs; - /* detect host devices. By convention, /dev/cdrom[N] is always - recognized as a host CDROM */ - if (strstart(filename, "/dev/cdrom", NULL)) - return &bdrv_host_device; -#ifdef _WIN32 - if (is_windows_drive(filename)) - return &bdrv_host_device; -#else - { - struct stat st; - if (stat(filename, &st) >= 0 && - (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) { - return &bdrv_host_device; - } - } -#endif - drv = find_protocol(filename); /* no need to test disk image formats for vvfat */ - if (drv == &bdrv_vvfat) + if (drv && strcmp(drv->format_name, "vvfat") == 0) return drv; ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY); @@ -335,31 +356,34 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags) int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, BlockDriver *drv) { - int ret, open_flags; + int ret, open_flags, try_rw; char tmp_filename[PATH_MAX]; char backing_filename[PATH_MAX]; - bs->read_only = 0; bs->is_temporary = 0; bs->encrypted = 0; bs->valid_key = 0; + /* buffer_alignment defaulted to 512, drivers can change this value */ + bs->buffer_alignment = 512; if (flags & BDRV_O_SNAPSHOT) { BlockDriverState *bs1; int64_t total_size; int is_protocol = 0; + BlockDriver *bdrv_qcow2; + QEMUOptionParameter *options; /* if snapshot, we create a temporary backing file and open it instead of opening 'filename' directly */ /* if there is a backing file, use it */ bs1 = bdrv_new(""); - ret = bdrv_open(bs1, filename, 0); + ret = bdrv_open2(bs1, filename, 0, drv); if (ret < 0) { bdrv_delete(bs1); return ret; } - total_size = bdrv_getlength(bs1) >> SECTOR_BITS; + total_size = bdrv_getlength(bs1) >> BDRV_SECTOR_BITS; if (bs1->drv && bs1->drv->protocol_name) is_protocol = 1; @@ -372,15 +396,26 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, if (is_protocol) snprintf(backing_filename, sizeof(backing_filename), "%s", filename); - else - realpath(filename, backing_filename); + else if (!realpath(filename, backing_filename)) + return -errno; + + bdrv_qcow2 = bdrv_find_format("qcow2"); + options = parse_option_parameters("", bdrv_qcow2->create_options, NULL); - ret = bdrv_create(&bdrv_qcow2, tmp_filename, - total_size, backing_filename, 0); + set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size * 512); + set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename); + if (drv) { + set_option_parameter(options, BLOCK_OPT_BACKING_FMT, + drv->format_name); + } + + ret = bdrv_create(bdrv_qcow2, tmp_filename, options); if (ret < 0) { return ret; } + filename = tmp_filename; + drv = bdrv_qcow2; bs->is_temporary = 1; } @@ -388,7 +423,10 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, if (flags & BDRV_O_FILE) { drv = find_protocol(filename); } else if (!drv) { - drv = find_image_format(filename); + drv = find_hdev_driver(filename); + if (!drv) { + drv = find_image_format(filename); + } } if (!drv) { ret = -ENOENT; @@ -396,13 +434,28 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, } bs->drv = drv; bs->opaque = qemu_mallocz(drv->instance_size); + + /* + * Yes, BDRV_O_NOCACHE aka O_DIRECT means we have to present a + * write cache to the guest. We do need the fdatasync to flush + * out transactions for block allocations, and we maybe have a + * volatile write cache in our backing device to deal with. + */ + if (flags & (BDRV_O_CACHE_WB|BDRV_O_NOCACHE)) + bs->enable_write_cache = 1; + /* Note: for compatibility, we open disk image files as RDWR, and RDONLY as fallback */ + try_rw = !bs->read_only || bs->is_temporary; if (!(flags & BDRV_O_FILE)) - open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK); + open_flags = (try_rw ? BDRV_O_RDWR : 0) | + (flags & (BDRV_O_CACHE_MASK|BDRV_O_NATIVE_AIO)); else open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT); - ret = drv->bdrv_open(bs, filename, open_flags); + if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) + ret = -ENOTSUP; + else + ret = drv->bdrv_open(bs, filename, open_flags); if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) { ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR); bs->read_only = 1; @@ -417,19 +470,25 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, return ret; } if (drv->bdrv_getlength) { - bs->total_sectors = bdrv_getlength(bs) >> SECTOR_BITS; + bs->total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS; } #ifndef _WIN32 if (bs->is_temporary) { unlink(filename); } #endif - if (bs->backing_file[0] != '\0') { + if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') { /* if there is a backing file, use it */ + BlockDriver *back_drv = NULL; bs->backing_hd = bdrv_new(""); + /* pass on read_only property to the backing_hd */ + bs->backing_hd->read_only = bs->read_only; path_combine(backing_filename, sizeof(backing_filename), filename, bs->backing_file); - ret = bdrv_open(bs->backing_hd, backing_filename, open_flags); + if (bs->backing_format[0] != '\0') + back_drv = bdrv_find_format(bs->backing_format); + ret = bdrv_open2(bs->backing_hd, backing_filename, open_flags, + back_drv); if (ret < 0) { bdrv_close(bs); return ret; @@ -481,6 +540,20 @@ void bdrv_delete(BlockDriverState *bs) qemu_free(bs); } +/* + * Run consistency checks on an image + * + * Returns the number of errors or -errno when an internal error occurs + */ +int bdrv_check(BlockDriverState *bs) +{ + if (bs->drv->bdrv_check == NULL) { + return -ENOTSUP; + } + + return bs->drv->bdrv_check(bs); +} + /* commit COW file into the raw image */ int bdrv_commit(BlockDriverState *bs) { @@ -500,7 +573,7 @@ int bdrv_commit(BlockDriverState *bs) return -ENOTSUP; } - total_sectors = bdrv_getlength(bs) >> SECTOR_BITS; + total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS; for (i = 0; i < total_sectors;) { if (drv->bdrv_is_allocated(bs, i, 65536, &n)) { for(j = 0; j < n; j++) { @@ -521,9 +594,35 @@ int bdrv_commit(BlockDriverState *bs) if (drv->bdrv_make_empty) return drv->bdrv_make_empty(bs); + /* + * Make sure all data we wrote to the backing device is actually + * stable on disk. + */ + if (bs->backing_hd) + bdrv_flush(bs->backing_hd); return 0; } +/* + * Return values: + * 0 - success + * -EINVAL - backing format specified, but no file + * -ENOSPC - can't update the backing file because no space is left in the + * image file header + * -ENOTSUP - format driver doesn't support changing the backing file + */ +int bdrv_change_backing_file(BlockDriverState *bs, + const char *backing_file, const char *backing_fmt) +{ + BlockDriver *drv = bs->drv; + + if (drv->bdrv_change_backing_file != NULL) { + return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); + } else { + return -ENOTSUP; + } +} + static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, size_t size) { @@ -537,7 +636,10 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, len = bdrv_getlength(bs); - if ((offset + size) > len) + if (offset < 0) + return -EIO; + + if ((offset > len) || (len - offset < size)) return -EIO; return 0; @@ -546,15 +648,7 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { - int64_t offset; - - /* Deal with byte accesses */ - if (sector_num < 0) - offset = -sector_num; - else - offset = sector_num * 512; - - return bdrv_check_byte_request(bs, offset, nb_sectors * 512); + return bdrv_check_byte_request(bs, sector_num * 512, nb_sectors * 512); } /* return < 0 if error. See bdrv_write() for the return codes */ @@ -568,21 +662,28 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num, if (bdrv_check_request(bs, sector_num, nb_sectors)) return -EIO; - if (drv->bdrv_pread) { - int ret, len; - len = nb_sectors * 512; - ret = drv->bdrv_pread(bs, sector_num * 512, buf, len); - if (ret < 0) - return ret; - else if (ret != len) - return -EINVAL; - else { - bs->rd_bytes += (unsigned) len; - bs->rd_ops ++; - return 0; - } - } else { - return drv->bdrv_read(bs, sector_num, buf, nb_sectors); + return drv->bdrv_read(bs, sector_num, buf, nb_sectors); +} + +static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int dirty) +{ + int64_t start, end; + unsigned long val, idx, bit; + + start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK; + end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK; + + for (; start <= end; start++) { + idx = start / (sizeof(unsigned long) * 8); + bit = start % (sizeof(unsigned long) * 8); + val = bs->dirty_bitmap[idx]; + if (dirty) { + val |= 1 << bit; + } else { + val &= ~(1 << bit); + } + bs->dirty_bitmap[idx] = val; } } @@ -603,42 +704,30 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num, if (bdrv_check_request(bs, sector_num, nb_sectors)) return -EIO; - if (drv->bdrv_pwrite) { - int ret, len, count = 0; - len = nb_sectors * 512; - do { - ret = drv->bdrv_pwrite(bs, sector_num * 512, buf, len - count); - if (ret < 0) { - printf("bdrv_write ret=%d\n", ret); - return ret; - } - count += ret; - buf += ret; - } while (count != len); - bs->wr_bytes += (unsigned) len; - bs->wr_ops ++; - return 0; + if (bs->dirty_bitmap) { + set_dirty_bitmap(bs, sector_num, nb_sectors, 1); } + return drv->bdrv_write(bs, sector_num, buf, nb_sectors); } -static int bdrv_pread_em(BlockDriverState *bs, int64_t offset, - uint8_t *buf, int count1) +int bdrv_pread(BlockDriverState *bs, int64_t offset, + void *buf, int count1) { - uint8_t tmp_buf[SECTOR_SIZE]; + uint8_t tmp_buf[BDRV_SECTOR_SIZE]; int len, nb_sectors, count; int64_t sector_num; count = count1; /* first read to align to sector start */ - len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1); + len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1); if (len > count) len = count; - sector_num = offset >> SECTOR_BITS; + sector_num = offset >> BDRV_SECTOR_BITS; if (len > 0) { if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0) return -EIO; - memcpy(buf, tmp_buf + (offset & (SECTOR_SIZE - 1)), len); + memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len); count -= len; if (count == 0) return count1; @@ -647,12 +736,12 @@ static int bdrv_pread_em(BlockDriverState *bs, int64_t offset, } /* read the sectors "in place" */ - nb_sectors = count >> SECTOR_BITS; + nb_sectors = count >> BDRV_SECTOR_BITS; if (nb_sectors > 0) { if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0) return -EIO; sector_num += nb_sectors; - len = nb_sectors << SECTOR_BITS; + len = nb_sectors << BDRV_SECTOR_BITS; buf += len; count -= len; } @@ -666,23 +755,23 @@ static int bdrv_pread_em(BlockDriverState *bs, int64_t offset, return count1; } -static int bdrv_pwrite_em(BlockDriverState *bs, int64_t offset, - const uint8_t *buf, int count1) +int bdrv_pwrite(BlockDriverState *bs, int64_t offset, + const void *buf, int count1) { - uint8_t tmp_buf[SECTOR_SIZE]; + uint8_t tmp_buf[BDRV_SECTOR_SIZE]; int len, nb_sectors, count; int64_t sector_num; count = count1; /* first write to align to sector start */ - len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1); + len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1); if (len > count) len = count; - sector_num = offset >> SECTOR_BITS; + sector_num = offset >> BDRV_SECTOR_BITS; if (len > 0) { if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0) return -EIO; - memcpy(tmp_buf + (offset & (SECTOR_SIZE - 1)), buf, len); + memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len); if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0) return -EIO; count -= len; @@ -693,12 +782,12 @@ static int bdrv_pwrite_em(BlockDriverState *bs, int64_t offset, } /* write the sectors "in place" */ - nb_sectors = count >> SECTOR_BITS; + nb_sectors = count >> BDRV_SECTOR_BITS; if (nb_sectors > 0) { if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0) return -EIO; sector_num += nb_sectors; - len = nb_sectors << SECTOR_BITS; + len = nb_sectors << BDRV_SECTOR_BITS; buf += len; count -= len; } @@ -714,42 +803,6 @@ static int bdrv_pwrite_em(BlockDriverState *bs, int64_t offset, return count1; } -/** - * Read with byte offsets (needed only for file protocols) - */ -int bdrv_pread(BlockDriverState *bs, int64_t offset, - void *buf1, int count1) -{ - BlockDriver *drv = bs->drv; - - if (!drv) - return -ENOMEDIUM; - if (bdrv_check_byte_request(bs, offset, count1)) - return -EIO; - - if (!drv->bdrv_pread) - return bdrv_pread_em(bs, offset, buf1, count1); - return drv->bdrv_pread(bs, offset, buf1, count1); -} - -/** - * Write with byte offsets (needed only for file protocols) - */ -int bdrv_pwrite(BlockDriverState *bs, int64_t offset, - const void *buf1, int count1) -{ - BlockDriver *drv = bs->drv; - - if (!drv) - return -ENOMEDIUM; - if (bdrv_check_byte_request(bs, offset, count1)) - return -EIO; - - if (!drv->bdrv_pwrite) - return bdrv_pwrite_em(bs, offset, buf1, count1); - return drv->bdrv_pwrite(bs, offset, buf1, count1); -} - /** * Truncate file to 'offset' bytes (needed only for file protocols) */ @@ -760,6 +813,8 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset) return -ENOMEDIUM; if (!drv->bdrv_truncate) return -ENOTSUP; + if (bs->read_only) + return -EACCES; return drv->bdrv_truncate(bs, offset); } @@ -773,7 +828,7 @@ int64_t bdrv_getlength(BlockDriverState *bs) return -ENOMEDIUM; if (!drv->bdrv_getlength) { /* legacy mode */ - return bs->total_sectors * SECTOR_SIZE; + return bs->total_sectors * BDRV_SECTOR_SIZE; } return drv->bdrv_getlength(bs); } @@ -786,7 +841,7 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) if (length < 0) length = 0; else - length = length >> SECTOR_BITS; + length = length >> BDRV_SECTOR_BITS; *nb_sectors_ptr = length; } @@ -954,11 +1009,23 @@ int bdrv_is_read_only(BlockDriverState *bs) return bs->read_only; } +int bdrv_set_read_only(BlockDriverState *bs, int read_only) +{ + int ret = bs->read_only; + bs->read_only = read_only; + return ret; +} + int bdrv_is_sg(BlockDriverState *bs) { return bs->sg; } +int bdrv_enable_write_cache(BlockDriverState *bs) +{ + return bs->enable_write_cache; +} + /* XXX: no longer used */ void bdrv_set_change_cb(BlockDriverState *bs, void (*change_cb)(void *opaque), void *opaque) @@ -1054,10 +1121,8 @@ const char *bdrv_get_device_name(BlockDriverState *bs) void bdrv_flush(BlockDriverState *bs) { - if (bs->drv->bdrv_flush) + if (bs->drv && bs->drv->bdrv_flush) bs->drv->bdrv_flush(bs); - if (bs->backing_hd) - bdrv_flush(bs->backing_hd); } void bdrv_flush_all(void) @@ -1097,61 +1162,203 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum); } -void bdrv_info(Monitor *mon) +static void bdrv_print_dict(QObject *obj, void *opaque) +{ + QDict *bs_dict; + Monitor *mon = opaque; + + bs_dict = qobject_to_qdict(obj); + + monitor_printf(mon, "%s: type=%s removable=%d", + qdict_get_str(bs_dict, "device"), + qdict_get_str(bs_dict, "type"), + qdict_get_bool(bs_dict, "removable")); + + if (qdict_get_bool(bs_dict, "removable")) { + monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked")); + } + + if (qdict_haskey(bs_dict, "inserted")) { + QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted")); + + monitor_printf(mon, " file="); + monitor_print_filename(mon, qdict_get_str(qdict, "file")); + if (qdict_haskey(qdict, "backing_file")) { + monitor_printf(mon, " backing_file="); + monitor_print_filename(mon, qdict_get_str(qdict, "backing_file")); + } + monitor_printf(mon, " ro=%d drv=%s encrypted=%d", + qdict_get_bool(qdict, "ro"), + qdict_get_str(qdict, "drv"), + qdict_get_bool(qdict, "encrypted")); + } else { + monitor_printf(mon, " [not inserted]"); + } + + monitor_printf(mon, "\n"); +} + +void bdrv_info_print(Monitor *mon, const QObject *data) { + qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon); +} + +/** + * bdrv_info(): Block devices information + * + * Each block device information is stored in a QDict and the + * returned QObject is a QList of all devices. + * + * The QDict contains the following: + * + * - "device": device name + * - "type": device type + * - "removable": true if the device is removable, false otherwise + * - "locked": true if the device is locked, false otherwise + * - "inserted": only present if the device is inserted, it is a QDict + * containing the following: + * - "file": device file name + * - "ro": true if read-only, false otherwise + * - "drv": driver format name + * - "backing_file": backing file name if one is used + * - "encrypted": true if encrypted, false otherwise + * + * Example: + * + * [ { "device": "ide0-hd0", "type": "hd", "removable": false, "locked": false, + * "inserted": { "file": "/tmp/foobar", "ro": false, "drv": "qcow2" } }, + * { "device": "floppy0", "type": "floppy", "removable": true, + * "locked": false } ] + */ +void bdrv_info(Monitor *mon, QObject **ret_data) +{ + QList *bs_list; BlockDriverState *bs; + bs_list = qlist_new(); + for (bs = bdrv_first; bs != NULL; bs = bs->next) { - monitor_printf(mon, "%s:", bs->device_name); - monitor_printf(mon, " type="); + QObject *bs_obj; + const char *type = "unknown"; + switch(bs->type) { case BDRV_TYPE_HD: - monitor_printf(mon, "hd"); + type = "hd"; break; case BDRV_TYPE_CDROM: - monitor_printf(mon, "cdrom"); + type = "cdrom"; break; case BDRV_TYPE_FLOPPY: - monitor_printf(mon, "floppy"); + type = "floppy"; break; } - monitor_printf(mon, " removable=%d", bs->removable); - if (bs->removable) { - monitor_printf(mon, " locked=%d", bs->locked); - } + + bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': %s, " + "'removable': %i, 'locked': %i }", + bs->device_name, type, bs->removable, + bs->locked); + assert(bs_obj != NULL); + if (bs->drv) { - monitor_printf(mon, " file="); - monitor_print_filename(mon, bs->filename); + QObject *obj; + QDict *bs_dict = qobject_to_qdict(bs_obj); + + obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, " + "'encrypted': %i }", + bs->filename, bs->read_only, + bs->drv->format_name, + bdrv_is_encrypted(bs)); + assert(obj != NULL); if (bs->backing_file[0] != '\0') { - monitor_printf(mon, " backing_file="); - monitor_print_filename(mon, bs->backing_file); + QDict *qdict = qobject_to_qdict(obj); + qdict_put(qdict, "backing_file", + qstring_from_str(bs->backing_file)); } - monitor_printf(mon, " ro=%d", bs->read_only); - monitor_printf(mon, " drv=%s", bs->drv->format_name); - monitor_printf(mon, " encrypted=%d", bdrv_is_encrypted(bs)); - } else { - monitor_printf(mon, " [not inserted]"); + + qdict_put_obj(bs_dict, "inserted", obj); } - monitor_printf(mon, "\n"); + qlist_append_obj(bs_list, bs_obj); } + + *ret_data = QOBJECT(bs_list); +} + +static void bdrv_stats_iter(QObject *data, void *opaque) +{ + QDict *qdict; + Monitor *mon = opaque; + + qdict = qobject_to_qdict(data); + monitor_printf(mon, "%s:", qdict_get_str(qdict, "device")); + + qdict = qobject_to_qdict(qdict_get(qdict, "stats")); + monitor_printf(mon, " rd_bytes=%" PRId64 + " wr_bytes=%" PRId64 + " rd_operations=%" PRId64 + " wr_operations=%" PRId64 + "\n", + qdict_get_int(qdict, "rd_bytes"), + qdict_get_int(qdict, "wr_bytes"), + qdict_get_int(qdict, "rd_operations"), + qdict_get_int(qdict, "wr_operations")); } -/* The "info blockstats" command. */ -void bdrv_info_stats(Monitor *mon) +void bdrv_stats_print(Monitor *mon, const QObject *data) { + qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon); +} + +/** + * bdrv_info_stats(): show block device statistics + * + * Each device statistic information is stored in a QDict and + * the returned QObject is a QList of all devices. + * + * The QDict contains the following: + * + * - "device": device name + * - "stats": A QDict with the statistics information, it contains: + * - "rd_bytes": bytes read + * - "wr_bytes": bytes written + * - "rd_operations": read operations + * - "wr_operations": write operations + * + * Example: + * + * [ { "device": "ide0-hd0", + * "stats": { "rd_bytes": 512, + * "wr_bytes": 0, + * "rd_operations": 1, + * "wr_operations": 0 } }, + * { "device": "ide1-cd0", + * "stats": { "rd_bytes": 0, + * "wr_bytes": 0, + * "rd_operations": 0, + * "wr_operations": 0 } } ] + */ +void bdrv_info_stats(Monitor *mon, QObject **ret_data) +{ + QObject *obj; + QList *devices; BlockDriverState *bs; + devices = qlist_new(); + for (bs = bdrv_first; bs != NULL; bs = bs->next) { - monitor_printf(mon, "%s:" - " rd_bytes=%" PRIu64 - " wr_bytes=%" PRIu64 - " rd_operations=%" PRIu64 - " wr_operations=%" PRIu64 - "\n", - bs->device_name, - bs->rd_bytes, bs->wr_bytes, - bs->rd_ops, bs->wr_ops); + obj = qobject_from_jsonf("{ 'device': %s, 'stats': {" + "'rd_bytes': %" PRId64 "," + "'wr_bytes': %" PRId64 "," + "'rd_operations': %" PRId64 "," + "'wr_operations': %" PRId64 + "} }", + bs->device_name, + bs->rd_bytes, bs->wr_bytes, + bs->rd_ops, bs->wr_ops); + assert(obj != NULL); + qlist_append_obj(devices, obj); } + + *ret_data = QOBJECT(devices); } const char *bdrv_get_encrypted_filename(BlockDriverState *bs) @@ -1167,7 +1374,7 @@ const char *bdrv_get_encrypted_filename(BlockDriverState *bs) void bdrv_get_backing_filename(BlockDriverState *bs, char *filename, int filename_size) { - if (!bs->backing_hd) { + if (!bs->backing_file) { pstrcpy(filename, filename_size, ""); } else { pstrcpy(filename, filename_size, bs->backing_file); @@ -1182,6 +1389,13 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, return -ENOMEDIUM; if (!drv->bdrv_write_compressed) return -ENOTSUP; + if (bdrv_check_request(bs, sector_num, nb_sectors)) + return -EIO; + + if (bs->dirty_bitmap) { + set_dirty_bitmap(bs, sector_num, nb_sectors, 1); + } + return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); } @@ -1196,6 +1410,28 @@ int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) return drv->bdrv_get_info(bs, bdi); } +int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, + int64_t pos, int size) +{ + BlockDriver *drv = bs->drv; + if (!drv) + return -ENOMEDIUM; + if (!drv->bdrv_save_vmstate) + return -ENOTSUP; + return drv->bdrv_save_vmstate(bs, buf, pos, size); +} + +int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, + int64_t pos, int size) +{ + BlockDriver *drv = bs->drv; + if (!drv) + return -ENOMEDIUM; + if (!drv->bdrv_load_vmstate) + return -ENOTSUP; + return drv->bdrv_load_vmstate(bs, buf, pos, size); +} + /**************************************************************/ /* handling of snapshots */ @@ -1319,78 +1555,9 @@ char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn) /**************************************************************/ /* async I/Os */ -typedef struct VectorTranslationState { - QEMUIOVector *iov; - uint8_t *bounce; - int is_write; - BlockDriverAIOCB *aiocb; - BlockDriverAIOCB *this_aiocb; -} VectorTranslationState; - -static void bdrv_aio_rw_vector_cb(void *opaque, int ret) -{ - VectorTranslationState *s = opaque; - - if (!s->is_write) { - qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size); - } - qemu_vfree(s->bounce); - s->this_aiocb->cb(s->this_aiocb->opaque, ret); - qemu_aio_release(s->this_aiocb); -} - -static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, - int64_t sector_num, - QEMUIOVector *iov, - int nb_sectors, - BlockDriverCompletionFunc *cb, - void *opaque, - int is_write) - -{ - VectorTranslationState *s = qemu_mallocz(sizeof(*s)); - BlockDriverAIOCB *aiocb = qemu_aio_get(bs, cb, opaque); - - s->this_aiocb = aiocb; - s->iov = iov; - s->bounce = qemu_memalign(512, nb_sectors * 512); - s->is_write = is_write; - if (is_write) { - qemu_iovec_to_buffer(s->iov, s->bounce); - s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors, - bdrv_aio_rw_vector_cb, s); - } else { - s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors, - bdrv_aio_rw_vector_cb, s); - } - return aiocb; -} - BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, - QEMUIOVector *iov, int nb_sectors, + QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque) -{ - if (bdrv_check_request(bs, sector_num, nb_sectors)) - return NULL; - - return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors, - cb, opaque, 0); -} - -BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, - QEMUIOVector *iov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) -{ - if (bdrv_check_request(bs, sector_num, nb_sectors)) - return NULL; - - return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors, - cb, opaque, 1); -} - -BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) { BlockDriver *drv = bs->drv; BlockDriverAIOCB *ret; @@ -1400,20 +1567,21 @@ BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num, if (bdrv_check_request(bs, sector_num, nb_sectors)) return NULL; - ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque); + ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors, + cb, opaque); if (ret) { /* Update stats even though technically transfer has not happened. */ - bs->rd_bytes += (unsigned) nb_sectors * SECTOR_SIZE; + bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE; bs->rd_ops ++; } return ret; } -BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) +BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, + QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) { BlockDriver *drv = bs->drv; BlockDriverAIOCB *ret; @@ -1425,79 +1593,324 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num, if (bdrv_check_request(bs, sector_num, nb_sectors)) return NULL; - ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque); + if (bs->dirty_bitmap) { + set_dirty_bitmap(bs, sector_num, nb_sectors, 1); + } + + ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors, + cb, opaque); if (ret) { /* Update stats even though technically transfer has not happened. */ - bs->wr_bytes += (unsigned) nb_sectors * SECTOR_SIZE; + bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE; bs->wr_ops ++; } return ret; } -void bdrv_aio_cancel(BlockDriverAIOCB *acb) + +typedef struct MultiwriteCB { + int error; + int num_requests; + int num_callbacks; + struct { + BlockDriverCompletionFunc *cb; + void *opaque; + QEMUIOVector *free_qiov; + void *free_buf; + } callbacks[]; +} MultiwriteCB; + +static void multiwrite_user_cb(MultiwriteCB *mcb) +{ + int i; + + for (i = 0; i < mcb->num_callbacks; i++) { + mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error); + qemu_free(mcb->callbacks[i].free_qiov); + qemu_free(mcb->callbacks[i].free_buf); + } +} + +static void multiwrite_cb(void *opaque, int ret) { - BlockDriver *drv = acb->bs->drv; + MultiwriteCB *mcb = opaque; - if (acb->cb == bdrv_aio_rw_vector_cb) { - VectorTranslationState *s = acb->opaque; - acb = s->aiocb; + if (ret < 0) { + mcb->error = ret; + multiwrite_user_cb(mcb); } - drv->bdrv_aio_cancel(acb); + mcb->num_requests--; + if (mcb->num_requests == 0) { + if (mcb->error == 0) { + multiwrite_user_cb(mcb); + } + qemu_free(mcb); + } +} + +static int multiwrite_req_compare(const void *a, const void *b) +{ + return (((BlockRequest*) a)->sector - ((BlockRequest*) b)->sector); +} + +/* + * Takes a bunch of requests and tries to merge them. Returns the number of + * requests that remain after merging. + */ +static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, + int num_reqs, MultiwriteCB *mcb) +{ + int i, outidx; + + // Sort requests by start sector + qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare); + + // Check if adjacent requests touch the same clusters. If so, combine them, + // filling up gaps with zero sectors. + outidx = 0; + for (i = 1; i < num_reqs; i++) { + int merge = 0; + int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors; + + // This handles the cases that are valid for all block drivers, namely + // exactly sequential writes and overlapping writes. + if (reqs[i].sector <= oldreq_last) { + merge = 1; + } + + // The block driver may decide that it makes sense to combine requests + // even if there is a gap of some sectors between them. In this case, + // the gap is filled with zeros (therefore only applicable for yet + // unused space in format like qcow2). + if (!merge && bs->drv->bdrv_merge_requests) { + merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]); + } + + if (merge) { + size_t size; + QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov)); + qemu_iovec_init(qiov, + reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1); + + // Add the first request to the merged one. If the requests are + // overlapping, drop the last sectors of the first request. + size = (reqs[i].sector - reqs[outidx].sector) << 9; + qemu_iovec_concat(qiov, reqs[outidx].qiov, size); + + // We might need to add some zeros between the two requests + if (reqs[i].sector > oldreq_last) { + size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9; + uint8_t *buf = qemu_blockalign(bs, zero_bytes); + memset(buf, 0, zero_bytes); + qemu_iovec_add(qiov, buf, zero_bytes); + mcb->callbacks[i].free_buf = buf; + } + + // Add the second request + qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size); + + reqs[outidx].nb_sectors += reqs[i].nb_sectors; + reqs[outidx].qiov = qiov; + + mcb->callbacks[i].free_qiov = reqs[outidx].qiov; + } else { + outidx++; + reqs[outidx].sector = reqs[i].sector; + reqs[outidx].nb_sectors = reqs[i].nb_sectors; + reqs[outidx].qiov = reqs[i].qiov; + } + } + + return outidx + 1; +} + +/* + * Submit multiple AIO write requests at once. + * + * On success, the function returns 0 and all requests in the reqs array have + * been submitted. In error case this function returns -1, and any of the + * requests may or may not be submitted yet. In particular, this means that the + * callback will be called for some of the requests, for others it won't. The + * caller must check the error field of the BlockRequest to wait for the right + * callbacks (if error != 0, no callback will be called). + * + * The implementation may modify the contents of the reqs array, e.g. to merge + * requests. However, the fields opaque and error are left unmodified as they + * are used to signal failure for a single request to the caller. + */ +int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs) +{ + BlockDriverAIOCB *acb; + MultiwriteCB *mcb; + int i; + + if (num_reqs == 0) { + return 0; + } + + // Create MultiwriteCB structure + mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks)); + mcb->num_requests = 0; + mcb->num_callbacks = num_reqs; + + for (i = 0; i < num_reqs; i++) { + mcb->callbacks[i].cb = reqs[i].cb; + mcb->callbacks[i].opaque = reqs[i].opaque; + } + + // Check for mergable requests + num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb); + + // Run the aio requests + for (i = 0; i < num_reqs; i++) { + acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov, + reqs[i].nb_sectors, multiwrite_cb, mcb); + + if (acb == NULL) { + // We can only fail the whole thing if no request has been + // submitted yet. Otherwise we'll wait for the submitted AIOs to + // complete and report the error in the callback. + if (mcb->num_requests == 0) { + reqs[i].error = EIO; + goto fail; + } else { + mcb->error = EIO; + break; + } + } else { + mcb->num_requests++; + } + } + + return 0; + +fail: + free(mcb); + return -1; +} + +BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BlockDriver *drv = bs->drv; + + if (!drv) + return NULL; + return drv->bdrv_aio_flush(bs, cb, opaque); +} + +void bdrv_aio_cancel(BlockDriverAIOCB *acb) +{ + acb->pool->cancel(acb); } /**************************************************************/ /* async block device emulation */ +typedef struct BlockDriverAIOCBSync { + BlockDriverAIOCB common; + QEMUBH *bh; + int ret; + /* vector translation state */ + QEMUIOVector *qiov; + uint8_t *bounce; + int is_write; +} BlockDriverAIOCBSync; + +static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb) +{ + BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb; + qemu_bh_delete(acb->bh); + acb->bh = NULL; + qemu_aio_release(acb); +} + +static AIOPool bdrv_em_aio_pool = { + .aiocb_size = sizeof(BlockDriverAIOCBSync), + .cancel = bdrv_aio_cancel_em, +}; + static void bdrv_aio_bh_cb(void *opaque) { BlockDriverAIOCBSync *acb = opaque; + + if (!acb->is_write) + qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size); + qemu_vfree(acb->bounce); acb->common.cb(acb->common.opaque, acb->ret); + qemu_bh_delete(acb->bh); + acb->bh = NULL; qemu_aio_release(acb); } -static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs, - int64_t sector_num, uint8_t *buf, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) +static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, + int64_t sector_num, + QEMUIOVector *qiov, + int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque, + int is_write) + { BlockDriverAIOCBSync *acb; - int ret; - acb = qemu_aio_get(bs, cb, opaque); + acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque); + acb->is_write = is_write; + acb->qiov = qiov; + acb->bounce = qemu_blockalign(bs, qiov->size); + if (!acb->bh) acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); - ret = bdrv_read(bs, sector_num, buf, nb_sectors); - acb->ret = ret; + + if (is_write) { + qemu_iovec_to_buffer(acb->qiov, acb->bounce); + acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors); + } else { + acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors); + } + qemu_bh_schedule(acb->bh); + return &acb->common; } -static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs, - int64_t sector_num, const uint8_t *buf, int nb_sectors, +static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); +} + +static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); +} + +static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { BlockDriverAIOCBSync *acb; - int ret; - acb = qemu_aio_get(bs, cb, opaque); + acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque); + acb->is_write = 1; /* don't bounce in the completion hadler */ + acb->qiov = NULL; + acb->bounce = NULL; + acb->ret = 0; + if (!acb->bh) acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); - ret = bdrv_write(bs, sector_num, buf, nb_sectors); - acb->ret = ret; + + bdrv_flush(bs); qemu_bh_schedule(acb->bh); return &acb->common; } -static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb) -{ - BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb; - qemu_bh_cancel(acb->bh); - qemu_aio_release(acb); -} - /**************************************************************/ /* sync block device emulation */ @@ -1513,17 +1926,29 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, { int async_ret; BlockDriverAIOCB *acb; + struct iovec iov; + QEMUIOVector qiov; + + async_context_push(); async_ret = NOT_DONE; - acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors, - bdrv_rw_em_cb, &async_ret); - if (acb == NULL) - return -1; + iov.iov_base = (void *)buf; + iov.iov_len = nb_sectors * 512; + qemu_iovec_init_external(&qiov, &iov, 1); + acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors, + bdrv_rw_em_cb, &async_ret); + if (acb == NULL) { + async_ret = -1; + goto fail; + } while (async_ret == NOT_DONE) { qemu_aio_wait(); } + +fail: + async_context_pop(); return async_ret; } @@ -1532,49 +1957,52 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, { int async_ret; BlockDriverAIOCB *acb; + struct iovec iov; + QEMUIOVector qiov; + + async_context_push(); async_ret = NOT_DONE; - acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors, - bdrv_rw_em_cb, &async_ret); - if (acb == NULL) - return -1; + iov.iov_base = (void *)buf; + iov.iov_len = nb_sectors * 512; + qemu_iovec_init_external(&qiov, &iov, 1); + acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors, + bdrv_rw_em_cb, &async_ret); + if (acb == NULL) { + async_ret = -1; + goto fail; + } while (async_ret == NOT_DONE) { qemu_aio_wait(); } + +fail: + async_context_pop(); return async_ret; } void bdrv_init(void) { - bdrv_register(&bdrv_raw); - bdrv_register(&bdrv_host_device); -#ifndef _WIN32 - bdrv_register(&bdrv_cow); -#endif - bdrv_register(&bdrv_qcow); - bdrv_register(&bdrv_vmdk); - bdrv_register(&bdrv_cloop); - bdrv_register(&bdrv_dmg); - bdrv_register(&bdrv_bochs); - bdrv_register(&bdrv_vpc); - bdrv_register(&bdrv_vvfat); - bdrv_register(&bdrv_qcow2); - bdrv_register(&bdrv_parallels); - bdrv_register(&bdrv_nbd); + module_call_init(MODULE_INIT_BLOCK); } -void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb, - void *opaque) +void bdrv_init_with_whitelist(void) +{ + use_bdrv_whitelist = 1; + bdrv_init(); +} + +void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) { - BlockDriver *drv; BlockDriverAIOCB *acb; - drv = bs->drv; - if (drv->free_aiocb) { - acb = drv->free_aiocb; - drv->free_aiocb = acb->next; + if (pool->free_aiocb) { + acb = pool->free_aiocb; + pool->free_aiocb = acb->next; } else { - acb = qemu_mallocz(drv->aiocb_size); + acb = qemu_mallocz(pool->aiocb_size); + acb->pool = pool; } acb->bs = bs; acb->cb = cb; @@ -1584,10 +2012,10 @@ void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void qemu_aio_release(void *p) { - BlockDriverAIOCB *acb = p; - BlockDriver *drv = acb->bs->drv; - acb->next = drv->free_aiocb; - drv->free_aiocb = acb; + BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p; + AIOPool *pool = acb->pool; + acb->next = pool->free_aiocb; + pool->free_aiocb = acb; } /**************************************************************/ @@ -1630,11 +2058,15 @@ int bdrv_media_changed(BlockDriverState *bs) /** * If eject_flag is TRUE, eject the media. Otherwise, close the tray */ -void bdrv_eject(BlockDriverState *bs, int eject_flag) +int bdrv_eject(BlockDriverState *bs, int eject_flag) { BlockDriver *drv = bs->drv; int ret; + if (bs->locked) { + return -EBUSY; + } + if (!drv || !drv->bdrv_eject) { ret = -ENOTSUP; } else { @@ -1643,7 +2075,10 @@ void bdrv_eject(BlockDriverState *bs, int eject_flag) if (ret == -ENOTSUP) { if (eject_flag) bdrv_close(bs); + ret = 0; } + + return ret; } int bdrv_is_locked(BlockDriverState *bs) @@ -1675,3 +2110,60 @@ int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) return drv->bdrv_ioctl(bs, req, buf); return -ENOTSUP; } + +BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, + unsigned long int req, void *buf, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BlockDriver *drv = bs->drv; + + if (drv && drv->bdrv_aio_ioctl) + return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque); + return NULL; +} + + + +void *qemu_blockalign(BlockDriverState *bs, size_t size) +{ + return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size); +} + +void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable) +{ + int64_t bitmap_size; + + if (enable) { + if (!bs->dirty_bitmap) { + bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) + + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1; + bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8; + + bs->dirty_bitmap = qemu_mallocz(bitmap_size); + } + } else { + if (bs->dirty_bitmap) { + qemu_free(bs->dirty_bitmap); + bs->dirty_bitmap = NULL; + } + } +} + +int bdrv_get_dirty(BlockDriverState *bs, int64_t sector) +{ + int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK; + + if (bs->dirty_bitmap && + (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) { + return bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] & + (1 << (chunk % (sizeof(unsigned long) * 8))); + } else { + return 0; + } +} + +void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors) +{ + set_dirty_bitmap(bs, cur_sector, nr_sectors, 0); +}