Git Repo - qemu.git/blob - block.c
block: Leave enforcing tray lock to device models
[qemu.git] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "monitor.h"
28 #include "block_int.h"
29 #include "module.h"
30 #include "qemu-objects.h"
31 #include "qemu-coroutine.h"
32
33 #ifdef CONFIG_BSD
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/ioctl.h>
37 #include <sys/queue.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
42
43 #ifdef _WIN32
44 #include <windows.h>
45 #endif
46
47 static void bdrv_dev_change_media_cb(BlockDriverState *bs);
48 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
49         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
50         BlockDriverCompletionFunc *cb, void *opaque);
51 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
52         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
53         BlockDriverCompletionFunc *cb, void *opaque);
54 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
55         BlockDriverCompletionFunc *cb, void *opaque);
56 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
57         BlockDriverCompletionFunc *cb, void *opaque);
58 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
59                         uint8_t *buf, int nb_sectors);
60 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
61                          const uint8_t *buf, int nb_sectors);
62 static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
63         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
64         BlockDriverCompletionFunc *cb, void *opaque);
65 static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
66         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
67         BlockDriverCompletionFunc *cb, void *opaque);
68 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
69                                          int64_t sector_num, int nb_sectors,
70                                          QEMUIOVector *iov);
71 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
72                                          int64_t sector_num, int nb_sectors,
73                                          QEMUIOVector *iov);
74 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
75
76 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
77     QTAILQ_HEAD_INITIALIZER(bdrv_states);
78
79 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
80     QLIST_HEAD_INITIALIZER(bdrv_drivers);
81
82 /* The device to use for VM snapshots */
83 static BlockDriverState *bs_snapshots;
84
85 /* If non-zero, use only whitelisted block drivers */
86 static int use_bdrv_whitelist;
87
88 #ifdef _WIN32
/*
 * Return 1 if filename starts with an ASCII letter immediately followed by
 * ':' (for example "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char drive = filename[0];
    int is_letter;

    is_letter = (drive >= 'a' && drive <= 'z') ||
                (drive >= 'A' && drive <= 'Z');
    if (!is_letter) {
        /* Do not look at filename[1]: the string may be empty */
        return 0;
    }
    return filename[1] == ':';
}
95
/*
 * Return 1 if filename is either exactly a drive prefix (letter + ':' and
 * nothing else) or starts with one of the device prefixes tested below;
 * return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
106 #endif
107
/*
 * Check whether path carries a "<protocol>:" prefix.
 *
 * Any ':' in the path counts. On Windows, paths recognised as drives or
 * starting with a drive letter are excluded first.
 * Returns non-zero if a protocol is present, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon != NULL;
}
120
/*
 * Return non-zero if path is absolute.
 *
 * The part after the first ':' (or the whole string when there is none)
 * must begin with a directory separator. On Windows both '/' and '\\' are
 * separators, and a path starting with a separator is absolute.
 */
int path_is_absolute(const char *path)
{
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    /* Skip everything up to and including the first ':' */
    start = strchr(path, ':');
    start = start ? start + 1 : path;

#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
140
/*
 * Build in dest (at most dest_size bytes, always NUL-terminated) the path
 * of filename interpreted relative to base_path.
 *
 * If filename is absolute it is copied unchanged. Otherwise the prefix of
 * base_path up to and including its last directory separator, or up to and
 * including its first ':' when that comes later, is prepended to filename.
 * URLs are supported. Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep, *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' of base_path, if there is one */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* Position just past the last directory separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever prefix reaches further into base_path */
    prefix_end = (after_sep > after_proto) ? after_sep : after_proto;
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }

    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
184
185 void bdrv_register(BlockDriver *bdrv)
186 {
187     if (bdrv->bdrv_co_readv) {
188         /* Emulate AIO by coroutines, and sync by AIO */
189         bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
190         bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
191         bdrv->bdrv_read = bdrv_read_em;
192         bdrv->bdrv_write = bdrv_write_em;
193      } else {
194         bdrv->bdrv_co_readv = bdrv_co_readv_em;
195         bdrv->bdrv_co_writev = bdrv_co_writev_em;
196
197         if (!bdrv->bdrv_aio_readv) {
198             /* add AIO emulation layer */
199             bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
200             bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
201         } else if (!bdrv->bdrv_read) {
202             /* add synchronous IO emulation layer */
203             bdrv->bdrv_read = bdrv_read_em;
204             bdrv->bdrv_write = bdrv_write_em;
205         }
206     }
207
208     if (!bdrv->bdrv_aio_flush)
209         bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
210
211     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
212 }
213
214 /* create a new block device (by default it is empty) */
215 BlockDriverState *bdrv_new(const char *device_name)
216 {
217     BlockDriverState *bs;
218
219     bs = g_malloc0(sizeof(BlockDriverState));
220     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
221     if (device_name[0] != '\0') {
222         QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
223     }
224     return bs;
225 }
226
227 BlockDriver *bdrv_find_format(const char *format_name)
228 {
229     BlockDriver *drv1;
230     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
231         if (!strcmp(drv1->format_name, format_name)) {
232             return drv1;
233         }
234     }
235     return NULL;
236 }
237
238 static int bdrv_is_whitelisted(BlockDriver *drv)
239 {
240     static const char *whitelist[] = {
241         CONFIG_BDRV_WHITELIST
242     };
243     const char **p;
244
245     if (!whitelist[0])
246         return 1;               /* no whitelist, anything goes */
247
248     for (p = whitelist; *p; p++) {
249         if (!strcmp(drv->format_name, *p)) {
250             return 1;
251         }
252     }
253     return 0;
254 }
255
256 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
257 {
258     BlockDriver *drv = bdrv_find_format(format_name);
259     return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
260 }
261
262 int bdrv_create(BlockDriver *drv, const char* filename,
263     QEMUOptionParameter *options)
264 {
265     if (!drv->bdrv_create)
266         return -ENOTSUP;
267
268     return drv->bdrv_create(filename, options);
269 }
270
271 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
272 {
273     BlockDriver *drv;
274
275     drv = bdrv_find_protocol(filename);
276     if (drv == NULL) {
277         return -ENOENT;
278     }
279
280     return bdrv_create(drv, filename, options);
281 }
282
283 #ifdef _WIN32
284 void get_tmp_filename(char *filename, int size)
285 {
286     char temp_dir[MAX_PATH];
287
288     GetTempPath(MAX_PATH, temp_dir);
289     GetTempFileName(temp_dir, "qem", 0, filename);
290 }
291 #else
/*
 * Store the name of a new temporary file in filename (at most size bytes).
 *
 * The file is created by mkstemp() in $TMPDIR (or "/tmp" when TMPDIR is
 * unset) and closed again right away; only its name is handed back.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);

    fd = mkstemp(filename);
    if (fd >= 0) {
        /* Only close a descriptor that mkstemp() actually returned */
        close(fd);
    }
}
304 #endif
305
306 /*
307  * Detect host devices. By convention, /dev/cdrom[N] is always
308  * recognized as a host CDROM.
309  */
310 static BlockDriver *find_hdev_driver(const char *filename)
311 {
312     int score_max = 0, score;
313     BlockDriver *drv = NULL, *d;
314
315     QLIST_FOREACH(d, &bdrv_drivers, list) {
316         if (d->bdrv_probe_device) {
317             score = d->bdrv_probe_device(filename);
318             if (score > score_max) {
319                 score_max = score;
320                 drv = d;
321             }
322         }
323     }
324
325     return drv;
326 }
327
328 BlockDriver *bdrv_find_protocol(const char *filename)
329 {
330     BlockDriver *drv1;
331     char protocol[128];
332     int len;
333     const char *p;
334
335     /* TODO Drivers without bdrv_file_open must be specified explicitly */
336
337     /*
338      * XXX(hch): we really should not let host device detection
339      * override an explicit protocol specification, but moving this
340      * later breaks access to device names with colons in them.
341      * Thanks to the brain-dead persistent naming schemes on udev-
342      * based Linux systems those actually are quite common.
343      */
344     drv1 = find_hdev_driver(filename);
345     if (drv1) {
346         return drv1;
347     }
348
349     if (!path_has_protocol(filename)) {
350         return bdrv_find_format("file");
351     }
352     p = strchr(filename, ':');
353     assert(p != NULL);
354     len = p - filename;
355     if (len > sizeof(protocol) - 1)
356         len = sizeof(protocol) - 1;
357     memcpy(protocol, filename, len);
358     protocol[len] = '\0';
359     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
360         if (drv1->protocol_name &&
361             !strcmp(drv1->protocol_name, protocol)) {
362             return drv1;
363         }
364     }
365     return NULL;
366 }
367
368 static int find_image_format(const char *filename, BlockDriver **pdrv)
369 {
370     int ret, score, score_max;
371     BlockDriver *drv1, *drv;
372     uint8_t buf[2048];
373     BlockDriverState *bs;
374
375     ret = bdrv_file_open(&bs, filename, 0);
376     if (ret < 0) {
377         *pdrv = NULL;
378         return ret;
379     }
380
381     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
382     if (bs->sg || !bdrv_is_inserted(bs)) {
383         bdrv_delete(bs);
384         drv = bdrv_find_format("raw");
385         if (!drv) {
386             ret = -ENOENT;
387         }
388         *pdrv = drv;
389         return ret;
390     }
391
392     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
393     bdrv_delete(bs);
394     if (ret < 0) {
395         *pdrv = NULL;
396         return ret;
397     }
398
399     score_max = 0;
400     drv = NULL;
401     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
402         if (drv1->bdrv_probe) {
403             score = drv1->bdrv_probe(buf, ret, filename);
404             if (score > score_max) {
405                 score_max = score;
406                 drv = drv1;
407             }
408         }
409     }
410     if (!drv) {
411         ret = -ENOENT;
412     }
413     *pdrv = drv;
414     return ret;
415 }
416
417 /**
418  * Set the current 'total_sectors' value
419  */
420 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
421 {
422     BlockDriver *drv = bs->drv;
423
424     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
425     if (bs->sg)
426         return 0;
427
428     /* query actual device if possible, otherwise just trust the hint */
429     if (drv->bdrv_getlength) {
430         int64_t length = drv->bdrv_getlength(bs);
431         if (length < 0) {
432             return length;
433         }
434         hint = length >> BDRV_SECTOR_BITS;
435     }
436
437     bs->total_sectors = hint;
438     return 0;
439 }
440
441 /**
442  * Set open flags for a given cache mode
443  *
444  * Return 0 on success, -1 if the cache mode was invalid.
445  */
446 int bdrv_parse_cache_flags(const char *mode, int *flags)
447 {
448     *flags &= ~BDRV_O_CACHE_MASK;
449
450     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
451         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
452     } else if (!strcmp(mode, "directsync")) {
453         *flags |= BDRV_O_NOCACHE;
454     } else if (!strcmp(mode, "writeback")) {
455         *flags |= BDRV_O_CACHE_WB;
456     } else if (!strcmp(mode, "unsafe")) {
457         *flags |= BDRV_O_CACHE_WB;
458         *flags |= BDRV_O_NO_FLUSH;
459     } else if (!strcmp(mode, "writethrough")) {
460         /* this is the default */
461     } else {
462         return -1;
463     }
464
465     return 0;
466 }
467
468 /*
469  * Common part for opening disk images and files
470  */
471 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
472     int flags, BlockDriver *drv)
473 {
474     int ret, open_flags;
475
476     assert(drv != NULL);
477
478     bs->file = NULL;
479     bs->total_sectors = 0;
480     bs->encrypted = 0;
481     bs->valid_key = 0;
482     bs->open_flags = flags;
483     /* buffer_alignment defaulted to 512, drivers can change this value */
484     bs->buffer_alignment = 512;
485
486     pstrcpy(bs->filename, sizeof(bs->filename), filename);
487
488     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
489         return -ENOTSUP;
490     }
491
492     bs->drv = drv;
493     bs->opaque = g_malloc0(drv->instance_size);
494
495     if (flags & BDRV_O_CACHE_WB)
496         bs->enable_write_cache = 1;
497
498     /*
499      * Clear flags that are internal to the block layer before opening the
500      * image.
501      */
502     open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
503
504     /*
505      * Snapshots should be writable.
506      */
507     if (bs->is_temporary) {
508         open_flags |= BDRV_O_RDWR;
509     }
510
511     /* Open the image, either directly or using a protocol */
512     if (drv->bdrv_file_open) {
513         ret = drv->bdrv_file_open(bs, filename, open_flags);
514     } else {
515         ret = bdrv_file_open(&bs->file, filename, open_flags);
516         if (ret >= 0) {
517             ret = drv->bdrv_open(bs, open_flags);
518         }
519     }
520
521     if (ret < 0) {
522         goto free_and_fail;
523     }
524
525     bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
526
527     ret = refresh_total_sectors(bs, bs->total_sectors);
528     if (ret < 0) {
529         goto free_and_fail;
530     }
531
532 #ifndef _WIN32
533     if (bs->is_temporary) {
534         unlink(filename);
535     }
536 #endif
537     return 0;
538
539 free_and_fail:
540     if (bs->file) {
541         bdrv_delete(bs->file);
542         bs->file = NULL;
543     }
544     g_free(bs->opaque);
545     bs->opaque = NULL;
546     bs->drv = NULL;
547     return ret;
548 }
549
550 /*
551  * Opens a file using a protocol (file, host_device, nbd, ...)
552  */
553 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
554 {
555     BlockDriverState *bs;
556     BlockDriver *drv;
557     int ret;
558
559     drv = bdrv_find_protocol(filename);
560     if (!drv) {
561         return -ENOENT;
562     }
563
564     bs = bdrv_new("");
565     ret = bdrv_open_common(bs, filename, flags, drv);
566     if (ret < 0) {
567         bdrv_delete(bs);
568         return ret;
569     }
570     bs->growable = 1;
571     *pbs = bs;
572     return 0;
573 }
574
575 /*
576  * Opens a disk image (raw, qcow2, vmdk, ...)
577  */
578 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
579               BlockDriver *drv)
580 {
581     int ret;
582
583     if (flags & BDRV_O_SNAPSHOT) {
584         BlockDriverState *bs1;
585         int64_t total_size;
586         int is_protocol = 0;
587         BlockDriver *bdrv_qcow2;
588         QEMUOptionParameter *options;
589         char tmp_filename[PATH_MAX];
590         char backing_filename[PATH_MAX];
591
592         /* if snapshot, we create a temporary backing file and open it
593            instead of opening 'filename' directly */
594
595         /* if there is a backing file, use it */
596         bs1 = bdrv_new("");
597         ret = bdrv_open(bs1, filename, 0, drv);
598         if (ret < 0) {
599             bdrv_delete(bs1);
600             return ret;
601         }
602         total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
603
604         if (bs1->drv && bs1->drv->protocol_name)
605             is_protocol = 1;
606
607         bdrv_delete(bs1);
608
609         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
610
611         /* Real path is meaningless for protocols */
612         if (is_protocol)
613             snprintf(backing_filename, sizeof(backing_filename),
614                      "%s", filename);
615         else if (!realpath(filename, backing_filename))
616             return -errno;
617
618         bdrv_qcow2 = bdrv_find_format("qcow2");
619         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
620
621         set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
622         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
623         if (drv) {
624             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
625                 drv->format_name);
626         }
627
628         ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
629         free_option_parameters(options);
630         if (ret < 0) {
631             return ret;
632         }
633
634         filename = tmp_filename;
635         drv = bdrv_qcow2;
636         bs->is_temporary = 1;
637     }
638
639     /* Find the right image format driver */
640     if (!drv) {
641         ret = find_image_format(filename, &drv);
642     }
643
644     if (!drv) {
645         goto unlink_and_fail;
646     }
647
648     /* Open the image */
649     ret = bdrv_open_common(bs, filename, flags, drv);
650     if (ret < 0) {
651         goto unlink_and_fail;
652     }
653
654     /* If there is a backing file, use it */
655     if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
656         char backing_filename[PATH_MAX];
657         int back_flags;
658         BlockDriver *back_drv = NULL;
659
660         bs->backing_hd = bdrv_new("");
661
662         if (path_has_protocol(bs->backing_file)) {
663             pstrcpy(backing_filename, sizeof(backing_filename),
664                     bs->backing_file);
665         } else {
666             path_combine(backing_filename, sizeof(backing_filename),
667                          filename, bs->backing_file);
668         }
669
670         if (bs->backing_format[0] != '\0') {
671             back_drv = bdrv_find_format(bs->backing_format);
672         }
673
674         /* backing files always opened read-only */
675         back_flags =
676             flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
677
678         ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
679         if (ret < 0) {
680             bdrv_close(bs);
681             return ret;
682         }
683         if (bs->is_temporary) {
684             bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
685         } else {
686             /* base image inherits from "parent" */
687             bs->backing_hd->keep_read_only = bs->keep_read_only;
688         }
689     }
690
691     if (!bdrv_key_required(bs)) {
692         bdrv_dev_change_media_cb(bs);
693     }
694
695     return 0;
696
697 unlink_and_fail:
698     if (bs->is_temporary) {
699         unlink(filename);
700     }
701     return ret;
702 }
703
704 void bdrv_close(BlockDriverState *bs)
705 {
706     if (bs->drv) {
707         if (bs == bs_snapshots) {
708             bs_snapshots = NULL;
709         }
710         if (bs->backing_hd) {
711             bdrv_delete(bs->backing_hd);
712             bs->backing_hd = NULL;
713         }
714         bs->drv->bdrv_close(bs);
715         g_free(bs->opaque);
716 #ifdef _WIN32
717         if (bs->is_temporary) {
718             unlink(bs->filename);
719         }
720 #endif
721         bs->opaque = NULL;
722         bs->drv = NULL;
723
724         if (bs->file != NULL) {
725             bdrv_close(bs->file);
726         }
727
728         bdrv_dev_change_media_cb(bs);
729     }
730 }
731
732 void bdrv_close_all(void)
733 {
734     BlockDriverState *bs;
735
736     QTAILQ_FOREACH(bs, &bdrv_states, list) {
737         bdrv_close(bs);
738     }
739 }
740
741 /* make a BlockDriverState anonymous by removing from bdrv_state list.
742    Also, NULL terminate the device_name to prevent double remove */
743 void bdrv_make_anon(BlockDriverState *bs)
744 {
745     if (bs->device_name[0] != '\0') {
746         QTAILQ_REMOVE(&bdrv_states, bs, list);
747     }
748     bs->device_name[0] = '\0';
749 }
750
751 void bdrv_delete(BlockDriverState *bs)
752 {
753     assert(!bs->dev);
754
755     /* remove from list, if necessary */
756     bdrv_make_anon(bs);
757
758     bdrv_close(bs);
759     if (bs->file != NULL) {
760         bdrv_delete(bs->file);
761     }
762
763     assert(bs != bs_snapshots);
764     g_free(bs);
765 }
766
767 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
768 /* TODO change to DeviceState *dev when all users are qdevified */
769 {
770     if (bs->dev) {
771         return -EBUSY;
772     }
773     bs->dev = dev;
774     return 0;
775 }
776
777 /* TODO qdevified devices don't use this, remove when devices are qdevified */
778 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
779 {
780     if (bdrv_attach_dev(bs, dev) < 0) {
781         abort();
782     }
783 }
784
785 void bdrv_detach_dev(BlockDriverState *bs, void *dev)
786 /* TODO change to DeviceState *dev when all users are qdevified */
787 {
788     assert(bs->dev == dev);
789     bs->dev = NULL;
790     bs->dev_ops = NULL;
791     bs->dev_opaque = NULL;
792 }
793
794 /* TODO change to return DeviceState * when all users are qdevified */
795 void *bdrv_get_attached_dev(BlockDriverState *bs)
796 {
797     return bs->dev;
798 }
799
800 void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
801                       void *opaque)
802 {
803     bs->dev_ops = ops;
804     bs->dev_opaque = opaque;
805 }
806
807 static void bdrv_dev_change_media_cb(BlockDriverState *bs)
808 {
809     if (bs->dev_ops && bs->dev_ops->change_media_cb) {
810         bs->dev_ops->change_media_cb(bs->dev_opaque);
811     }
812 }
813
814 static void bdrv_dev_resize_cb(BlockDriverState *bs)
815 {
816     if (bs->dev_ops && bs->dev_ops->resize_cb) {
817         bs->dev_ops->resize_cb(bs->dev_opaque);
818     }
819 }
820
821 /*
822  * Run consistency checks on an image
823  *
824  * Returns 0 if the check could be completed (it doesn't mean that the image is
825  * free of errors) or -errno when an internal error occurred. The results of the
826  * check are stored in res.
827  */
828 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
829 {
830     if (bs->drv->bdrv_check == NULL) {
831         return -ENOTSUP;
832     }
833
834     memset(res, 0, sizeof(*res));
835     return bs->drv->bdrv_check(bs, res);
836 }
837
838 #define COMMIT_BUF_SECTORS 2048
839
840 /* commit COW file into the raw image */
841 int bdrv_commit(BlockDriverState *bs)
842 {
843     BlockDriver *drv = bs->drv;
844     BlockDriver *backing_drv;
845     int64_t sector, total_sectors;
846     int n, ro, open_flags;
847     int ret = 0, rw_ret = 0;
848     uint8_t *buf;
849     char filename[1024];
850     BlockDriverState *bs_rw, *bs_ro;
851
852     if (!drv)
853         return -ENOMEDIUM;
854     
855     if (!bs->backing_hd) {
856         return -ENOTSUP;
857     }
858
859     if (bs->backing_hd->keep_read_only) {
860         return -EACCES;
861     }
862
863     backing_drv = bs->backing_hd->drv;
864     ro = bs->backing_hd->read_only;
865     strncpy(filename, bs->backing_hd->filename, sizeof(filename));
866     open_flags =  bs->backing_hd->open_flags;
867
868     if (ro) {
869         /* re-open as RW */
870         bdrv_delete(bs->backing_hd);
871         bs->backing_hd = NULL;
872         bs_rw = bdrv_new("");
873         rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
874             backing_drv);
875         if (rw_ret < 0) {
876             bdrv_delete(bs_rw);
877             /* try to re-open read-only */
878             bs_ro = bdrv_new("");
879             ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
880                 backing_drv);
881             if (ret < 0) {
882                 bdrv_delete(bs_ro);
883                 /* drive not functional anymore */
884                 bs->drv = NULL;
885                 return ret;
886             }
887             bs->backing_hd = bs_ro;
888             return rw_ret;
889         }
890         bs->backing_hd = bs_rw;
891     }
892
893     total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
894     buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
895
896     for (sector = 0; sector < total_sectors; sector += n) {
897         if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
898
899             if (bdrv_read(bs, sector, buf, n) != 0) {
900                 ret = -EIO;
901                 goto ro_cleanup;
902             }
903
904             if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
905                 ret = -EIO;
906                 goto ro_cleanup;
907             }
908         }
909     }
910
911     if (drv->bdrv_make_empty) {
912         ret = drv->bdrv_make_empty(bs);
913         bdrv_flush(bs);
914     }
915
916     /*
917      * Make sure all data we wrote to the backing device is actually
918      * stable on disk.
919      */
920     if (bs->backing_hd)
921         bdrv_flush(bs->backing_hd);
922
923 ro_cleanup:
924     g_free(buf);
925
926     if (ro) {
927         /* re-open as RO */
928         bdrv_delete(bs->backing_hd);
929         bs->backing_hd = NULL;
930         bs_ro = bdrv_new("");
931         ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
932             backing_drv);
933         if (ret < 0) {
934             bdrv_delete(bs_ro);
935             /* drive not functional anymore */
936             bs->drv = NULL;
937             return ret;
938         }
939         bs->backing_hd = bs_ro;
940         bs->backing_hd->keep_read_only = 0;
941     }
942
943     return ret;
944 }
945
946 void bdrv_commit_all(void)
947 {
948     BlockDriverState *bs;
949
950     QTAILQ_FOREACH(bs, &bdrv_states, list) {
951         bdrv_commit(bs);
952     }
953 }
954
955 /*
956  * Return values:
957  * 0        - success
958  * -EINVAL  - backing format specified, but no file
959  * -ENOSPC  - can't update the backing file because no space is left in the
960  *            image file header
961  * -ENOTSUP - format driver doesn't support changing the backing file
962  */
963 int bdrv_change_backing_file(BlockDriverState *bs,
964     const char *backing_file, const char *backing_fmt)
965 {
966     BlockDriver *drv = bs->drv;
967
968     if (drv->bdrv_change_backing_file != NULL) {
969         return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
970     } else {
971         return -ENOTSUP;
972     }
973 }
974
975 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
976                                    size_t size)
977 {
978     int64_t len;
979
980     if (!bdrv_is_inserted(bs))
981         return -ENOMEDIUM;
982
983     if (bs->growable)
984         return 0;
985
986     len = bdrv_getlength(bs);
987
988     if (offset < 0)
989         return -EIO;
990
991     if ((offset > len) || (len - offset < size))
992         return -EIO;
993
994     return 0;
995 }
996
997 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
998                               int nb_sectors)
999 {
1000     return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1001                                    nb_sectors * BDRV_SECTOR_SIZE);
1002 }
1003
1004 static inline bool bdrv_has_async_rw(BlockDriver *drv)
1005 {
1006     return drv->bdrv_co_readv != bdrv_co_readv_em
1007         || drv->bdrv_aio_readv != bdrv_aio_readv_em;
1008 }
1009
1010 static inline bool bdrv_has_async_flush(BlockDriver *drv)
1011 {
1012     return drv->bdrv_aio_flush != bdrv_aio_flush_em;
1013 }
1014
1015 /* return < 0 if error. See bdrv_write() for the return codes */
1016 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1017               uint8_t *buf, int nb_sectors)
1018 {
1019     BlockDriver *drv = bs->drv;
1020
1021     if (!drv)
1022         return -ENOMEDIUM;
1023
1024     if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
1025         QEMUIOVector qiov;
1026         struct iovec iov = {
1027             .iov_base = (void *)buf,
1028             .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1029         };
1030
1031         qemu_iovec_init_external(&qiov, &iov, 1);
1032         return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
1033     }
1034
1035     if (bdrv_check_request(bs, sector_num, nb_sectors))
1036         return -EIO;
1037
1038     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
1039 }
1040
1041 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
1042                              int nb_sectors, int dirty)
1043 {
1044     int64_t start, end;
1045     unsigned long val, idx, bit;
1046
1047     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
1048     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
1049
1050     for (; start <= end; start++) {
1051         idx = start / (sizeof(unsigned long) * 8);
1052         bit = start % (sizeof(unsigned long) * 8);
1053         val = bs->dirty_bitmap[idx];
1054         if (dirty) {
1055             if (!(val & (1UL << bit))) {
1056                 bs->dirty_count++;
1057                 val |= 1UL << bit;
1058             }
1059         } else {
1060             if (val & (1UL << bit)) {
1061                 bs->dirty_count--;
1062                 val &= ~(1UL << bit);
1063             }
1064         }
1065         bs->dirty_bitmap[idx] = val;
1066     }
1067 }
1068
1069 /* Return < 0 if error. Important errors are:
1070   -EIO         generic I/O error (may happen for all errors)
1071   -ENOMEDIUM   No media inserted.
1072   -EINVAL      Invalid sector number or nb_sectors
1073   -EACCES      Trying to write a read-only device
1074 */
1075 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1076                const uint8_t *buf, int nb_sectors)
1077 {
1078     BlockDriver *drv = bs->drv;
1079
1080     if (!bs->drv)
1081         return -ENOMEDIUM;
1082
1083     if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
1084         QEMUIOVector qiov;
1085         struct iovec iov = {
1086             .iov_base = (void *)buf,
1087             .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1088         };
1089
1090         qemu_iovec_init_external(&qiov, &iov, 1);
1091         return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1092     }
1093
1094     if (bs->read_only)
1095         return -EACCES;
1096     if (bdrv_check_request(bs, sector_num, nb_sectors))
1097         return -EIO;
1098
1099     if (bs->dirty_bitmap) {
1100         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1101     }
1102
1103     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1104         bs->wr_highest_sector = sector_num + nb_sectors - 1;
1105     }
1106
1107     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
1108 }
1109
1110 int bdrv_pread(BlockDriverState *bs, int64_t offset,
1111                void *buf, int count1)
1112 {
1113     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1114     int len, nb_sectors, count;
1115     int64_t sector_num;
1116     int ret;
1117
1118     count = count1;
1119     /* first read to align to sector start */
1120     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1121     if (len > count)
1122         len = count;
1123     sector_num = offset >> BDRV_SECTOR_BITS;
1124     if (len > 0) {
1125         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1126             return ret;
1127         memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
1128         count -= len;
1129         if (count == 0)
1130             return count1;
1131         sector_num++;
1132         buf += len;
1133     }
1134
1135     /* read the sectors "in place" */
1136     nb_sectors = count >> BDRV_SECTOR_BITS;
1137     if (nb_sectors > 0) {
1138         if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1139             return ret;
1140         sector_num += nb_sectors;
1141         len = nb_sectors << BDRV_SECTOR_BITS;
1142         buf += len;
1143         count -= len;
1144     }
1145
1146     /* add data from the last sector */
1147     if (count > 0) {
1148         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1149             return ret;
1150         memcpy(buf, tmp_buf, count);
1151     }
1152     return count1;
1153 }
1154
1155 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1156                 const void *buf, int count1)
1157 {
1158     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1159     int len, nb_sectors, count;
1160     int64_t sector_num;
1161     int ret;
1162
1163     count = count1;
1164     /* first write to align to sector start */
1165     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1166     if (len > count)
1167         len = count;
1168     sector_num = offset >> BDRV_SECTOR_BITS;
1169     if (len > 0) {
1170         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1171             return ret;
1172         memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1173         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1174             return ret;
1175         count -= len;
1176         if (count == 0)
1177             return count1;
1178         sector_num++;
1179         buf += len;
1180     }
1181
1182     /* write the sectors "in place" */
1183     nb_sectors = count >> BDRV_SECTOR_BITS;
1184     if (nb_sectors > 0) {
1185         if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1186             return ret;
1187         sector_num += nb_sectors;
1188         len = nb_sectors << BDRV_SECTOR_BITS;
1189         buf += len;
1190         count -= len;
1191     }
1192
1193     /* add data from the last sector */
1194     if (count > 0) {
1195         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1196             return ret;
1197         memcpy(tmp_buf, buf, count);
1198         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1199             return ret;
1200     }
1201     return count1;
1202 }
1203
1204 /*
1205  * Writes to the file and ensures that no writes are reordered across this
1206  * request (acts as a barrier)
1207  *
1208  * Returns 0 on success, -errno in error cases.
1209  */
1210 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1211     const void *buf, int count)
1212 {
1213     int ret;
1214
1215     ret = bdrv_pwrite(bs, offset, buf, count);
1216     if (ret < 0) {
1217         return ret;
1218     }
1219
1220     /* No flush needed for cache modes that use O_DSYNC */
1221     if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
1222         bdrv_flush(bs);
1223     }
1224
1225     return 0;
1226 }
1227
1228 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1229     int nb_sectors, QEMUIOVector *qiov)
1230 {
1231     BlockDriver *drv = bs->drv;
1232
1233     trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1234
1235     if (!drv) {
1236         return -ENOMEDIUM;
1237     }
1238     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1239         return -EIO;
1240     }
1241
1242     return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1243 }
1244
1245 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1246     int nb_sectors, QEMUIOVector *qiov)
1247 {
1248     BlockDriver *drv = bs->drv;
1249
1250     trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1251
1252     if (!bs->drv) {
1253         return -ENOMEDIUM;
1254     }
1255     if (bs->read_only) {
1256         return -EACCES;
1257     }
1258     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1259         return -EIO;
1260     }
1261
1262     if (bs->dirty_bitmap) {
1263         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1264     }
1265
1266     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1267         bs->wr_highest_sector = sector_num + nb_sectors - 1;
1268     }
1269
1270     return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1271 }
1272
1273 /**
1274  * Truncate file to 'offset' bytes (needed only for file protocols)
1275  */
1276 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1277 {
1278     BlockDriver *drv = bs->drv;
1279     int ret;
1280     if (!drv)
1281         return -ENOMEDIUM;
1282     if (!drv->bdrv_truncate)
1283         return -ENOTSUP;
1284     if (bs->read_only)
1285         return -EACCES;
1286     if (bdrv_in_use(bs))
1287         return -EBUSY;
1288     ret = drv->bdrv_truncate(bs, offset);
1289     if (ret == 0) {
1290         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1291         bdrv_dev_resize_cb(bs);
1292     }
1293     return ret;
1294 }
1295
1296 /**
1297  * Length of a allocated file in bytes. Sparse files are counted by actual
1298  * allocated space. Return < 0 if error or unknown.
1299  */
1300 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1301 {
1302     BlockDriver *drv = bs->drv;
1303     if (!drv) {
1304         return -ENOMEDIUM;
1305     }
1306     if (drv->bdrv_get_allocated_file_size) {
1307         return drv->bdrv_get_allocated_file_size(bs);
1308     }
1309     if (bs->file) {
1310         return bdrv_get_allocated_file_size(bs->file);
1311     }
1312     return -ENOTSUP;
1313 }
1314
1315 /**
1316  * Length of a file in bytes. Return < 0 if error or unknown.
1317  */
1318 int64_t bdrv_getlength(BlockDriverState *bs)
1319 {
1320     BlockDriver *drv = bs->drv;
1321     if (!drv)
1322         return -ENOMEDIUM;
1323
1324     if (bs->growable || bs->removable) {
1325         if (drv->bdrv_getlength) {
1326             return drv->bdrv_getlength(bs);
1327         }
1328     }
1329     return bs->total_sectors * BDRV_SECTOR_SIZE;
1330 }
1331
1332 /* return 0 as number of sectors if no device present or error */
1333 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1334 {
1335     int64_t length;
1336     length = bdrv_getlength(bs);
1337     if (length < 0)
1338         length = 0;
1339     else
1340         length = length >> BDRV_SECTOR_BITS;
1341     *nb_sectors_ptr = length;
1342 }
1343
/*
 * One packed entry of the MSDOS partition table, as overlaid on the boot
 * sector at offset 0x1be by guess_disk_lchs().
 */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} QEMU_PACKED;
1356
1357 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1358 static int guess_disk_lchs(BlockDriverState *bs,
1359                            int *pcylinders, int *pheads, int *psectors)
1360 {
1361     uint8_t buf[BDRV_SECTOR_SIZE];
1362     int ret, i, heads, sectors, cylinders;
1363     struct partition *p;
1364     uint32_t nr_sects;
1365     uint64_t nb_sectors;
1366
1367     bdrv_get_geometry(bs, &nb_sectors);
1368
1369     ret = bdrv_read(bs, 0, buf, 1);
1370     if (ret < 0)
1371         return -1;
1372     /* test msdos magic */
1373     if (buf[510] != 0x55 || buf[511] != 0xaa)
1374         return -1;
1375     for(i = 0; i < 4; i++) {
1376         p = ((struct partition *)(buf + 0x1be)) + i;
1377         nr_sects = le32_to_cpu(p->nr_sects);
1378         if (nr_sects && p->end_head) {
1379             /* We make the assumption that the partition terminates on
1380                a cylinder boundary */
1381             heads = p->end_head + 1;
1382             sectors = p->end_sector & 63;
1383             if (sectors == 0)
1384                 continue;
1385             cylinders = nb_sectors / (heads * sectors);
1386             if (cylinders < 1 || cylinders > 16383)
1387                 continue;
1388             *pheads = heads;
1389             *psectors = sectors;
1390             *pcylinders = cylinders;
1391 #if 0
1392             printf("guessed geometry: LCHS=%d %d %d\n",
1393                    cylinders, heads, sectors);
1394 #endif
1395             return 0;
1396         }
1397     }
1398     return -1;
1399 }
1400
1401 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1402 {
1403     int translation, lba_detected = 0;
1404     int cylinders, heads, secs;
1405     uint64_t nb_sectors;
1406
1407     /* if a geometry hint is available, use it */
1408     bdrv_get_geometry(bs, &nb_sectors);
1409     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1410     translation = bdrv_get_translation_hint(bs);
1411     if (cylinders != 0) {
1412         *pcyls = cylinders;
1413         *pheads = heads;
1414         *psecs = secs;
1415     } else {
1416         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1417             if (heads > 16) {
1418                 /* if heads > 16, it means that a BIOS LBA
1419                    translation was active, so the default
1420                    hardware geometry is OK */
1421                 lba_detected = 1;
1422                 goto default_geometry;
1423             } else {
1424                 *pcyls = cylinders;
1425                 *pheads = heads;
1426                 *psecs = secs;
1427                 /* disable any translation to be in sync with
1428                    the logical geometry */
1429                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1430                     bdrv_set_translation_hint(bs,
1431                                               BIOS_ATA_TRANSLATION_NONE);
1432                 }
1433             }
1434         } else {
1435         default_geometry:
1436             /* if no geometry, use a standard physical disk geometry */
1437             cylinders = nb_sectors / (16 * 63);
1438
1439             if (cylinders > 16383)
1440                 cylinders = 16383;
1441             else if (cylinders < 2)
1442                 cylinders = 2;
1443             *pcyls = cylinders;
1444             *pheads = 16;
1445             *psecs = 63;
1446             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1447                 if ((*pcyls * *pheads) <= 131072) {
1448                     bdrv_set_translation_hint(bs,
1449                                               BIOS_ATA_TRANSLATION_LARGE);
1450                 } else {
1451                     bdrv_set_translation_hint(bs,
1452                                               BIOS_ATA_TRANSLATION_LBA);
1453                 }
1454             }
1455         }
1456         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1457     }
1458 }
1459
1460 void bdrv_set_geometry_hint(BlockDriverState *bs,
1461                             int cyls, int heads, int secs)
1462 {
1463     bs->cyls = cyls;
1464     bs->heads = heads;
1465     bs->secs = secs;
1466 }
1467
1468 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1469 {
1470     bs->translation = translation;
1471 }
1472
1473 void bdrv_get_geometry_hint(BlockDriverState *bs,
1474                             int *pcyls, int *pheads, int *psecs)
1475 {
1476     *pcyls = bs->cyls;
1477     *pheads = bs->heads;
1478     *psecs = bs->secs;
1479 }
1480
1481 /* Recognize floppy formats */
1482 typedef struct FDFormat {
1483     FDriveType drive;
1484     uint8_t last_sect;
1485     uint8_t max_track;
1486     uint8_t max_head;
1487 } FDFormat;
1488
1489 static const FDFormat fd_formats[] = {
1490     /* First entry is default format */
1491     /* 1.44 MB 3"1/2 floppy disks */
1492     { FDRIVE_DRV_144, 18, 80, 1, },
1493     { FDRIVE_DRV_144, 20, 80, 1, },
1494     { FDRIVE_DRV_144, 21, 80, 1, },
1495     { FDRIVE_DRV_144, 21, 82, 1, },
1496     { FDRIVE_DRV_144, 21, 83, 1, },
1497     { FDRIVE_DRV_144, 22, 80, 1, },
1498     { FDRIVE_DRV_144, 23, 80, 1, },
1499     { FDRIVE_DRV_144, 24, 80, 1, },
1500     /* 2.88 MB 3"1/2 floppy disks */
1501     { FDRIVE_DRV_288, 36, 80, 1, },
1502     { FDRIVE_DRV_288, 39, 80, 1, },
1503     { FDRIVE_DRV_288, 40, 80, 1, },
1504     { FDRIVE_DRV_288, 44, 80, 1, },
1505     { FDRIVE_DRV_288, 48, 80, 1, },
1506     /* 720 kB 3"1/2 floppy disks */
1507     { FDRIVE_DRV_144,  9, 80, 1, },
1508     { FDRIVE_DRV_144, 10, 80, 1, },
1509     { FDRIVE_DRV_144, 10, 82, 1, },
1510     { FDRIVE_DRV_144, 10, 83, 1, },
1511     { FDRIVE_DRV_144, 13, 80, 1, },
1512     { FDRIVE_DRV_144, 14, 80, 1, },
1513     /* 1.2 MB 5"1/4 floppy disks */
1514     { FDRIVE_DRV_120, 15, 80, 1, },
1515     { FDRIVE_DRV_120, 18, 80, 1, },
1516     { FDRIVE_DRV_120, 18, 82, 1, },
1517     { FDRIVE_DRV_120, 18, 83, 1, },
1518     { FDRIVE_DRV_120, 20, 80, 1, },
1519     /* 720 kB 5"1/4 floppy disks */
1520     { FDRIVE_DRV_120,  9, 80, 1, },
1521     { FDRIVE_DRV_120, 11, 80, 1, },
1522     /* 360 kB 5"1/4 floppy disks */
1523     { FDRIVE_DRV_120,  9, 40, 1, },
1524     { FDRIVE_DRV_120,  9, 40, 0, },
1525     { FDRIVE_DRV_120, 10, 41, 1, },
1526     { FDRIVE_DRV_120, 10, 42, 1, },
1527     /* 320 kB 5"1/4 floppy disks */
1528     { FDRIVE_DRV_120,  8, 40, 1, },
1529     { FDRIVE_DRV_120,  8, 40, 0, },
1530     /* 360 kB must match 5"1/4 better than 3"1/2... */
1531     { FDRIVE_DRV_144,  9, 80, 0, },
1532     /* end */
1533     { FDRIVE_DRV_NONE, -1, -1, 0, },
1534 };
1535
1536 void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1537                                    int *max_track, int *last_sect,
1538                                    FDriveType drive_in, FDriveType *drive)
1539 {
1540     const FDFormat *parse;
1541     uint64_t nb_sectors, size;
1542     int i, first_match, match;
1543
1544     bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1545     if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1546         /* User defined disk */
1547     } else {
1548         bdrv_get_geometry(bs, &nb_sectors);
1549         match = -1;
1550         first_match = -1;
1551         for (i = 0; ; i++) {
1552             parse = &fd_formats[i];
1553             if (parse->drive == FDRIVE_DRV_NONE) {
1554                 break;
1555             }
1556             if (drive_in == parse->drive ||
1557                 drive_in == FDRIVE_DRV_NONE) {
1558                 size = (parse->max_head + 1) * parse->max_track *
1559                     parse->last_sect;
1560                 if (nb_sectors == size) {
1561                     match = i;
1562                     break;
1563                 }
1564                 if (first_match == -1) {
1565                     first_match = i;
1566                 }
1567             }
1568         }
1569         if (match == -1) {
1570             if (first_match == -1) {
1571                 match = 1;
1572             } else {
1573                 match = first_match;
1574             }
1575             parse = &fd_formats[match];
1576         }
1577         *nb_heads = parse->max_head + 1;
1578         *max_track = parse->max_track;
1579         *last_sect = parse->last_sect;
1580         *drive = parse->drive;
1581     }
1582 }
1583
1584 int bdrv_get_translation_hint(BlockDriverState *bs)
1585 {
1586     return bs->translation;
1587 }
1588
1589 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1590                        BlockErrorAction on_write_error)
1591 {
1592     bs->on_read_error = on_read_error;
1593     bs->on_write_error = on_write_error;
1594 }
1595
1596 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1597 {
1598     return is_read ? bs->on_read_error : bs->on_write_error;
1599 }
1600
1601 void bdrv_set_removable(BlockDriverState *bs, int removable)
1602 {
1603     bs->removable = removable;
1604     if (removable && bs == bs_snapshots) {
1605         bs_snapshots = NULL;
1606     }
1607 }
1608
1609 int bdrv_is_removable(BlockDriverState *bs)
1610 {
1611     return bs->removable;
1612 }
1613
1614 int bdrv_is_read_only(BlockDriverState *bs)
1615 {
1616     return bs->read_only;
1617 }
1618
1619 int bdrv_is_sg(BlockDriverState *bs)
1620 {
1621     return bs->sg;
1622 }
1623
1624 int bdrv_enable_write_cache(BlockDriverState *bs)
1625 {
1626     return bs->enable_write_cache;
1627 }
1628
1629 int bdrv_is_encrypted(BlockDriverState *bs)
1630 {
1631     if (bs->backing_hd && bs->backing_hd->encrypted)
1632         return 1;
1633     return bs->encrypted;
1634 }
1635
1636 int bdrv_key_required(BlockDriverState *bs)
1637 {
1638     BlockDriverState *backing_hd = bs->backing_hd;
1639
1640     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1641         return 1;
1642     return (bs->encrypted && !bs->valid_key);
1643 }
1644
1645 int bdrv_set_key(BlockDriverState *bs, const char *key)
1646 {
1647     int ret;
1648     if (bs->backing_hd && bs->backing_hd->encrypted) {
1649         ret = bdrv_set_key(bs->backing_hd, key);
1650         if (ret < 0)
1651             return ret;
1652         if (!bs->encrypted)
1653             return 0;
1654     }
1655     if (!bs->encrypted) {
1656         return -EINVAL;
1657     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1658         return -ENOMEDIUM;
1659     }
1660     ret = bs->drv->bdrv_set_key(bs, key);
1661     if (ret < 0) {
1662         bs->valid_key = 0;
1663     } else if (!bs->valid_key) {
1664         bs->valid_key = 1;
1665         /* call the change callback now, we skipped it on open */
1666         bdrv_dev_change_media_cb(bs);
1667     }
1668     return ret;
1669 }
1670
1671 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1672 {
1673     if (!bs->drv) {
1674         buf[0] = '\0';
1675     } else {
1676         pstrcpy(buf, buf_size, bs->drv->format_name);
1677     }
1678 }
1679
1680 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1681                          void *opaque)
1682 {
1683     BlockDriver *drv;
1684
1685     QLIST_FOREACH(drv, &bdrv_drivers, list) {
1686         it(opaque, drv->format_name);
1687     }
1688 }
1689
1690 BlockDriverState *bdrv_find(const char *name)
1691 {
1692     BlockDriverState *bs;
1693
1694     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1695         if (!strcmp(name, bs->device_name)) {
1696             return bs;
1697         }
1698     }
1699     return NULL;
1700 }
1701
1702 BlockDriverState *bdrv_next(BlockDriverState *bs)
1703 {
1704     if (!bs) {
1705         return QTAILQ_FIRST(&bdrv_states);
1706     }
1707     return QTAILQ_NEXT(bs, list);
1708 }
1709
1710 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1711 {
1712     BlockDriverState *bs;
1713
1714     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1715         it(opaque, bs);
1716     }
1717 }
1718
1719 const char *bdrv_get_device_name(BlockDriverState *bs)
1720 {
1721     return bs->device_name;
1722 }
1723
1724 int bdrv_flush(BlockDriverState *bs)
1725 {
1726     if (bs->open_flags & BDRV_O_NO_FLUSH) {
1727         return 0;
1728     }
1729
1730     if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
1731         return bdrv_co_flush_em(bs);
1732     }
1733
1734     if (bs->drv && bs->drv->bdrv_flush) {
1735         return bs->drv->bdrv_flush(bs);
1736     }
1737
1738     /*
1739      * Some block drivers always operate in either writethrough or unsafe mode
1740      * and don't support bdrv_flush therefore. Usually qemu doesn't know how
1741      * the server works (because the behaviour is hardcoded or depends on
1742      * server-side configuration), so we can't ensure that everything is safe
1743      * on disk. Returning an error doesn't work because that would break guests
1744      * even if the server operates in writethrough mode.
1745      *
1746      * Let's hope the user knows what he's doing.
1747      */
1748     return 0;
1749 }
1750
1751 void bdrv_flush_all(void)
1752 {
1753     BlockDriverState *bs;
1754
1755     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1756         if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
1757             bdrv_flush(bs);
1758         }
1759     }
1760 }
1761
1762 int bdrv_has_zero_init(BlockDriverState *bs)
1763 {
1764     assert(bs->drv);
1765
1766     if (bs->drv->bdrv_has_zero_init) {
1767         return bs->drv->bdrv_has_zero_init(bs);
1768     }
1769
1770     return 1;
1771 }
1772
1773 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1774 {
1775     if (!bs->drv) {
1776         return -ENOMEDIUM;
1777     }
1778     if (!bs->drv->bdrv_discard) {
1779         return 0;
1780     }
1781     return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1782 }
1783
1784 /*
1785  * Returns true iff the specified sector is present in the disk image. Drivers
1786  * not implementing the functionality are assumed to not support backing files,
1787  * hence all their sectors are reported as allocated.
1788  *
1789  * 'pnum' is set to the number of sectors (including and immediately following
1790  * the specified sector) that are known to be in the same
1791  * allocated/unallocated state.
1792  *
1793  * 'nb_sectors' is the max value 'pnum' should be set to.
1794  */
1795 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1796         int *pnum)
1797 {
1798     int64_t n;
1799     if (!bs->drv->bdrv_is_allocated) {
1800         if (sector_num >= bs->total_sectors) {
1801             *pnum = 0;
1802             return 0;
1803         }
1804         n = bs->total_sectors - sector_num;
1805         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1806         return 1;
1807     }
1808     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1809 }
1810
1811 void bdrv_mon_event(const BlockDriverState *bdrv,
1812                     BlockMonEventAction action, int is_read)
1813 {
1814     QObject *data;
1815     const char *action_str;
1816
1817     switch (action) {
1818     case BDRV_ACTION_REPORT:
1819         action_str = "report";
1820         break;
1821     case BDRV_ACTION_IGNORE:
1822         action_str = "ignore";
1823         break;
1824     case BDRV_ACTION_STOP:
1825         action_str = "stop";
1826         break;
1827     default:
1828         abort();
1829     }
1830
1831     data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1832                               bdrv->device_name,
1833                               action_str,
1834                               is_read ? "read" : "write");
1835     monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1836
1837     qobject_decref(data);
1838 }
1839
1840 static void bdrv_print_dict(QObject *obj, void *opaque)
1841 {
1842     QDict *bs_dict;
1843     Monitor *mon = opaque;
1844
1845     bs_dict = qobject_to_qdict(obj);
1846
1847     monitor_printf(mon, "%s: removable=%d",
1848                         qdict_get_str(bs_dict, "device"),
1849                         qdict_get_bool(bs_dict, "removable"));
1850
1851     if (qdict_get_bool(bs_dict, "removable")) {
1852         monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1853     }
1854
1855     if (qdict_haskey(bs_dict, "inserted")) {
1856         QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1857
1858         monitor_printf(mon, " file=");
1859         monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1860         if (qdict_haskey(qdict, "backing_file")) {
1861             monitor_printf(mon, " backing_file=");
1862             monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1863         }
1864         monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1865                             qdict_get_bool(qdict, "ro"),
1866                             qdict_get_str(qdict, "drv"),
1867                             qdict_get_bool(qdict, "encrypted"));
1868     } else {
1869         monitor_printf(mon, " [not inserted]");
1870     }
1871
1872     monitor_printf(mon, "\n");
1873 }
1874
1875 void bdrv_info_print(Monitor *mon, const QObject *data)
1876 {
1877     qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1878 }
1879
1880 void bdrv_info(Monitor *mon, QObject **ret_data)
1881 {
1882     QList *bs_list;
1883     BlockDriverState *bs;
1884
1885     bs_list = qlist_new();
1886
1887     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1888         QObject *bs_obj;
1889
1890         bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
1891                                     "'removable': %i, 'locked': %i }",
1892                                     bs->device_name, bs->removable,
1893                                     bs->locked);
1894
1895         if (bs->drv) {
1896             QObject *obj;
1897             QDict *bs_dict = qobject_to_qdict(bs_obj);
1898
1899             obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1900                                      "'encrypted': %i }",
1901                                      bs->filename, bs->read_only,
1902                                      bs->drv->format_name,
1903                                      bdrv_is_encrypted(bs));
1904             if (bs->backing_file[0] != '\0') {
1905                 QDict *qdict = qobject_to_qdict(obj);
1906                 qdict_put(qdict, "backing_file",
1907                           qstring_from_str(bs->backing_file));
1908             }
1909
1910             qdict_put_obj(bs_dict, "inserted", obj);
1911         }
1912         qlist_append_obj(bs_list, bs_obj);
1913     }
1914
1915     *ret_data = QOBJECT(bs_list);
1916 }
1917
1918 static void bdrv_stats_iter(QObject *data, void *opaque)
1919 {
1920     QDict *qdict;
1921     Monitor *mon = opaque;
1922
1923     qdict = qobject_to_qdict(data);
1924     monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1925
1926     qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1927     monitor_printf(mon, " rd_bytes=%" PRId64
1928                         " wr_bytes=%" PRId64
1929                         " rd_operations=%" PRId64
1930                         " wr_operations=%" PRId64
1931                         " flush_operations=%" PRId64
1932                         " wr_total_time_ns=%" PRId64
1933                         " rd_total_time_ns=%" PRId64
1934                         " flush_total_time_ns=%" PRId64
1935                         "\n",
1936                         qdict_get_int(qdict, "rd_bytes"),
1937                         qdict_get_int(qdict, "wr_bytes"),
1938                         qdict_get_int(qdict, "rd_operations"),
1939                         qdict_get_int(qdict, "wr_operations"),
1940                         qdict_get_int(qdict, "flush_operations"),
1941                         qdict_get_int(qdict, "wr_total_time_ns"),
1942                         qdict_get_int(qdict, "rd_total_time_ns"),
1943                         qdict_get_int(qdict, "flush_total_time_ns"));
1944 }
1945
1946 void bdrv_stats_print(Monitor *mon, const QObject *data)
1947 {
1948     qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1949 }
1950
1951 static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
1952 {
1953     QObject *res;
1954     QDict *dict;
1955
1956     res = qobject_from_jsonf("{ 'stats': {"
1957                              "'rd_bytes': %" PRId64 ","
1958                              "'wr_bytes': %" PRId64 ","
1959                              "'rd_operations': %" PRId64 ","
1960                              "'wr_operations': %" PRId64 ","
1961                              "'wr_highest_offset': %" PRId64 ","
1962                              "'flush_operations': %" PRId64 ","
1963                              "'wr_total_time_ns': %" PRId64 ","
1964                              "'rd_total_time_ns': %" PRId64 ","
1965                              "'flush_total_time_ns': %" PRId64
1966                              "} }",
1967                              bs->nr_bytes[BDRV_ACCT_READ],
1968                              bs->nr_bytes[BDRV_ACCT_WRITE],
1969                              bs->nr_ops[BDRV_ACCT_READ],
1970                              bs->nr_ops[BDRV_ACCT_WRITE],
1971                              bs->wr_highest_sector *
1972                              (uint64_t)BDRV_SECTOR_SIZE,
1973                              bs->nr_ops[BDRV_ACCT_FLUSH],
1974                              bs->total_time_ns[BDRV_ACCT_WRITE],
1975                              bs->total_time_ns[BDRV_ACCT_READ],
1976                              bs->total_time_ns[BDRV_ACCT_FLUSH]);
1977     dict  = qobject_to_qdict(res);
1978
1979     if (*bs->device_name) {
1980         qdict_put(dict, "device", qstring_from_str(bs->device_name));
1981     }
1982
1983     if (bs->file) {
1984         QObject *parent = bdrv_info_stats_bs(bs->file);
1985         qdict_put_obj(dict, "parent", parent);
1986     }
1987
1988     return res;
1989 }
1990
1991 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
1992 {
1993     QObject *obj;
1994     QList *devices;
1995     BlockDriverState *bs;
1996
1997     devices = qlist_new();
1998
1999     QTAILQ_FOREACH(bs, &bdrv_states, list) {
2000         obj = bdrv_info_stats_bs(bs);
2001         qlist_append_obj(devices, obj);
2002     }
2003
2004     *ret_data = QOBJECT(devices);
2005 }
2006
2007 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2008 {
2009     if (bs->backing_hd && bs->backing_hd->encrypted)
2010         return bs->backing_file;
2011     else if (bs->encrypted)
2012         return bs->filename;
2013     else
2014         return NULL;
2015 }
2016
2017 void bdrv_get_backing_filename(BlockDriverState *bs,
2018                                char *filename, int filename_size)
2019 {
2020     if (!bs->backing_file) {
2021         pstrcpy(filename, filename_size, "");
2022     } else {
2023         pstrcpy(filename, filename_size, bs->backing_file);
2024     }
2025 }
2026
2027 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
2028                           const uint8_t *buf, int nb_sectors)
2029 {
2030     BlockDriver *drv = bs->drv;
2031     if (!drv)
2032         return -ENOMEDIUM;
2033     if (!drv->bdrv_write_compressed)
2034         return -ENOTSUP;
2035     if (bdrv_check_request(bs, sector_num, nb_sectors))
2036         return -EIO;
2037
2038     if (bs->dirty_bitmap) {
2039         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2040     }
2041
2042     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2043 }
2044
2045 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2046 {
2047     BlockDriver *drv = bs->drv;
2048     if (!drv)
2049         return -ENOMEDIUM;
2050     if (!drv->bdrv_get_info)
2051         return -ENOTSUP;
2052     memset(bdi, 0, sizeof(*bdi));
2053     return drv->bdrv_get_info(bs, bdi);
2054 }
2055
2056 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2057                       int64_t pos, int size)
2058 {
2059     BlockDriver *drv = bs->drv;
2060     if (!drv)
2061         return -ENOMEDIUM;
2062     if (drv->bdrv_save_vmstate)
2063         return drv->bdrv_save_vmstate(bs, buf, pos, size);
2064     if (bs->file)
2065         return bdrv_save_vmstate(bs->file, buf, pos, size);
2066     return -ENOTSUP;
2067 }
2068
2069 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2070                       int64_t pos, int size)
2071 {
2072     BlockDriver *drv = bs->drv;
2073     if (!drv)
2074         return -ENOMEDIUM;
2075     if (drv->bdrv_load_vmstate)
2076         return drv->bdrv_load_vmstate(bs, buf, pos, size);
2077     if (bs->file)
2078         return bdrv_load_vmstate(bs->file, buf, pos, size);
2079     return -ENOTSUP;
2080 }
2081
2082 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2083 {
2084     BlockDriver *drv = bs->drv;
2085
2086     if (!drv || !drv->bdrv_debug_event) {
2087         return;
2088     }
2089
2090     return drv->bdrv_debug_event(bs, event);
2091
2092 }
2093
2094 /**************************************************************/
2095 /* handling of snapshots */
2096
2097 int bdrv_can_snapshot(BlockDriverState *bs)
2098 {
2099     BlockDriver *drv = bs->drv;
2100     if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
2101         return 0;
2102     }
2103
2104     if (!drv->bdrv_snapshot_create) {
2105         if (bs->file != NULL) {
2106             return bdrv_can_snapshot(bs->file);
2107         }
2108         return 0;
2109     }
2110
2111     return 1;
2112 }
2113
2114 int bdrv_is_snapshot(BlockDriverState *bs)
2115 {
2116     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2117 }
2118
2119 BlockDriverState *bdrv_snapshots(void)
2120 {
2121     BlockDriverState *bs;
2122
2123     if (bs_snapshots) {
2124         return bs_snapshots;
2125     }
2126
2127     bs = NULL;
2128     while ((bs = bdrv_next(bs))) {
2129         if (bdrv_can_snapshot(bs)) {
2130             bs_snapshots = bs;
2131             return bs;
2132         }
2133     }
2134     return NULL;
2135 }
2136
2137 int bdrv_snapshot_create(BlockDriverState *bs,
2138                          QEMUSnapshotInfo *sn_info)
2139 {
2140     BlockDriver *drv = bs->drv;
2141     if (!drv)
2142         return -ENOMEDIUM;
2143     if (drv->bdrv_snapshot_create)
2144         return drv->bdrv_snapshot_create(bs, sn_info);
2145     if (bs->file)
2146         return bdrv_snapshot_create(bs->file, sn_info);
2147     return -ENOTSUP;
2148 }
2149
2150 int bdrv_snapshot_goto(BlockDriverState *bs,
2151                        const char *snapshot_id)
2152 {
2153     BlockDriver *drv = bs->drv;
2154     int ret, open_ret;
2155
2156     if (!drv)
2157         return -ENOMEDIUM;
2158     if (drv->bdrv_snapshot_goto)
2159         return drv->bdrv_snapshot_goto(bs, snapshot_id);
2160
2161     if (bs->file) {
2162         drv->bdrv_close(bs);
2163         ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2164         open_ret = drv->bdrv_open(bs, bs->open_flags);
2165         if (open_ret < 0) {
2166             bdrv_delete(bs->file);
2167             bs->drv = NULL;
2168             return open_ret;
2169         }
2170         return ret;
2171     }
2172
2173     return -ENOTSUP;
2174 }
2175
2176 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2177 {
2178     BlockDriver *drv = bs->drv;
2179     if (!drv)
2180         return -ENOMEDIUM;
2181     if (drv->bdrv_snapshot_delete)
2182         return drv->bdrv_snapshot_delete(bs, snapshot_id);
2183     if (bs->file)
2184         return bdrv_snapshot_delete(bs->file, snapshot_id);
2185     return -ENOTSUP;
2186 }
2187
2188 int bdrv_snapshot_list(BlockDriverState *bs,
2189                        QEMUSnapshotInfo **psn_info)
2190 {
2191     BlockDriver *drv = bs->drv;
2192     if (!drv)
2193         return -ENOMEDIUM;
2194     if (drv->bdrv_snapshot_list)
2195         return drv->bdrv_snapshot_list(bs, psn_info);
2196     if (bs->file)
2197         return bdrv_snapshot_list(bs->file, psn_info);
2198     return -ENOTSUP;
2199 }
2200
2201 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2202         const char *snapshot_name)
2203 {
2204     BlockDriver *drv = bs->drv;
2205     if (!drv) {
2206         return -ENOMEDIUM;
2207     }
2208     if (!bs->read_only) {
2209         return -EINVAL;
2210     }
2211     if (drv->bdrv_snapshot_load_tmp) {
2212         return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2213     }
2214     return -ENOTSUP;
2215 }
2216
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (at most @buf_size bytes) in a short human
 * readable form and return @buf.
 *
 * Values up to 999 are printed as plain integers.  Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal digit is
 * shown while the scaled value is below 10, a rounded integer otherwise.
 * Anything too large for the other suffixes is expressed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < 10 * unit) {
            /* Small multiple of the unit: keep one fractional digit */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit, suffixes[idx]);
            return buf;
        }
        if (size < 1000 * unit || idx == NB_SUFFIXES - 1) {
            /* Round to the nearest whole unit */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (unit >> 1)) / unit, suffixes[idx]);
            return buf;
        }
    }

    return buf;
}
2246
2247 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2248 {
2249     char buf1[128], date_buf[128], clock_buf[128];
2250 #ifdef _WIN32
2251     struct tm *ptm;
2252 #else
2253     struct tm tm;
2254 #endif
2255     time_t ti;
2256     int64_t secs;
2257
2258     if (!sn) {
2259         snprintf(buf, buf_size,
2260                  "%-10s%-20s%7s%20s%15s",
2261                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2262     } else {
2263         ti = sn->date_sec;
2264 #ifdef _WIN32
2265         ptm = localtime(&ti);
2266         strftime(date_buf, sizeof(date_buf),
2267                  "%Y-%m-%d %H:%M:%S", ptm);
2268 #else
2269         localtime_r(&ti, &tm);
2270         strftime(date_buf, sizeof(date_buf),
2271                  "%Y-%m-%d %H:%M:%S", &tm);
2272 #endif
2273         secs = sn->vm_clock_nsec / 1000000000;
2274         snprintf(clock_buf, sizeof(clock_buf),
2275                  "%02d:%02d:%02d.%03d",
2276                  (int)(secs / 3600),
2277                  (int)((secs / 60) % 60),
2278                  (int)(secs % 60),
2279                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
2280         snprintf(buf, buf_size,
2281                  "%-10s%-20s%7s%20s%15s",
2282                  sn->id_str, sn->name,
2283                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2284                  date_buf,
2285                  clock_buf);
2286     }
2287     return buf;
2288 }
2289
2290 /**************************************************************/
2291 /* async I/Os */
2292
2293 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2294                                  QEMUIOVector *qiov, int nb_sectors,
2295                                  BlockDriverCompletionFunc *cb, void *opaque)
2296 {
2297     BlockDriver *drv = bs->drv;
2298
2299     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2300
2301     if (!drv)
2302         return NULL;
2303     if (bdrv_check_request(bs, sector_num, nb_sectors))
2304         return NULL;
2305
2306     return drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
2307                                cb, opaque);
2308 }
2309
/*
 * State carried by block_complete_cb(): the caller's original completion
 * callback plus the sector range to mark dirty when the write completes.
 */
typedef struct BlockCompleteData {
    BlockDriverCompletionFunc *cb;  /* caller's completion callback */
    void *opaque;                   /* argument passed to cb */
    BlockDriverState *bs;           /* device whose dirty bitmap is updated */
    int64_t sector_num;             /* first sector of the written range */
    int nb_sectors;                 /* length of the written range */
} BlockCompleteData;
2317
2318 static void block_complete_cb(void *opaque, int ret)
2319 {
2320     BlockCompleteData *b = opaque;
2321
2322     if (b->bs->dirty_bitmap) {
2323         set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2324     }
2325     b->cb(b->opaque, ret);
2326     g_free(b);
2327 }
2328
2329 static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2330                                              int64_t sector_num,
2331                                              int nb_sectors,
2332                                              BlockDriverCompletionFunc *cb,
2333                                              void *opaque)
2334 {
2335     BlockCompleteData *blkdata = g_malloc0(sizeof(BlockCompleteData));
2336
2337     blkdata->bs = bs;
2338     blkdata->cb = cb;
2339     blkdata->opaque = opaque;
2340     blkdata->sector_num = sector_num;
2341     blkdata->nb_sectors = nb_sectors;
2342
2343     return blkdata;
2344 }
2345
2346 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2347                                   QEMUIOVector *qiov, int nb_sectors,
2348                                   BlockDriverCompletionFunc *cb, void *opaque)
2349 {
2350     BlockDriver *drv = bs->drv;
2351     BlockDriverAIOCB *ret;
2352     BlockCompleteData *blk_cb_data;
2353
2354     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2355
2356     if (!drv)
2357         return NULL;
2358     if (bs->read_only)
2359         return NULL;
2360     if (bdrv_check_request(bs, sector_num, nb_sectors))
2361         return NULL;
2362
2363     if (bs->dirty_bitmap) {
2364         blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
2365                                          opaque);
2366         cb = &block_complete_cb;
2367         opaque = blk_cb_data;
2368     }
2369
2370     ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
2371                                cb, opaque);
2372
2373     if (ret) {
2374         if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2375             bs->wr_highest_sector = sector_num + nb_sectors - 1;
2376         }
2377     }
2378
2379     return ret;
2380 }
2381
2382
/*
 * Shared completion state of one bdrv_aio_multiwrite() call.
 */
typedef struct MultiwriteCB {
    int error;          /* first negative completion code seen, 0 if none */
    int num_requests;   /* completions still outstanding */
    int num_callbacks;  /* number of entries in callbacks[] */
    struct {
        BlockDriverCompletionFunc *cb;  /* caller's callback for a request */
        void *opaque;                   /* argument passed to cb */
        QEMUIOVector *free_qiov;  /* merged qiov to destroy and free, or NULL */
        void *free_buf;           /* buffer to qemu_vfree(), or NULL */
    } callbacks[];
} MultiwriteCB;
2394
2395 static void multiwrite_user_cb(MultiwriteCB *mcb)
2396 {
2397     int i;
2398
2399     for (i = 0; i < mcb->num_callbacks; i++) {
2400         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2401         if (mcb->callbacks[i].free_qiov) {
2402             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2403         }
2404         g_free(mcb->callbacks[i].free_qiov);
2405         qemu_vfree(mcb->callbacks[i].free_buf);
2406     }
2407 }
2408
2409 static void multiwrite_cb(void *opaque, int ret)
2410 {
2411     MultiwriteCB *mcb = opaque;
2412
2413     trace_multiwrite_cb(mcb, ret);
2414
2415     if (ret < 0 && !mcb->error) {
2416         mcb->error = ret;
2417     }
2418
2419     mcb->num_requests--;
2420     if (mcb->num_requests == 0) {
2421         multiwrite_user_cb(mcb);
2422         g_free(mcb);
2423     }
2424 }
2425
2426 static int multiwrite_req_compare(const void *a, const void *b)
2427 {
2428     const BlockRequest *req1 = a, *req2 = b;
2429
2430     /*
2431      * Note that we can't simply subtract req2->sector from req1->sector
2432      * here as that could overflow the return value.
2433      */
2434     if (req1->sector > req2->sector) {
2435         return 1;
2436     } else if (req1->sector < req2->sector) {
2437         return -1;
2438     } else {
2439         return 0;
2440     }
2441 }
2442
2443 /*
2444  * Takes a bunch of requests and tries to merge them. Returns the number of
2445  * requests that remain after merging.
2446  */
2447 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2448     int num_reqs, MultiwriteCB *mcb)
2449 {
2450     int i, outidx;
2451
2452     // Sort requests by start sector
2453     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2454
2455     // Check if adjacent requests touch the same clusters. If so, combine them,
2456     // filling up gaps with zero sectors.
2457     outidx = 0;
2458     for (i = 1; i < num_reqs; i++) {
2459         int merge = 0;
2460         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2461
2462         // This handles the cases that are valid for all block drivers, namely
2463         // exactly sequential writes and overlapping writes.
2464         if (reqs[i].sector <= oldreq_last) {
2465             merge = 1;
2466         }
2467
2468         // The block driver may decide that it makes sense to combine requests
2469         // even if there is a gap of some sectors between them. In this case,
2470         // the gap is filled with zeros (therefore only applicable for yet
2471         // unused space in format like qcow2).
2472         if (!merge && bs->drv->bdrv_merge_requests) {
2473             merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2474         }
2475
2476         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2477             merge = 0;
2478         }
2479
2480         if (merge) {
2481             size_t size;
2482             QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2483             qemu_iovec_init(qiov,
2484                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2485
2486             // Add the first request to the merged one. If the requests are
2487             // overlapping, drop the last sectors of the first request.
2488             size = (reqs[i].sector - reqs[outidx].sector) << 9;
2489             qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2490
2491             // We might need to add some zeros between the two requests
2492             if (reqs[i].sector > oldreq_last) {
2493                 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2494                 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2495                 memset(buf, 0, zero_bytes);
2496                 qemu_iovec_add(qiov, buf, zero_bytes);
2497                 mcb->callbacks[i].free_buf = buf;
2498             }
2499
2500             // Add the second request
2501             qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2502
2503             reqs[outidx].nb_sectors = qiov->size >> 9;
2504             reqs[outidx].qiov = qiov;
2505
2506             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2507         } else {
2508             outidx++;
2509             reqs[outidx].sector     = reqs[i].sector;
2510             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2511             reqs[outidx].qiov       = reqs[i].qiov;
2512         }
2513     }
2514
2515     return outidx + 1;
2516 }
2517
2518 /*
2519  * Submit multiple AIO write requests at once.
2520  *
2521  * On success, the function returns 0 and all requests in the reqs array have
2522  * been submitted. In error case this function returns -1, and any of the
2523  * requests may or may not be submitted yet. In particular, this means that the
2524  * callback will be called for some of the requests, for others it won't. The
2525  * caller must check the error field of the BlockRequest to wait for the right
2526  * callbacks (if error != 0, no callback will be called).
2527  *
2528  * The implementation may modify the contents of the reqs array, e.g. to merge
2529  * requests. However, the fields opaque and error are left unmodified as they
2530  * are used to signal failure for a single request to the caller.
2531  */
2532 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2533 {
2534     BlockDriverAIOCB *acb;
2535     MultiwriteCB *mcb;
2536     int i;
2537
2538     /* don't submit writes if we don't have a medium */
2539     if (bs->drv == NULL) {
2540         for (i = 0; i < num_reqs; i++) {
2541             reqs[i].error = -ENOMEDIUM;
2542         }
2543         return -1;
2544     }
2545
2546     if (num_reqs == 0) {
2547         return 0;
2548     }
2549
2550     // Create MultiwriteCB structure
2551     mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2552     mcb->num_requests = 0;
2553     mcb->num_callbacks = num_reqs;
2554
2555     for (i = 0; i < num_reqs; i++) {
2556         mcb->callbacks[i].cb = reqs[i].cb;
2557         mcb->callbacks[i].opaque = reqs[i].opaque;
2558     }
2559
2560     // Check for mergable requests
2561     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2562
2563     trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2564
2565     /*
2566      * Run the aio requests. As soon as one request can't be submitted
2567      * successfully, fail all requests that are not yet submitted (we must
2568      * return failure for all requests anyway)
2569      *
2570      * num_requests cannot be set to the right value immediately: If
2571      * bdrv_aio_writev fails for some request, num_requests would be too high
2572      * and therefore multiwrite_cb() would never recognize the multiwrite
2573      * request as completed. We also cannot use the loop variable i to set it
2574      * when the first request fails because the callback may already have been
2575      * called for previously submitted requests. Thus, num_requests must be
2576      * incremented for each request that is submitted.
2577      *
2578      * The problem that callbacks may be called early also means that we need
2579      * to take care that num_requests doesn't become 0 before all requests are
2580      * submitted - multiwrite_cb() would consider the multiwrite request
2581      * completed. A dummy request that is "completed" by a manual call to
2582      * multiwrite_cb() takes care of this.
2583      */
2584     mcb->num_requests = 1;
2585
2586     // Run the aio requests
2587     for (i = 0; i < num_reqs; i++) {
2588         mcb->num_requests++;
2589         acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2590             reqs[i].nb_sectors, multiwrite_cb, mcb);
2591
2592         if (acb == NULL) {
2593             // We can only fail the whole thing if no request has been
2594             // submitted yet. Otherwise we'll wait for the submitted AIOs to
2595             // complete and report the error in the callback.
2596             if (i == 0) {
2597                 trace_bdrv_aio_multiwrite_earlyfail(mcb);
2598                 goto fail;
2599             } else {
2600                 trace_bdrv_aio_multiwrite_latefail(mcb, i);
2601                 multiwrite_cb(mcb, -EIO);
2602                 break;
2603             }
2604         }
2605     }
2606
2607     /* Complete the dummy request */
2608     multiwrite_cb(mcb, 0);
2609
2610     return 0;
2611
2612 fail:
2613     for (i = 0; i < mcb->num_callbacks; i++) {
2614         reqs[i].error = -EIO;
2615     }
2616     g_free(mcb);
2617     return -1;
2618 }
2619
2620 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2621         BlockDriverCompletionFunc *cb, void *opaque)
2622 {
2623     BlockDriver *drv = bs->drv;
2624
2625     trace_bdrv_aio_flush(bs, opaque);
2626
2627     if (bs->open_flags & BDRV_O_NO_FLUSH) {
2628         return bdrv_aio_noop_em(bs, cb, opaque);
2629     }
2630
2631     if (!drv)
2632         return NULL;
2633     return drv->bdrv_aio_flush(bs, cb, opaque);
2634 }
2635
2636 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2637 {
2638     acb->pool->cancel(acb);
2639 }
2640
2641
2642 /**************************************************************/
2643 /* async block device emulation */
2644
/*
 * AIOCB used when asynchronous requests are emulated with synchronous
 * calls; the completion callback is delivered from a bottom half.
 */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;  /* generic part, handed out to callers */
    QEMUBH *bh;               /* bottom half that runs bdrv_aio_bh_cb() */
    int ret;                  /* result passed to the completion callback */
    /* vector translation state */
    QEMUIOVector *qiov;       /* caller's I/O vector (NULL for flush/no-op) */
    uint8_t *bounce;          /* linear bounce buffer (NULL for flush/no-op) */
    int is_write;             /* non-zero: skip copying bounce back to qiov */
} BlockDriverAIOCBSync;
2654
2655 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2656 {
2657     BlockDriverAIOCBSync *acb =
2658         container_of(blockacb, BlockDriverAIOCBSync, common);
2659     qemu_bh_delete(acb->bh);
2660     acb->bh = NULL;
2661     qemu_aio_release(acb);
2662 }
2663
/* AIOCB pool for requests emulated through BlockDriverAIOCBSync. */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};
2668
2669 static void bdrv_aio_bh_cb(void *opaque)
2670 {
2671     BlockDriverAIOCBSync *acb = opaque;
2672
2673     if (!acb->is_write)
2674         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2675     qemu_vfree(acb->bounce);
2676     acb->common.cb(acb->common.opaque, acb->ret);
2677     qemu_bh_delete(acb->bh);
2678     acb->bh = NULL;
2679     qemu_aio_release(acb);
2680 }
2681
2682 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2683                                             int64_t sector_num,
2684                                             QEMUIOVector *qiov,
2685                                             int nb_sectors,
2686                                             BlockDriverCompletionFunc *cb,
2687                                             void *opaque,
2688                                             int is_write)
2689
2690 {
2691     BlockDriverAIOCBSync *acb;
2692
2693     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2694     acb->is_write = is_write;
2695     acb->qiov = qiov;
2696     acb->bounce = qemu_blockalign(bs, qiov->size);
2697
2698     if (!acb->bh)
2699         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2700
2701     if (is_write) {
2702         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2703         acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2704     } else {
2705         acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2706     }
2707
2708     qemu_bh_schedule(acb->bh);
2709
2710     return &acb->common;
2711 }
2712
2713 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2714         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2715         BlockDriverCompletionFunc *cb, void *opaque)
2716 {
2717     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2718 }
2719
2720 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2721         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2722         BlockDriverCompletionFunc *cb, void *opaque)
2723 {
2724     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2725 }
2726
2727
/*
 * AIOCB used when asynchronous requests are emulated on top of the
 * driver's coroutine read/write callbacks.
 */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;  /* generic part, handed out to callers */
    BlockRequest req;         /* sector range, qiov and resulting error */
    bool is_write;            /* true for writes, false for reads */
    QEMUBH* bh;               /* bottom half delivering the completion */
} BlockDriverAIOCBCoroutine;
2734
/*
 * Cancel handler of the coroutine-based AIO pool.  It does not touch
 * @blockacb; it only calls qemu_aio_flush().
 */
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}
2739
/* AIOCB pool for requests emulated through BlockDriverAIOCBCoroutine. */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
    .cancel             = bdrv_aio_co_cancel_em,
};
2744
2745 static void bdrv_co_rw_bh(void *opaque)
2746 {
2747     BlockDriverAIOCBCoroutine *acb = opaque;
2748
2749     acb->common.cb(acb->common.opaque, acb->req.error);
2750     qemu_bh_delete(acb->bh);
2751     qemu_aio_release(acb);
2752 }
2753
2754 static void coroutine_fn bdrv_co_rw(void *opaque)
2755 {
2756     BlockDriverAIOCBCoroutine *acb = opaque;
2757     BlockDriverState *bs = acb->common.bs;
2758
2759     if (!acb->is_write) {
2760         acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
2761             acb->req.nb_sectors, acb->req.qiov);
2762     } else {
2763         acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
2764             acb->req.nb_sectors, acb->req.qiov);
2765     }
2766
2767     acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
2768     qemu_bh_schedule(acb->bh);
2769 }
2770
2771 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2772                                                int64_t sector_num,
2773                                                QEMUIOVector *qiov,
2774                                                int nb_sectors,
2775                                                BlockDriverCompletionFunc *cb,
2776                                                void *opaque,
2777                                                bool is_write)
2778 {
2779     Coroutine *co;
2780     BlockDriverAIOCBCoroutine *acb;
2781
2782     acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2783     acb->req.sector = sector_num;
2784     acb->req.nb_sectors = nb_sectors;
2785     acb->req.qiov = qiov;
2786     acb->is_write = is_write;
2787
2788     co = qemu_coroutine_create(bdrv_co_rw);
2789     qemu_coroutine_enter(co, acb);
2790
2791     return &acb->common;
2792 }
2793
2794 static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
2795         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2796         BlockDriverCompletionFunc *cb, void *opaque)
2797 {
2798     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2799                                  false);
2800 }
2801
2802 static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
2803         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2804         BlockDriverCompletionFunc *cb, void *opaque)
2805 {
2806     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2807                                  true);
2808 }
2809
2810 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2811         BlockDriverCompletionFunc *cb, void *opaque)
2812 {
2813     BlockDriverAIOCBSync *acb;
2814
2815     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2816     acb->is_write = 1; /* don't bounce in the completion hadler */
2817     acb->qiov = NULL;
2818     acb->bounce = NULL;
2819     acb->ret = 0;
2820
2821     if (!acb->bh)
2822         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2823
2824     bdrv_flush(bs);
2825     qemu_bh_schedule(acb->bh);
2826     return &acb->common;
2827 }
2828
2829 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2830         BlockDriverCompletionFunc *cb, void *opaque)
2831 {
2832     BlockDriverAIOCBSync *acb;
2833
2834     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2835     acb->is_write = 1; /* don't bounce in the completion handler */
2836     acb->qiov = NULL;
2837     acb->bounce = NULL;
2838     acb->ret = 0;
2839
2840     if (!acb->bh) {
2841         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2842     }
2843
2844     qemu_bh_schedule(acb->bh);
2845     return &acb->common;
2846 }
2847
2848 /**************************************************************/
2849 /* sync block device emulation */
2850
/* Completion callback storing @ret into the int that @opaque points to. */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2855
2856 #define NOT_DONE 0x7fffffff
2857
2858 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
2859                         uint8_t *buf, int nb_sectors)
2860 {
2861     int async_ret;
2862     BlockDriverAIOCB *acb;
2863     struct iovec iov;
2864     QEMUIOVector qiov;
2865
2866     async_ret = NOT_DONE;
2867     iov.iov_base = (void *)buf;
2868     iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2869     qemu_iovec_init_external(&qiov, &iov, 1);
2870     acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2871         bdrv_rw_em_cb, &async_ret);
2872     if (acb == NULL) {
2873         async_ret = -1;
2874         goto fail;
2875     }
2876
2877     while (async_ret == NOT_DONE) {
2878         qemu_aio_wait();
2879     }
2880
2881
2882 fail:
2883     return async_ret;
2884 }
2885
2886 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2887                          const uint8_t *buf, int nb_sectors)
2888 {
2889     int async_ret;
2890     BlockDriverAIOCB *acb;
2891     struct iovec iov;
2892     QEMUIOVector qiov;
2893
2894     async_ret = NOT_DONE;
2895     iov.iov_base = (void *)buf;
2896     iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2897     qemu_iovec_init_external(&qiov, &iov, 1);
2898     acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2899         bdrv_rw_em_cb, &async_ret);
2900     if (acb == NULL) {
2901         async_ret = -1;
2902         goto fail;
2903     }
2904     while (async_ret == NOT_DONE) {
2905         qemu_aio_wait();
2906     }
2907
2908 fail:
2909     return async_ret;
2910 }
2911
/* Run the module initialisers registered for MODULE_INIT_BLOCK. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
2916
/* Set use_bdrv_whitelist, then initialise the block layer via bdrv_init(). */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
2922
2923 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2924                    BlockDriverCompletionFunc *cb, void *opaque)
2925 {
2926     BlockDriverAIOCB *acb;
2927
2928     if (pool->free_aiocb) {
2929         acb = pool->free_aiocb;
2930         pool->free_aiocb = acb->next;
2931     } else {
2932         acb = g_malloc0(pool->aiocb_size);
2933         acb->pool = pool;
2934     }
2935     acb->bs = bs;
2936     acb->cb = cb;
2937     acb->opaque = opaque;
2938     return acb;
2939 }
2940
2941 void qemu_aio_release(void *p)
2942 {
2943     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2944     AIOPool *pool = acb->pool;
2945     acb->next = pool->free_aiocb;
2946     pool->free_aiocb = acb;
2947 }
2948
2949 /**************************************************************/
2950 /* Coroutine block device emulation */
2951
/* Links an AIO completion back to the coroutine that is waiting for it. */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;  /* coroutine to re-enter on completion */
    int ret;               /* result reported by the completion callback */
} CoroutineIOCompletion;
2956
2957 static void bdrv_co_io_em_complete(void *opaque, int ret)
2958 {
2959     CoroutineIOCompletion *co = opaque;
2960
2961     co->ret = ret;
2962     qemu_coroutine_enter(co->coroutine, NULL);
2963 }
2964
2965 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2966                                       int nb_sectors, QEMUIOVector *iov,
2967                                       bool is_write)
2968 {
2969     CoroutineIOCompletion co = {
2970         .coroutine = qemu_coroutine_self(),
2971     };
2972     BlockDriverAIOCB *acb;
2973
2974     if (is_write) {
2975         acb = bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2976                               bdrv_co_io_em_complete, &co);
2977     } else {
2978         acb = bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
2979                              bdrv_co_io_em_complete, &co);
2980     }
2981
2982     trace_bdrv_co_io(is_write, acb);
2983     if (!acb) {
2984         return -EIO;
2985     }
2986     qemu_coroutine_yield();
2987
2988     return co.ret;
2989 }
2990
2991 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
2992                                          int64_t sector_num, int nb_sectors,
2993                                          QEMUIOVector *iov)
2994 {
2995     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
2996 }
2997
2998 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
2999                                          int64_t sector_num, int nb_sectors,
3000                                          QEMUIOVector *iov)
3001 {
3002     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3003 }
3004
3005 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
3006 {
3007     CoroutineIOCompletion co = {
3008         .coroutine = qemu_coroutine_self(),
3009     };
3010     BlockDriverAIOCB *acb;
3011
3012     acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3013     if (!acb) {
3014         return -EIO;
3015     }
3016     qemu_coroutine_yield();
3017     return co.ret;
3018 }
3019
3020 /**************************************************************/
3021 /* removable device support */
3022
3023 /**
3024  * Return TRUE if the media is present
3025  */
3026 int bdrv_is_inserted(BlockDriverState *bs)
3027 {
3028     BlockDriver *drv = bs->drv;
3029
3030     if (!drv)
3031         return 0;
3032     if (!drv->bdrv_is_inserted)
3033         return 1;
3034     return drv->bdrv_is_inserted(bs);
3035 }
3036
3037 /**
3038  * Return whether the media changed since the last call to this
3039  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3040  */
3041 int bdrv_media_changed(BlockDriverState *bs)
3042 {
3043     BlockDriver *drv = bs->drv;
3044
3045     if (drv && drv->bdrv_media_changed) {
3046         return drv->bdrv_media_changed(bs);
3047     }
3048     return -ENOTSUP;
3049 }
3050
3051 /**
3052  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3053  */
3054 void bdrv_eject(BlockDriverState *bs, int eject_flag)
3055 {
3056     BlockDriver *drv = bs->drv;
3057
3058     if (drv && drv->bdrv_eject) {
3059         drv->bdrv_eject(bs, eject_flag);
3060     }
3061 }
3062
3063 int bdrv_is_locked(BlockDriverState *bs)
3064 {
3065     return bs->locked;
3066 }
3067
3068 /**
3069  * Lock or unlock the media (if it is locked, the user won't be able
3070  * to eject it manually).
3071  */
3072 void bdrv_set_locked(BlockDriverState *bs, int locked)
3073 {
3074     BlockDriver *drv = bs->drv;
3075
3076     trace_bdrv_set_locked(bs, locked);
3077
3078     bs->locked = locked;
3079     if (drv && drv->bdrv_set_locked) {
3080         drv->bdrv_set_locked(bs, locked);
3081     }
3082 }
3083
3084 /* needed for generic scsi interface */
3085
3086 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3087 {
3088     BlockDriver *drv = bs->drv;
3089
3090     if (drv && drv->bdrv_ioctl)
3091         return drv->bdrv_ioctl(bs, req, buf);
3092     return -ENOTSUP;
3093 }
3094
3095 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3096         unsigned long int req, void *buf,
3097         BlockDriverCompletionFunc *cb, void *opaque)
3098 {
3099     BlockDriver *drv = bs->drv;
3100
3101     if (drv && drv->bdrv_aio_ioctl)
3102         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3103     return NULL;
3104 }
3105
3106
3107
3108 void *qemu_blockalign(BlockDriverState *bs, size_t size)
3109 {
3110     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3111 }
3112
3113 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3114 {
3115     int64_t bitmap_size;
3116
3117     bs->dirty_count = 0;
3118     if (enable) {
3119         if (!bs->dirty_bitmap) {
3120             bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3121                     BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3122             bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
3123
3124             bs->dirty_bitmap = g_malloc0(bitmap_size);
3125         }
3126     } else {
3127         if (bs->dirty_bitmap) {
3128             g_free(bs->dirty_bitmap);
3129             bs->dirty_bitmap = NULL;
3130         }
3131     }
3132 }
3133
3134 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3135 {
3136     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
3137
3138     if (bs->dirty_bitmap &&
3139         (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
3140         return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3141             (1UL << (chunk % (sizeof(unsigned long) * 8))));
3142     } else {
3143         return 0;
3144     }
3145 }
3146
3147 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3148                       int nr_sectors)
3149 {
3150     set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3151 }
3152
3153 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3154 {
3155     return bs->dirty_count;
3156 }
3157
3158 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3159 {
3160     assert(bs->in_use != in_use);
3161     bs->in_use = in_use;
3162 }
3163
3164 int bdrv_in_use(BlockDriverState *bs)
3165 {
3166     return bs->in_use;
3167 }
3168
3169 void
3170 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3171         enum BlockAcctType type)
3172 {
3173     assert(type < BDRV_MAX_IOTYPE);
3174
3175     cookie->bytes = bytes;
3176     cookie->start_time_ns = get_clock();
3177     cookie->type = type;
3178 }
3179
3180 void
3181 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3182 {
3183     assert(cookie->type < BDRV_MAX_IOTYPE);
3184
3185     bs->nr_bytes[cookie->type] += cookie->bytes;
3186     bs->nr_ops[cookie->type]++;
3187     bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
3188 }
3189
3190 int bdrv_img_create(const char *filename, const char *fmt,
3191                     const char *base_filename, const char *base_fmt,
3192                     char *options, uint64_t img_size, int flags)
3193 {
3194     QEMUOptionParameter *param = NULL, *create_options = NULL;
3195     QEMUOptionParameter *backing_fmt, *backing_file, *size;
3196     BlockDriverState *bs = NULL;
3197     BlockDriver *drv, *proto_drv;
3198     BlockDriver *backing_drv = NULL;
3199     int ret = 0;
3200
3201     /* Find driver and parse its options */
3202     drv = bdrv_find_format(fmt);
3203     if (!drv) {
3204         error_report("Unknown file format '%s'", fmt);
3205         ret = -EINVAL;
3206         goto out;
3207     }
3208
3209     proto_drv = bdrv_find_protocol(filename);
3210     if (!proto_drv) {
3211         error_report("Unknown protocol '%s'", filename);
3212         ret = -EINVAL;
3213         goto out;
3214     }
3215
3216     create_options = append_option_parameters(create_options,
3217                                               drv->create_options);
3218     create_options = append_option_parameters(create_options,
3219                                               proto_drv->create_options);
3220
3221     /* Create parameter list with default values */
3222     param = parse_option_parameters("", create_options, param);
3223
3224     set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3225
3226     /* Parse -o options */
3227     if (options) {
3228         param = parse_option_parameters(options, create_options, param);
3229         if (param == NULL) {
3230             error_report("Invalid options for file format '%s'.", fmt);
3231             ret = -EINVAL;
3232             goto out;
3233         }
3234     }
3235
3236     if (base_filename) {
3237         if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3238                                  base_filename)) {
3239             error_report("Backing file not supported for file format '%s'",
3240                          fmt);
3241             ret = -EINVAL;
3242             goto out;
3243         }
3244     }
3245
3246     if (base_fmt) {
3247         if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3248             error_report("Backing file format not supported for file "
3249                          "format '%s'", fmt);
3250             ret = -EINVAL;
3251             goto out;
3252         }
3253     }
3254
3255     backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3256     if (backing_file && backing_file->value.s) {
3257         if (!strcmp(filename, backing_file->value.s)) {
3258             error_report("Error: Trying to create an image with the "
3259                          "same filename as the backing file");
3260             ret = -EINVAL;
3261             goto out;
3262         }
3263     }
3264
3265     backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3266     if (backing_fmt && backing_fmt->value.s) {
3267         backing_drv = bdrv_find_format(backing_fmt->value.s);
3268         if (!backing_drv) {
3269             error_report("Unknown backing file format '%s'",
3270                          backing_fmt->value.s);
3271             ret = -EINVAL;
3272             goto out;
3273         }
3274     }
3275
3276     // The size for the image must always be specified, with one exception:
3277     // If we are using a backing file, we can obtain the size from there
3278     size = get_option_parameter(param, BLOCK_OPT_SIZE);
3279     if (size && size->value.n == -1) {
3280         if (backing_file && backing_file->value.s) {
3281             uint64_t size;
3282             char buf[32];
3283
3284             bs = bdrv_new("");
3285
3286             ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3287             if (ret < 0) {
3288                 error_report("Could not open '%s'", backing_file->value.s);
3289                 goto out;
3290             }
3291             bdrv_get_geometry(bs, &size);
3292             size *= 512;
3293
3294             snprintf(buf, sizeof(buf), "%" PRId64, size);
3295             set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3296         } else {
3297             error_report("Image creation needs a size parameter");
3298             ret = -EINVAL;
3299             goto out;
3300         }
3301     }
3302
3303     printf("Formatting '%s', fmt=%s ", filename, fmt);
3304     print_option_parameters(param);
3305     puts("");
3306
3307     ret = bdrv_create(drv, filename, param);
3308
3309     if (ret < 0) {
3310         if (ret == -ENOTSUP) {
3311             error_report("Formatting or formatting option not supported for "
3312                          "file format '%s'", fmt);
3313         } else if (ret == -EFBIG) {
3314             error_report("The image size is too large for file format '%s'",
3315                          fmt);
3316         } else {
3317             error_report("%s: error while creating %s: %s", filename, fmt,
3318                          strerror(-ret));
3319         }
3320     }
3321
3322 out:
3323     free_option_parameters(create_options);
3324     free_option_parameters(param);
3325
3326     if (bs) {
3327         bdrv_delete(bs);
3328     }
3329
3330     return ret;
3331 }
This page took 0.207676 seconds and 4 git commands to generate.