]> Git Repo - qemu.git/blob - block.c
block: add bdrv_co_discard and bdrv_aio_discard support
[qemu.git] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "monitor.h"
28 #include "block_int.h"
29 #include "module.h"
30 #include "qemu-objects.h"
31 #include "qemu-coroutine.h"
32
33 #ifdef CONFIG_BSD
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/ioctl.h>
37 #include <sys/queue.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
42
43 #ifdef _WIN32
44 #include <windows.h>
45 #endif
46
47 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
48
49 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
50 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
51         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
52         BlockDriverCompletionFunc *cb, void *opaque);
53 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
54         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
55         BlockDriverCompletionFunc *cb, void *opaque);
56 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
57                                          int64_t sector_num, int nb_sectors,
58                                          QEMUIOVector *iov);
59 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
60                                          int64_t sector_num, int nb_sectors,
61                                          QEMUIOVector *iov);
62 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
63     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
64 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
65     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
66 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
67                                                int64_t sector_num,
68                                                QEMUIOVector *qiov,
69                                                int nb_sectors,
70                                                BlockDriverCompletionFunc *cb,
71                                                void *opaque,
72                                                bool is_write);
73 static void coroutine_fn bdrv_co_do_rw(void *opaque);
74
75 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
76     QTAILQ_HEAD_INITIALIZER(bdrv_states);
77
78 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
79     QLIST_HEAD_INITIALIZER(bdrv_drivers);
80
81 /* The device to use for VM snapshots */
82 static BlockDriverState *bs_snapshots;
83
84 /* If non-zero, use only whitelisted block drivers */
85 static int use_bdrv_whitelist;
86
87 #ifdef _WIN32
/* Return non-zero when filename starts with an ASCII letter immediately
 * followed by ':' (for example "c:"), zero otherwise. */
static int is_windows_drive_prefix(const char *filename)
{
    const char first = filename[0];
    const int lower = (first >= 'a' && first <= 'z');
    const int upper = (first >= 'A' && first <= 'Z');

    if (!lower && !upper) {
        /* not a letter: do not look at the second character */
        return 0;
    }
    return filename[1] == ':';
}
94
95 int is_windows_drive(const char *filename)
96 {
97     if (is_windows_drive_prefix(filename) &&
98         filename[2] == '\0')
99         return 1;
100     if (strstart(filename, "\\\\.\\", NULL) ||
101         strstart(filename, "//./", NULL))
102         return 1;
103     return 0;
104 }
105 #endif
106
/* Return 1 if path contains a ':' and is therefore treated as
 * "<protocol>:...", 0 otherwise.  On Windows, drive specifications are
 * excluded first so that "c:..." is not mistaken for a protocol. */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path)) {
        return 0;
    }
    if (is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    if (strchr(path, ':')) {
        return 1;
    }
    return 0;
}
119
/* Return non-zero if path is absolute.  Everything up to and including the
 * first ':' (if any) is skipped; the path is absolute when what follows
 * begins with '/' (or, on Windows, also '\'). */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *rest;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    rest = colon ? colon + 1 : path;

#ifdef _WIN32
    if (*rest == '\\') {
        return 1;
    }
#endif
    return (*rest == '/');
}
139
/* Build in dest (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the path of filename interpreted relative to base_path.
 * An absolute filename is copied unchanged.  Otherwise the directory part
 * of base_path - up to the later of its first ':' and its last path
 * separator - is kept and filename is appended.  URLs are supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *colon, *sep;
    const char *after_proto, *after_sep, *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just after "<protocol>:" (or start of base_path) */
    colon = strchr(base_path, ':');
    after_proto = colon ? colon + 1 : base_path;

    /* position just after the last directory separator */
    sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bsep = strrchr(base_path, '\\');
        if (!sep || bsep > sep) {
            sep = bsep;
        }
    }
#endif
    after_sep = sep ? sep + 1 : base_path;

    dir_end = (after_sep > after_proto) ? after_sep : after_proto;

    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
183
184 void bdrv_register(BlockDriver *bdrv)
185 {
186     /* Block drivers without coroutine functions need emulation */
187     if (!bdrv->bdrv_co_readv) {
188         bdrv->bdrv_co_readv = bdrv_co_readv_em;
189         bdrv->bdrv_co_writev = bdrv_co_writev_em;
190
191         /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
192          * the block driver lacks aio we need to emulate that too.
193          */
194         if (!bdrv->bdrv_aio_readv) {
195             /* add AIO emulation layer */
196             bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
197             bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
198         }
199     }
200
201     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
202 }
203
204 /* create a new block device (by default it is empty) */
205 BlockDriverState *bdrv_new(const char *device_name)
206 {
207     BlockDriverState *bs;
208
209     bs = g_malloc0(sizeof(BlockDriverState));
210     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
211     if (device_name[0] != '\0') {
212         QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
213     }
214     bdrv_iostatus_disable(bs);
215     return bs;
216 }
217
218 BlockDriver *bdrv_find_format(const char *format_name)
219 {
220     BlockDriver *drv1;
221     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
222         if (!strcmp(drv1->format_name, format_name)) {
223             return drv1;
224         }
225     }
226     return NULL;
227 }
228
229 static int bdrv_is_whitelisted(BlockDriver *drv)
230 {
231     static const char *whitelist[] = {
232         CONFIG_BDRV_WHITELIST
233     };
234     const char **p;
235
236     if (!whitelist[0])
237         return 1;               /* no whitelist, anything goes */
238
239     for (p = whitelist; *p; p++) {
240         if (!strcmp(drv->format_name, *p)) {
241             return 1;
242         }
243     }
244     return 0;
245 }
246
247 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
248 {
249     BlockDriver *drv = bdrv_find_format(format_name);
250     return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
251 }
252
253 int bdrv_create(BlockDriver *drv, const char* filename,
254     QEMUOptionParameter *options)
255 {
256     if (!drv->bdrv_create)
257         return -ENOTSUP;
258
259     return drv->bdrv_create(filename, options);
260 }
261
262 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
263 {
264     BlockDriver *drv;
265
266     drv = bdrv_find_protocol(filename);
267     if (drv == NULL) {
268         return -ENOENT;
269     }
270
271     return bdrv_create(drv, filename, options);
272 }
273
#ifdef _WIN32
/*
 * Reserve a temporary file and store its name in filename.
 *
 * filename: output buffer
 * size:     capacity of filename in bytes
 *
 * The name is produced in a MAX_PATH-sized scratch buffer and then copied
 * with pstrcpy(), so no more than size bytes are ever written to filename.
 * If the Win32 calls fail, filename is set to the empty string.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];
    char temp_name[MAX_PATH];
    DWORD dir_len;

    if (size <= 0) {
        return;
    }
    filename[0] = '\0';

    /* GetTempPath() returns 0 on failure, or the required size when the
     * buffer is too small. */
    dir_len = GetTempPath(MAX_PATH, temp_dir);
    if (dir_len == 0 || dir_len > MAX_PATH) {
        return;
    }
    if (GetTempFileName(temp_dir, "qem", 0, temp_name) == 0) {
        return;
    }
    pstrcpy(filename, size, temp_name);
}
#else
/*
 * Reserve a temporary file "<TMPDIR>/vl.XXXXXX" (TMPDIR defaults to /tmp)
 * with mkstemp() and store its name in filename.
 *
 * filename: output buffer
 * size:     capacity of filename in bytes
 *
 * The descriptor returned by mkstemp() is closed again; only the name is
 * handed back.  If mkstemp() fails, filename is set to the empty string
 * so that callers do not go on to use a name that was never reserved.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    if (size <= 0) {
        return;
    }

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd < 0) {
        /* do not call close(-1), and do not leak the raw template name */
        filename[0] = '\0';
        return;
    }
    close(fd);
}
#endif
296
297 /*
298  * Detect host devices. By convention, /dev/cdrom[N] is always
299  * recognized as a host CDROM.
300  */
301 static BlockDriver *find_hdev_driver(const char *filename)
302 {
303     int score_max = 0, score;
304     BlockDriver *drv = NULL, *d;
305
306     QLIST_FOREACH(d, &bdrv_drivers, list) {
307         if (d->bdrv_probe_device) {
308             score = d->bdrv_probe_device(filename);
309             if (score > score_max) {
310                 score_max = score;
311                 drv = d;
312             }
313         }
314     }
315
316     return drv;
317 }
318
319 BlockDriver *bdrv_find_protocol(const char *filename)
320 {
321     BlockDriver *drv1;
322     char protocol[128];
323     int len;
324     const char *p;
325
326     /* TODO Drivers without bdrv_file_open must be specified explicitly */
327
328     /*
329      * XXX(hch): we really should not let host device detection
330      * override an explicit protocol specification, but moving this
331      * later breaks access to device names with colons in them.
332      * Thanks to the brain-dead persistent naming schemes on udev-
333      * based Linux systems those actually are quite common.
334      */
335     drv1 = find_hdev_driver(filename);
336     if (drv1) {
337         return drv1;
338     }
339
340     if (!path_has_protocol(filename)) {
341         return bdrv_find_format("file");
342     }
343     p = strchr(filename, ':');
344     assert(p != NULL);
345     len = p - filename;
346     if (len > sizeof(protocol) - 1)
347         len = sizeof(protocol) - 1;
348     memcpy(protocol, filename, len);
349     protocol[len] = '\0';
350     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
351         if (drv1->protocol_name &&
352             !strcmp(drv1->protocol_name, protocol)) {
353             return drv1;
354         }
355     }
356     return NULL;
357 }
358
359 static int find_image_format(const char *filename, BlockDriver **pdrv)
360 {
361     int ret, score, score_max;
362     BlockDriver *drv1, *drv;
363     uint8_t buf[2048];
364     BlockDriverState *bs;
365
366     ret = bdrv_file_open(&bs, filename, 0);
367     if (ret < 0) {
368         *pdrv = NULL;
369         return ret;
370     }
371
372     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
373     if (bs->sg || !bdrv_is_inserted(bs)) {
374         bdrv_delete(bs);
375         drv = bdrv_find_format("raw");
376         if (!drv) {
377             ret = -ENOENT;
378         }
379         *pdrv = drv;
380         return ret;
381     }
382
383     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
384     bdrv_delete(bs);
385     if (ret < 0) {
386         *pdrv = NULL;
387         return ret;
388     }
389
390     score_max = 0;
391     drv = NULL;
392     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
393         if (drv1->bdrv_probe) {
394             score = drv1->bdrv_probe(buf, ret, filename);
395             if (score > score_max) {
396                 score_max = score;
397                 drv = drv1;
398             }
399         }
400     }
401     if (!drv) {
402         ret = -ENOENT;
403     }
404     *pdrv = drv;
405     return ret;
406 }
407
408 /**
409  * Set the current 'total_sectors' value
410  */
411 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
412 {
413     BlockDriver *drv = bs->drv;
414
415     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
416     if (bs->sg)
417         return 0;
418
419     /* query actual device if possible, otherwise just trust the hint */
420     if (drv->bdrv_getlength) {
421         int64_t length = drv->bdrv_getlength(bs);
422         if (length < 0) {
423             return length;
424         }
425         hint = length >> BDRV_SECTOR_BITS;
426     }
427
428     bs->total_sectors = hint;
429     return 0;
430 }
431
432 /**
433  * Set open flags for a given cache mode
434  *
435  * Return 0 on success, -1 if the cache mode was invalid.
436  */
437 int bdrv_parse_cache_flags(const char *mode, int *flags)
438 {
439     *flags &= ~BDRV_O_CACHE_MASK;
440
441     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
442         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
443     } else if (!strcmp(mode, "directsync")) {
444         *flags |= BDRV_O_NOCACHE;
445     } else if (!strcmp(mode, "writeback")) {
446         *flags |= BDRV_O_CACHE_WB;
447     } else if (!strcmp(mode, "unsafe")) {
448         *flags |= BDRV_O_CACHE_WB;
449         *flags |= BDRV_O_NO_FLUSH;
450     } else if (!strcmp(mode, "writethrough")) {
451         /* this is the default */
452     } else {
453         return -1;
454     }
455
456     return 0;
457 }
458
459 /*
460  * Common part for opening disk images and files
461  */
462 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
463     int flags, BlockDriver *drv)
464 {
465     int ret, open_flags;
466
467     assert(drv != NULL);
468
469     trace_bdrv_open_common(bs, filename, flags, drv->format_name);
470
471     bs->file = NULL;
472     bs->total_sectors = 0;
473     bs->encrypted = 0;
474     bs->valid_key = 0;
475     bs->open_flags = flags;
476     bs->buffer_alignment = 512;
477
478     pstrcpy(bs->filename, sizeof(bs->filename), filename);
479
480     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
481         return -ENOTSUP;
482     }
483
484     bs->drv = drv;
485     bs->opaque = g_malloc0(drv->instance_size);
486
487     if (flags & BDRV_O_CACHE_WB)
488         bs->enable_write_cache = 1;
489
490     /*
491      * Clear flags that are internal to the block layer before opening the
492      * image.
493      */
494     open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
495
496     /*
497      * Snapshots should be writable.
498      */
499     if (bs->is_temporary) {
500         open_flags |= BDRV_O_RDWR;
501     }
502
503     /* Open the image, either directly or using a protocol */
504     if (drv->bdrv_file_open) {
505         ret = drv->bdrv_file_open(bs, filename, open_flags);
506     } else {
507         ret = bdrv_file_open(&bs->file, filename, open_flags);
508         if (ret >= 0) {
509             ret = drv->bdrv_open(bs, open_flags);
510         }
511     }
512
513     if (ret < 0) {
514         goto free_and_fail;
515     }
516
517     bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
518
519     ret = refresh_total_sectors(bs, bs->total_sectors);
520     if (ret < 0) {
521         goto free_and_fail;
522     }
523
524 #ifndef _WIN32
525     if (bs->is_temporary) {
526         unlink(filename);
527     }
528 #endif
529     return 0;
530
531 free_and_fail:
532     if (bs->file) {
533         bdrv_delete(bs->file);
534         bs->file = NULL;
535     }
536     g_free(bs->opaque);
537     bs->opaque = NULL;
538     bs->drv = NULL;
539     return ret;
540 }
541
542 /*
543  * Opens a file using a protocol (file, host_device, nbd, ...)
544  */
545 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
546 {
547     BlockDriverState *bs;
548     BlockDriver *drv;
549     int ret;
550
551     drv = bdrv_find_protocol(filename);
552     if (!drv) {
553         return -ENOENT;
554     }
555
556     bs = bdrv_new("");
557     ret = bdrv_open_common(bs, filename, flags, drv);
558     if (ret < 0) {
559         bdrv_delete(bs);
560         return ret;
561     }
562     bs->growable = 1;
563     *pbs = bs;
564     return 0;
565 }
566
567 /*
568  * Opens a disk image (raw, qcow2, vmdk, ...)
569  */
570 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
571               BlockDriver *drv)
572 {
573     int ret;
574
575     if (flags & BDRV_O_SNAPSHOT) {
576         BlockDriverState *bs1;
577         int64_t total_size;
578         int is_protocol = 0;
579         BlockDriver *bdrv_qcow2;
580         QEMUOptionParameter *options;
581         char tmp_filename[PATH_MAX];
582         char backing_filename[PATH_MAX];
583
584         /* if snapshot, we create a temporary backing file and open it
585            instead of opening 'filename' directly */
586
587         /* if there is a backing file, use it */
588         bs1 = bdrv_new("");
589         ret = bdrv_open(bs1, filename, 0, drv);
590         if (ret < 0) {
591             bdrv_delete(bs1);
592             return ret;
593         }
594         total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
595
596         if (bs1->drv && bs1->drv->protocol_name)
597             is_protocol = 1;
598
599         bdrv_delete(bs1);
600
601         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
602
603         /* Real path is meaningless for protocols */
604         if (is_protocol)
605             snprintf(backing_filename, sizeof(backing_filename),
606                      "%s", filename);
607         else if (!realpath(filename, backing_filename))
608             return -errno;
609
610         bdrv_qcow2 = bdrv_find_format("qcow2");
611         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
612
613         set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
614         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
615         if (drv) {
616             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
617                 drv->format_name);
618         }
619
620         ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
621         free_option_parameters(options);
622         if (ret < 0) {
623             return ret;
624         }
625
626         filename = tmp_filename;
627         drv = bdrv_qcow2;
628         bs->is_temporary = 1;
629     }
630
631     /* Find the right image format driver */
632     if (!drv) {
633         ret = find_image_format(filename, &drv);
634     }
635
636     if (!drv) {
637         goto unlink_and_fail;
638     }
639
640     /* Open the image */
641     ret = bdrv_open_common(bs, filename, flags, drv);
642     if (ret < 0) {
643         goto unlink_and_fail;
644     }
645
646     /* If there is a backing file, use it */
647     if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
648         char backing_filename[PATH_MAX];
649         int back_flags;
650         BlockDriver *back_drv = NULL;
651
652         bs->backing_hd = bdrv_new("");
653
654         if (path_has_protocol(bs->backing_file)) {
655             pstrcpy(backing_filename, sizeof(backing_filename),
656                     bs->backing_file);
657         } else {
658             path_combine(backing_filename, sizeof(backing_filename),
659                          filename, bs->backing_file);
660         }
661
662         if (bs->backing_format[0] != '\0') {
663             back_drv = bdrv_find_format(bs->backing_format);
664         }
665
666         /* backing files always opened read-only */
667         back_flags =
668             flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
669
670         ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
671         if (ret < 0) {
672             bdrv_close(bs);
673             return ret;
674         }
675         if (bs->is_temporary) {
676             bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
677         } else {
678             /* base image inherits from "parent" */
679             bs->backing_hd->keep_read_only = bs->keep_read_only;
680         }
681     }
682
683     if (!bdrv_key_required(bs)) {
684         bdrv_dev_change_media_cb(bs, true);
685     }
686
687     return 0;
688
689 unlink_and_fail:
690     if (bs->is_temporary) {
691         unlink(filename);
692     }
693     return ret;
694 }
695
696 void bdrv_close(BlockDriverState *bs)
697 {
698     if (bs->drv) {
699         if (bs == bs_snapshots) {
700             bs_snapshots = NULL;
701         }
702         if (bs->backing_hd) {
703             bdrv_delete(bs->backing_hd);
704             bs->backing_hd = NULL;
705         }
706         bs->drv->bdrv_close(bs);
707         g_free(bs->opaque);
708 #ifdef _WIN32
709         if (bs->is_temporary) {
710             unlink(bs->filename);
711         }
712 #endif
713         bs->opaque = NULL;
714         bs->drv = NULL;
715
716         if (bs->file != NULL) {
717             bdrv_close(bs->file);
718         }
719
720         bdrv_dev_change_media_cb(bs, false);
721     }
722 }
723
724 void bdrv_close_all(void)
725 {
726     BlockDriverState *bs;
727
728     QTAILQ_FOREACH(bs, &bdrv_states, list) {
729         bdrv_close(bs);
730     }
731 }
732
733 /* make a BlockDriverState anonymous by removing from bdrv_state list.
734    Also, NULL terminate the device_name to prevent double remove */
735 void bdrv_make_anon(BlockDriverState *bs)
736 {
737     if (bs->device_name[0] != '\0') {
738         QTAILQ_REMOVE(&bdrv_states, bs, list);
739     }
740     bs->device_name[0] = '\0';
741 }
742
743 void bdrv_delete(BlockDriverState *bs)
744 {
745     assert(!bs->dev);
746
747     /* remove from list, if necessary */
748     bdrv_make_anon(bs);
749
750     bdrv_close(bs);
751     if (bs->file != NULL) {
752         bdrv_delete(bs->file);
753     }
754
755     assert(bs != bs_snapshots);
756     g_free(bs);
757 }
758
759 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
760 /* TODO change to DeviceState *dev when all users are qdevified */
761 {
762     if (bs->dev) {
763         return -EBUSY;
764     }
765     bs->dev = dev;
766     bdrv_iostatus_reset(bs);
767     return 0;
768 }
769
770 /* TODO qdevified devices don't use this, remove when devices are qdevified */
771 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
772 {
773     if (bdrv_attach_dev(bs, dev) < 0) {
774         abort();
775     }
776 }
777
778 void bdrv_detach_dev(BlockDriverState *bs, void *dev)
779 /* TODO change to DeviceState *dev when all users are qdevified */
780 {
781     assert(bs->dev == dev);
782     bs->dev = NULL;
783     bs->dev_ops = NULL;
784     bs->dev_opaque = NULL;
785     bs->buffer_alignment = 512;
786 }
787
788 /* TODO change to return DeviceState * when all users are qdevified */
789 void *bdrv_get_attached_dev(BlockDriverState *bs)
790 {
791     return bs->dev;
792 }
793
794 void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
795                       void *opaque)
796 {
797     bs->dev_ops = ops;
798     bs->dev_opaque = opaque;
799     if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
800         bs_snapshots = NULL;
801     }
802 }
803
804 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
805 {
806     if (bs->dev_ops && bs->dev_ops->change_media_cb) {
807         bs->dev_ops->change_media_cb(bs->dev_opaque, load);
808     }
809 }
810
811 bool bdrv_dev_has_removable_media(BlockDriverState *bs)
812 {
813     return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
814 }
815
816 bool bdrv_dev_is_tray_open(BlockDriverState *bs)
817 {
818     if (bs->dev_ops && bs->dev_ops->is_tray_open) {
819         return bs->dev_ops->is_tray_open(bs->dev_opaque);
820     }
821     return false;
822 }
823
824 static void bdrv_dev_resize_cb(BlockDriverState *bs)
825 {
826     if (bs->dev_ops && bs->dev_ops->resize_cb) {
827         bs->dev_ops->resize_cb(bs->dev_opaque);
828     }
829 }
830
831 bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
832 {
833     if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
834         return bs->dev_ops->is_medium_locked(bs->dev_opaque);
835     }
836     return false;
837 }
838
839 /*
840  * Run consistency checks on an image
841  *
842  * Returns 0 if the check could be completed (it doesn't mean that the image is
843  * free of errors) or -errno when an internal error occurred. The results of the
844  * check are stored in res.
845  */
846 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
847 {
848     if (bs->drv->bdrv_check == NULL) {
849         return -ENOTSUP;
850     }
851
852     memset(res, 0, sizeof(*res));
853     return bs->drv->bdrv_check(bs, res);
854 }
855
856 #define COMMIT_BUF_SECTORS 2048
857
858 /* commit COW file into the raw image */
859 int bdrv_commit(BlockDriverState *bs)
860 {
861     BlockDriver *drv = bs->drv;
862     BlockDriver *backing_drv;
863     int64_t sector, total_sectors;
864     int n, ro, open_flags;
865     int ret = 0, rw_ret = 0;
866     uint8_t *buf;
867     char filename[1024];
868     BlockDriverState *bs_rw, *bs_ro;
869
870     if (!drv)
871         return -ENOMEDIUM;
872     
873     if (!bs->backing_hd) {
874         return -ENOTSUP;
875     }
876
877     if (bs->backing_hd->keep_read_only) {
878         return -EACCES;
879     }
880
881     backing_drv = bs->backing_hd->drv;
882     ro = bs->backing_hd->read_only;
883     strncpy(filename, bs->backing_hd->filename, sizeof(filename));
884     open_flags =  bs->backing_hd->open_flags;
885
886     if (ro) {
887         /* re-open as RW */
888         bdrv_delete(bs->backing_hd);
889         bs->backing_hd = NULL;
890         bs_rw = bdrv_new("");
891         rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
892             backing_drv);
893         if (rw_ret < 0) {
894             bdrv_delete(bs_rw);
895             /* try to re-open read-only */
896             bs_ro = bdrv_new("");
897             ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
898                 backing_drv);
899             if (ret < 0) {
900                 bdrv_delete(bs_ro);
901                 /* drive not functional anymore */
902                 bs->drv = NULL;
903                 return ret;
904             }
905             bs->backing_hd = bs_ro;
906             return rw_ret;
907         }
908         bs->backing_hd = bs_rw;
909     }
910
911     total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
912     buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
913
914     for (sector = 0; sector < total_sectors; sector += n) {
915         if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
916
917             if (bdrv_read(bs, sector, buf, n) != 0) {
918                 ret = -EIO;
919                 goto ro_cleanup;
920             }
921
922             if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
923                 ret = -EIO;
924                 goto ro_cleanup;
925             }
926         }
927     }
928
929     if (drv->bdrv_make_empty) {
930         ret = drv->bdrv_make_empty(bs);
931         bdrv_flush(bs);
932     }
933
934     /*
935      * Make sure all data we wrote to the backing device is actually
936      * stable on disk.
937      */
938     if (bs->backing_hd)
939         bdrv_flush(bs->backing_hd);
940
941 ro_cleanup:
942     g_free(buf);
943
944     if (ro) {
945         /* re-open as RO */
946         bdrv_delete(bs->backing_hd);
947         bs->backing_hd = NULL;
948         bs_ro = bdrv_new("");
949         ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
950             backing_drv);
951         if (ret < 0) {
952             bdrv_delete(bs_ro);
953             /* drive not functional anymore */
954             bs->drv = NULL;
955             return ret;
956         }
957         bs->backing_hd = bs_ro;
958         bs->backing_hd->keep_read_only = 0;
959     }
960
961     return ret;
962 }
963
964 void bdrv_commit_all(void)
965 {
966     BlockDriverState *bs;
967
968     QTAILQ_FOREACH(bs, &bdrv_states, list) {
969         bdrv_commit(bs);
970     }
971 }
972
973 /*
974  * Return values:
975  * 0        - success
976  * -EINVAL  - backing format specified, but no file
977  * -ENOSPC  - can't update the backing file because no space is left in the
978  *            image file header
979  * -ENOTSUP - format driver doesn't support changing the backing file
980  */
981 int bdrv_change_backing_file(BlockDriverState *bs,
982     const char *backing_file, const char *backing_fmt)
983 {
984     BlockDriver *drv = bs->drv;
985
986     if (drv->bdrv_change_backing_file != NULL) {
987         return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
988     } else {
989         return -ENOTSUP;
990     }
991 }
992
993 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
994                                    size_t size)
995 {
996     int64_t len;
997
998     if (!bdrv_is_inserted(bs))
999         return -ENOMEDIUM;
1000
1001     if (bs->growable)
1002         return 0;
1003
1004     len = bdrv_getlength(bs);
1005
1006     if (offset < 0)
1007         return -EIO;
1008
1009     if ((offset > len) || (len - offset < size))
1010         return -EIO;
1011
1012     return 0;
1013 }
1014
1015 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1016                               int nb_sectors)
1017 {
1018     return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1019                                    nb_sectors * BDRV_SECTOR_SIZE);
1020 }
1021
1022 typedef struct RwCo {
1023     BlockDriverState *bs;
1024     int64_t sector_num;
1025     int nb_sectors;
1026     QEMUIOVector *qiov;
1027     bool is_write;
1028     int ret;
1029 } RwCo;
1030
1031 static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1032 {
1033     RwCo *rwco = opaque;
1034
1035     if (!rwco->is_write) {
1036         rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1037                                      rwco->nb_sectors, rwco->qiov);
1038     } else {
1039         rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1040                                       rwco->nb_sectors, rwco->qiov);
1041     }
1042 }
1043
1044 /*
1045  * Process a synchronous request using coroutines
1046  */
1047 static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1048                       int nb_sectors, bool is_write)
1049 {
1050     QEMUIOVector qiov;
1051     struct iovec iov = {
1052         .iov_base = (void *)buf,
1053         .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1054     };
1055     Coroutine *co;
1056     RwCo rwco = {
1057         .bs = bs,
1058         .sector_num = sector_num,
1059         .nb_sectors = nb_sectors,
1060         .qiov = &qiov,
1061         .is_write = is_write,
1062         .ret = NOT_DONE,
1063     };
1064
1065     qemu_iovec_init_external(&qiov, &iov, 1);
1066
1067     if (qemu_in_coroutine()) {
1068         /* Fast-path if already in coroutine context */
1069         bdrv_rw_co_entry(&rwco);
1070     } else {
1071         co = qemu_coroutine_create(bdrv_rw_co_entry);
1072         qemu_coroutine_enter(co, &rwco);
1073         while (rwco.ret == NOT_DONE) {
1074             qemu_aio_wait();
1075         }
1076     }
1077     return rwco.ret;
1078 }
1079
1080 /* return < 0 if error. See bdrv_write() for the return codes */
1081 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1082               uint8_t *buf, int nb_sectors)
1083 {
1084     return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
1085 }
1086
1087 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
1088                              int nb_sectors, int dirty)
1089 {
1090     int64_t start, end;
1091     unsigned long val, idx, bit;
1092
1093     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
1094     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
1095
1096     for (; start <= end; start++) {
1097         idx = start / (sizeof(unsigned long) * 8);
1098         bit = start % (sizeof(unsigned long) * 8);
1099         val = bs->dirty_bitmap[idx];
1100         if (dirty) {
1101             if (!(val & (1UL << bit))) {
1102                 bs->dirty_count++;
1103                 val |= 1UL << bit;
1104             }
1105         } else {
1106             if (val & (1UL << bit)) {
1107                 bs->dirty_count--;
1108                 val &= ~(1UL << bit);
1109             }
1110         }
1111         bs->dirty_bitmap[idx] = val;
1112     }
1113 }
1114
1115 /* Return < 0 if error. Important errors are:
1116   -EIO         generic I/O error (may happen for all errors)
1117   -ENOMEDIUM   No media inserted.
1118   -EINVAL      Invalid sector number or nb_sectors
1119   -EACCES      Trying to write a read-only device
1120 */
1121 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1122                const uint8_t *buf, int nb_sectors)
1123 {
1124     return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
1125 }
1126
1127 int bdrv_pread(BlockDriverState *bs, int64_t offset,
1128                void *buf, int count1)
1129 {
1130     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1131     int len, nb_sectors, count;
1132     int64_t sector_num;
1133     int ret;
1134
1135     count = count1;
1136     /* first read to align to sector start */
1137     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1138     if (len > count)
1139         len = count;
1140     sector_num = offset >> BDRV_SECTOR_BITS;
1141     if (len > 0) {
1142         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1143             return ret;
1144         memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
1145         count -= len;
1146         if (count == 0)
1147             return count1;
1148         sector_num++;
1149         buf += len;
1150     }
1151
1152     /* read the sectors "in place" */
1153     nb_sectors = count >> BDRV_SECTOR_BITS;
1154     if (nb_sectors > 0) {
1155         if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1156             return ret;
1157         sector_num += nb_sectors;
1158         len = nb_sectors << BDRV_SECTOR_BITS;
1159         buf += len;
1160         count -= len;
1161     }
1162
1163     /* add data from the last sector */
1164     if (count > 0) {
1165         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1166             return ret;
1167         memcpy(buf, tmp_buf, count);
1168     }
1169     return count1;
1170 }
1171
1172 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1173                 const void *buf, int count1)
1174 {
1175     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1176     int len, nb_sectors, count;
1177     int64_t sector_num;
1178     int ret;
1179
1180     count = count1;
1181     /* first write to align to sector start */
1182     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1183     if (len > count)
1184         len = count;
1185     sector_num = offset >> BDRV_SECTOR_BITS;
1186     if (len > 0) {
1187         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1188             return ret;
1189         memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1190         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1191             return ret;
1192         count -= len;
1193         if (count == 0)
1194             return count1;
1195         sector_num++;
1196         buf += len;
1197     }
1198
1199     /* write the sectors "in place" */
1200     nb_sectors = count >> BDRV_SECTOR_BITS;
1201     if (nb_sectors > 0) {
1202         if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1203             return ret;
1204         sector_num += nb_sectors;
1205         len = nb_sectors << BDRV_SECTOR_BITS;
1206         buf += len;
1207         count -= len;
1208     }
1209
1210     /* add data from the last sector */
1211     if (count > 0) {
1212         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1213             return ret;
1214         memcpy(tmp_buf, buf, count);
1215         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1216             return ret;
1217     }
1218     return count1;
1219 }
1220
1221 /*
1222  * Writes to the file and ensures that no writes are reordered across this
1223  * request (acts as a barrier)
1224  *
1225  * Returns 0 on success, -errno in error cases.
1226  */
1227 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1228     const void *buf, int count)
1229 {
1230     int ret;
1231
1232     ret = bdrv_pwrite(bs, offset, buf, count);
1233     if (ret < 0) {
1234         return ret;
1235     }
1236
1237     /* No flush needed for cache modes that use O_DSYNC */
1238     if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
1239         bdrv_flush(bs);
1240     }
1241
1242     return 0;
1243 }
1244
1245 /*
1246  * Handle a read request in coroutine context
1247  */
1248 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1249     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1250 {
1251     BlockDriver *drv = bs->drv;
1252
1253     if (!drv) {
1254         return -ENOMEDIUM;
1255     }
1256     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1257         return -EIO;
1258     }
1259
1260     return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1261 }
1262
1263 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1264     int nb_sectors, QEMUIOVector *qiov)
1265 {
1266     trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1267
1268     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1269 }
1270
1271 /*
1272  * Handle a write request in coroutine context
1273  */
1274 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1275     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1276 {
1277     BlockDriver *drv = bs->drv;
1278     int ret;
1279
1280     if (!bs->drv) {
1281         return -ENOMEDIUM;
1282     }
1283     if (bs->read_only) {
1284         return -EACCES;
1285     }
1286     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1287         return -EIO;
1288     }
1289
1290     ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1291
1292     if (bs->dirty_bitmap) {
1293         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1294     }
1295
1296     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1297         bs->wr_highest_sector = sector_num + nb_sectors - 1;
1298     }
1299
1300     return ret;
1301 }
1302
1303 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1304     int nb_sectors, QEMUIOVector *qiov)
1305 {
1306     trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1307
1308     return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1309 }
1310
1311 /**
1312  * Truncate file to 'offset' bytes (needed only for file protocols)
1313  */
1314 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1315 {
1316     BlockDriver *drv = bs->drv;
1317     int ret;
1318     if (!drv)
1319         return -ENOMEDIUM;
1320     if (!drv->bdrv_truncate)
1321         return -ENOTSUP;
1322     if (bs->read_only)
1323         return -EACCES;
1324     if (bdrv_in_use(bs))
1325         return -EBUSY;
1326     ret = drv->bdrv_truncate(bs, offset);
1327     if (ret == 0) {
1328         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1329         bdrv_dev_resize_cb(bs);
1330     }
1331     return ret;
1332 }
1333
1334 /**
1335  * Length of a allocated file in bytes. Sparse files are counted by actual
1336  * allocated space. Return < 0 if error or unknown.
1337  */
1338 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1339 {
1340     BlockDriver *drv = bs->drv;
1341     if (!drv) {
1342         return -ENOMEDIUM;
1343     }
1344     if (drv->bdrv_get_allocated_file_size) {
1345         return drv->bdrv_get_allocated_file_size(bs);
1346     }
1347     if (bs->file) {
1348         return bdrv_get_allocated_file_size(bs->file);
1349     }
1350     return -ENOTSUP;
1351 }
1352
1353 /**
1354  * Length of a file in bytes. Return < 0 if error or unknown.
1355  */
1356 int64_t bdrv_getlength(BlockDriverState *bs)
1357 {
1358     BlockDriver *drv = bs->drv;
1359     if (!drv)
1360         return -ENOMEDIUM;
1361
1362     if (bs->growable || bdrv_dev_has_removable_media(bs)) {
1363         if (drv->bdrv_getlength) {
1364             return drv->bdrv_getlength(bs);
1365         }
1366     }
1367     return bs->total_sectors * BDRV_SECTOR_SIZE;
1368 }
1369
1370 /* return 0 as number of sectors if no device present or error */
1371 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1372 {
1373     int64_t length;
1374     length = bdrv_getlength(bs);
1375     if (length < 0)
1376         length = 0;
1377     else
1378         length = length >> BDRV_SECTOR_BITS;
1379     *nb_sectors_ptr = length;
1380 }
1381
/* One entry of the MSDOS partition table, as read by guess_disk_lchs() */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* number of sectors in partition */
} QEMU_PACKED;
1394
1395 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1396 static int guess_disk_lchs(BlockDriverState *bs,
1397                            int *pcylinders, int *pheads, int *psectors)
1398 {
1399     uint8_t buf[BDRV_SECTOR_SIZE];
1400     int ret, i, heads, sectors, cylinders;
1401     struct partition *p;
1402     uint32_t nr_sects;
1403     uint64_t nb_sectors;
1404
1405     bdrv_get_geometry(bs, &nb_sectors);
1406
1407     ret = bdrv_read(bs, 0, buf, 1);
1408     if (ret < 0)
1409         return -1;
1410     /* test msdos magic */
1411     if (buf[510] != 0x55 || buf[511] != 0xaa)
1412         return -1;
1413     for(i = 0; i < 4; i++) {
1414         p = ((struct partition *)(buf + 0x1be)) + i;
1415         nr_sects = le32_to_cpu(p->nr_sects);
1416         if (nr_sects && p->end_head) {
1417             /* We make the assumption that the partition terminates on
1418                a cylinder boundary */
1419             heads = p->end_head + 1;
1420             sectors = p->end_sector & 63;
1421             if (sectors == 0)
1422                 continue;
1423             cylinders = nb_sectors / (heads * sectors);
1424             if (cylinders < 1 || cylinders > 16383)
1425                 continue;
1426             *pheads = heads;
1427             *psectors = sectors;
1428             *pcylinders = cylinders;
1429 #if 0
1430             printf("guessed geometry: LCHS=%d %d %d\n",
1431                    cylinders, heads, sectors);
1432 #endif
1433             return 0;
1434         }
1435     }
1436     return -1;
1437 }
1438
1439 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1440 {
1441     int translation, lba_detected = 0;
1442     int cylinders, heads, secs;
1443     uint64_t nb_sectors;
1444
1445     /* if a geometry hint is available, use it */
1446     bdrv_get_geometry(bs, &nb_sectors);
1447     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1448     translation = bdrv_get_translation_hint(bs);
1449     if (cylinders != 0) {
1450         *pcyls = cylinders;
1451         *pheads = heads;
1452         *psecs = secs;
1453     } else {
1454         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1455             if (heads > 16) {
1456                 /* if heads > 16, it means that a BIOS LBA
1457                    translation was active, so the default
1458                    hardware geometry is OK */
1459                 lba_detected = 1;
1460                 goto default_geometry;
1461             } else {
1462                 *pcyls = cylinders;
1463                 *pheads = heads;
1464                 *psecs = secs;
1465                 /* disable any translation to be in sync with
1466                    the logical geometry */
1467                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1468                     bdrv_set_translation_hint(bs,
1469                                               BIOS_ATA_TRANSLATION_NONE);
1470                 }
1471             }
1472         } else {
1473         default_geometry:
1474             /* if no geometry, use a standard physical disk geometry */
1475             cylinders = nb_sectors / (16 * 63);
1476
1477             if (cylinders > 16383)
1478                 cylinders = 16383;
1479             else if (cylinders < 2)
1480                 cylinders = 2;
1481             *pcyls = cylinders;
1482             *pheads = 16;
1483             *psecs = 63;
1484             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1485                 if ((*pcyls * *pheads) <= 131072) {
1486                     bdrv_set_translation_hint(bs,
1487                                               BIOS_ATA_TRANSLATION_LARGE);
1488                 } else {
1489                     bdrv_set_translation_hint(bs,
1490                                               BIOS_ATA_TRANSLATION_LBA);
1491                 }
1492             }
1493         }
1494         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1495     }
1496 }
1497
1498 void bdrv_set_geometry_hint(BlockDriverState *bs,
1499                             int cyls, int heads, int secs)
1500 {
1501     bs->cyls = cyls;
1502     bs->heads = heads;
1503     bs->secs = secs;
1504 }
1505
1506 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1507 {
1508     bs->translation = translation;
1509 }
1510
1511 void bdrv_get_geometry_hint(BlockDriverState *bs,
1512                             int *pcyls, int *pheads, int *psecs)
1513 {
1514     *pcyls = bs->cyls;
1515     *pheads = bs->heads;
1516     *psecs = bs->secs;
1517 }
1518
1519 /* Recognize floppy formats */
1520 typedef struct FDFormat {
1521     FDriveType drive;
1522     uint8_t last_sect;
1523     uint8_t max_track;
1524     uint8_t max_head;
1525 } FDFormat;
1526
1527 static const FDFormat fd_formats[] = {
1528     /* First entry is default format */
1529     /* 1.44 MB 3"1/2 floppy disks */
1530     { FDRIVE_DRV_144, 18, 80, 1, },
1531     { FDRIVE_DRV_144, 20, 80, 1, },
1532     { FDRIVE_DRV_144, 21, 80, 1, },
1533     { FDRIVE_DRV_144, 21, 82, 1, },
1534     { FDRIVE_DRV_144, 21, 83, 1, },
1535     { FDRIVE_DRV_144, 22, 80, 1, },
1536     { FDRIVE_DRV_144, 23, 80, 1, },
1537     { FDRIVE_DRV_144, 24, 80, 1, },
1538     /* 2.88 MB 3"1/2 floppy disks */
1539     { FDRIVE_DRV_288, 36, 80, 1, },
1540     { FDRIVE_DRV_288, 39, 80, 1, },
1541     { FDRIVE_DRV_288, 40, 80, 1, },
1542     { FDRIVE_DRV_288, 44, 80, 1, },
1543     { FDRIVE_DRV_288, 48, 80, 1, },
1544     /* 720 kB 3"1/2 floppy disks */
1545     { FDRIVE_DRV_144,  9, 80, 1, },
1546     { FDRIVE_DRV_144, 10, 80, 1, },
1547     { FDRIVE_DRV_144, 10, 82, 1, },
1548     { FDRIVE_DRV_144, 10, 83, 1, },
1549     { FDRIVE_DRV_144, 13, 80, 1, },
1550     { FDRIVE_DRV_144, 14, 80, 1, },
1551     /* 1.2 MB 5"1/4 floppy disks */
1552     { FDRIVE_DRV_120, 15, 80, 1, },
1553     { FDRIVE_DRV_120, 18, 80, 1, },
1554     { FDRIVE_DRV_120, 18, 82, 1, },
1555     { FDRIVE_DRV_120, 18, 83, 1, },
1556     { FDRIVE_DRV_120, 20, 80, 1, },
1557     /* 720 kB 5"1/4 floppy disks */
1558     { FDRIVE_DRV_120,  9, 80, 1, },
1559     { FDRIVE_DRV_120, 11, 80, 1, },
1560     /* 360 kB 5"1/4 floppy disks */
1561     { FDRIVE_DRV_120,  9, 40, 1, },
1562     { FDRIVE_DRV_120,  9, 40, 0, },
1563     { FDRIVE_DRV_120, 10, 41, 1, },
1564     { FDRIVE_DRV_120, 10, 42, 1, },
1565     /* 320 kB 5"1/4 floppy disks */
1566     { FDRIVE_DRV_120,  8, 40, 1, },
1567     { FDRIVE_DRV_120,  8, 40, 0, },
1568     /* 360 kB must match 5"1/4 better than 3"1/2... */
1569     { FDRIVE_DRV_144,  9, 80, 0, },
1570     /* end */
1571     { FDRIVE_DRV_NONE, -1, -1, 0, },
1572 };
1573
1574 void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1575                                    int *max_track, int *last_sect,
1576                                    FDriveType drive_in, FDriveType *drive)
1577 {
1578     const FDFormat *parse;
1579     uint64_t nb_sectors, size;
1580     int i, first_match, match;
1581
1582     bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1583     if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1584         /* User defined disk */
1585     } else {
1586         bdrv_get_geometry(bs, &nb_sectors);
1587         match = -1;
1588         first_match = -1;
1589         for (i = 0; ; i++) {
1590             parse = &fd_formats[i];
1591             if (parse->drive == FDRIVE_DRV_NONE) {
1592                 break;
1593             }
1594             if (drive_in == parse->drive ||
1595                 drive_in == FDRIVE_DRV_NONE) {
1596                 size = (parse->max_head + 1) * parse->max_track *
1597                     parse->last_sect;
1598                 if (nb_sectors == size) {
1599                     match = i;
1600                     break;
1601                 }
1602                 if (first_match == -1) {
1603                     first_match = i;
1604                 }
1605             }
1606         }
1607         if (match == -1) {
1608             if (first_match == -1) {
1609                 match = 1;
1610             } else {
1611                 match = first_match;
1612             }
1613             parse = &fd_formats[match];
1614         }
1615         *nb_heads = parse->max_head + 1;
1616         *max_track = parse->max_track;
1617         *last_sect = parse->last_sect;
1618         *drive = parse->drive;
1619     }
1620 }
1621
1622 int bdrv_get_translation_hint(BlockDriverState *bs)
1623 {
1624     return bs->translation;
1625 }
1626
1627 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1628                        BlockErrorAction on_write_error)
1629 {
1630     bs->on_read_error = on_read_error;
1631     bs->on_write_error = on_write_error;
1632 }
1633
1634 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1635 {
1636     return is_read ? bs->on_read_error : bs->on_write_error;
1637 }
1638
1639 int bdrv_is_read_only(BlockDriverState *bs)
1640 {
1641     return bs->read_only;
1642 }
1643
1644 int bdrv_is_sg(BlockDriverState *bs)
1645 {
1646     return bs->sg;
1647 }
1648
1649 int bdrv_enable_write_cache(BlockDriverState *bs)
1650 {
1651     return bs->enable_write_cache;
1652 }
1653
1654 int bdrv_is_encrypted(BlockDriverState *bs)
1655 {
1656     if (bs->backing_hd && bs->backing_hd->encrypted)
1657         return 1;
1658     return bs->encrypted;
1659 }
1660
1661 int bdrv_key_required(BlockDriverState *bs)
1662 {
1663     BlockDriverState *backing_hd = bs->backing_hd;
1664
1665     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1666         return 1;
1667     return (bs->encrypted && !bs->valid_key);
1668 }
1669
1670 int bdrv_set_key(BlockDriverState *bs, const char *key)
1671 {
1672     int ret;
1673     if (bs->backing_hd && bs->backing_hd->encrypted) {
1674         ret = bdrv_set_key(bs->backing_hd, key);
1675         if (ret < 0)
1676             return ret;
1677         if (!bs->encrypted)
1678             return 0;
1679     }
1680     if (!bs->encrypted) {
1681         return -EINVAL;
1682     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1683         return -ENOMEDIUM;
1684     }
1685     ret = bs->drv->bdrv_set_key(bs, key);
1686     if (ret < 0) {
1687         bs->valid_key = 0;
1688     } else if (!bs->valid_key) {
1689         bs->valid_key = 1;
1690         /* call the change callback now, we skipped it on open */
1691         bdrv_dev_change_media_cb(bs, true);
1692     }
1693     return ret;
1694 }
1695
1696 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1697 {
1698     if (!bs->drv) {
1699         buf[0] = '\0';
1700     } else {
1701         pstrcpy(buf, buf_size, bs->drv->format_name);
1702     }
1703 }
1704
1705 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1706                          void *opaque)
1707 {
1708     BlockDriver *drv;
1709
1710     QLIST_FOREACH(drv, &bdrv_drivers, list) {
1711         it(opaque, drv->format_name);
1712     }
1713 }
1714
1715 BlockDriverState *bdrv_find(const char *name)
1716 {
1717     BlockDriverState *bs;
1718
1719     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1720         if (!strcmp(name, bs->device_name)) {
1721             return bs;
1722         }
1723     }
1724     return NULL;
1725 }
1726
1727 BlockDriverState *bdrv_next(BlockDriverState *bs)
1728 {
1729     if (!bs) {
1730         return QTAILQ_FIRST(&bdrv_states);
1731     }
1732     return QTAILQ_NEXT(bs, list);
1733 }
1734
1735 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1736 {
1737     BlockDriverState *bs;
1738
1739     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1740         it(opaque, bs);
1741     }
1742 }
1743
1744 const char *bdrv_get_device_name(BlockDriverState *bs)
1745 {
1746     return bs->device_name;
1747 }
1748
1749 void bdrv_flush_all(void)
1750 {
1751     BlockDriverState *bs;
1752
1753     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1754         if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
1755             bdrv_flush(bs);
1756         }
1757     }
1758 }
1759
1760 int bdrv_has_zero_init(BlockDriverState *bs)
1761 {
1762     assert(bs->drv);
1763
1764     if (bs->drv->bdrv_has_zero_init) {
1765         return bs->drv->bdrv_has_zero_init(bs);
1766     }
1767
1768     return 1;
1769 }
1770
1771 /*
1772  * Returns true iff the specified sector is present in the disk image. Drivers
1773  * not implementing the functionality are assumed to not support backing files,
1774  * hence all their sectors are reported as allocated.
1775  *
1776  * 'pnum' is set to the number of sectors (including and immediately following
1777  * the specified sector) that are known to be in the same
1778  * allocated/unallocated state.
1779  *
1780  * 'nb_sectors' is the max value 'pnum' should be set to.
1781  */
1782 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1783         int *pnum)
1784 {
1785     int64_t n;
1786     if (!bs->drv->bdrv_is_allocated) {
1787         if (sector_num >= bs->total_sectors) {
1788             *pnum = 0;
1789             return 0;
1790         }
1791         n = bs->total_sectors - sector_num;
1792         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1793         return 1;
1794     }
1795     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1796 }
1797
1798 void bdrv_mon_event(const BlockDriverState *bdrv,
1799                     BlockMonEventAction action, int is_read)
1800 {
1801     QObject *data;
1802     const char *action_str;
1803
1804     switch (action) {
1805     case BDRV_ACTION_REPORT:
1806         action_str = "report";
1807         break;
1808     case BDRV_ACTION_IGNORE:
1809         action_str = "ignore";
1810         break;
1811     case BDRV_ACTION_STOP:
1812         action_str = "stop";
1813         break;
1814     default:
1815         abort();
1816     }
1817
1818     data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1819                               bdrv->device_name,
1820                               action_str,
1821                               is_read ? "read" : "write");
1822     monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1823
1824     qobject_decref(data);
1825 }
1826
1827 static void bdrv_print_dict(QObject *obj, void *opaque)
1828 {
1829     QDict *bs_dict;
1830     Monitor *mon = opaque;
1831
1832     bs_dict = qobject_to_qdict(obj);
1833
1834     monitor_printf(mon, "%s: removable=%d",
1835                         qdict_get_str(bs_dict, "device"),
1836                         qdict_get_bool(bs_dict, "removable"));
1837
1838     if (qdict_get_bool(bs_dict, "removable")) {
1839         monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1840         monitor_printf(mon, " tray-open=%d",
1841                        qdict_get_bool(bs_dict, "tray-open"));
1842     }
1843
1844     if (qdict_haskey(bs_dict, "io-status")) {
1845         monitor_printf(mon, " io-status=%s", qdict_get_str(bs_dict, "io-status"));
1846     }
1847
1848     if (qdict_haskey(bs_dict, "inserted")) {
1849         QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1850
1851         monitor_printf(mon, " file=");
1852         monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1853         if (qdict_haskey(qdict, "backing_file")) {
1854             monitor_printf(mon, " backing_file=");
1855             monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1856         }
1857         monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1858                             qdict_get_bool(qdict, "ro"),
1859                             qdict_get_str(qdict, "drv"),
1860                             qdict_get_bool(qdict, "encrypted"));
1861     } else {
1862         monitor_printf(mon, " [not inserted]");
1863     }
1864
1865     monitor_printf(mon, "\n");
1866 }
1867
1868 void bdrv_info_print(Monitor *mon, const QObject *data)
1869 {
1870     qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1871 }
1872
1873 static const char *const io_status_name[BDRV_IOS_MAX] = {
1874     [BDRV_IOS_OK] = "ok",
1875     [BDRV_IOS_FAILED] = "failed",
1876     [BDRV_IOS_ENOSPC] = "nospace",
1877 };
1878
1879 void bdrv_info(Monitor *mon, QObject **ret_data)
1880 {
1881     QList *bs_list;
1882     BlockDriverState *bs;
1883
1884     bs_list = qlist_new();
1885
1886     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1887         QObject *bs_obj;
1888         QDict *bs_dict;
1889
1890         bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
1891                                     "'removable': %i, 'locked': %i }",
1892                                     bs->device_name,
1893                                     bdrv_dev_has_removable_media(bs),
1894                                     bdrv_dev_is_medium_locked(bs));
1895         bs_dict = qobject_to_qdict(bs_obj);
1896
1897         if (bdrv_dev_has_removable_media(bs)) {
1898             qdict_put(bs_dict, "tray-open",
1899                       qbool_from_int(bdrv_dev_is_tray_open(bs)));
1900         }
1901
1902         if (bdrv_iostatus_is_enabled(bs)) {
1903             qdict_put(bs_dict, "io-status",
1904                       qstring_from_str(io_status_name[bs->iostatus]));
1905         }
1906
1907         if (bs->drv) {
1908             QObject *obj;
1909
1910             obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1911                                      "'encrypted': %i }",
1912                                      bs->filename, bs->read_only,
1913                                      bs->drv->format_name,
1914                                      bdrv_is_encrypted(bs));
1915             if (bs->backing_file[0] != '\0') {
1916                 QDict *qdict = qobject_to_qdict(obj);
1917                 qdict_put(qdict, "backing_file",
1918                           qstring_from_str(bs->backing_file));
1919             }
1920
1921             qdict_put_obj(bs_dict, "inserted", obj);
1922         }
1923         qlist_append_obj(bs_list, bs_obj);
1924     }
1925
1926     *ret_data = QOBJECT(bs_list);
1927 }
1928
1929 static void bdrv_stats_iter(QObject *data, void *opaque)
1930 {
1931     QDict *qdict;
1932     Monitor *mon = opaque;
1933
1934     qdict = qobject_to_qdict(data);
1935     monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1936
1937     qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1938     monitor_printf(mon, " rd_bytes=%" PRId64
1939                         " wr_bytes=%" PRId64
1940                         " rd_operations=%" PRId64
1941                         " wr_operations=%" PRId64
1942                         " flush_operations=%" PRId64
1943                         " wr_total_time_ns=%" PRId64
1944                         " rd_total_time_ns=%" PRId64
1945                         " flush_total_time_ns=%" PRId64
1946                         "\n",
1947                         qdict_get_int(qdict, "rd_bytes"),
1948                         qdict_get_int(qdict, "wr_bytes"),
1949                         qdict_get_int(qdict, "rd_operations"),
1950                         qdict_get_int(qdict, "wr_operations"),
1951                         qdict_get_int(qdict, "flush_operations"),
1952                         qdict_get_int(qdict, "wr_total_time_ns"),
1953                         qdict_get_int(qdict, "rd_total_time_ns"),
1954                         qdict_get_int(qdict, "flush_total_time_ns"));
1955 }
1956
1957 void bdrv_stats_print(Monitor *mon, const QObject *data)
1958 {
1959     qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1960 }
1961
1962 static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
1963 {
1964     QObject *res;
1965     QDict *dict;
1966
1967     res = qobject_from_jsonf("{ 'stats': {"
1968                              "'rd_bytes': %" PRId64 ","
1969                              "'wr_bytes': %" PRId64 ","
1970                              "'rd_operations': %" PRId64 ","
1971                              "'wr_operations': %" PRId64 ","
1972                              "'wr_highest_offset': %" PRId64 ","
1973                              "'flush_operations': %" PRId64 ","
1974                              "'wr_total_time_ns': %" PRId64 ","
1975                              "'rd_total_time_ns': %" PRId64 ","
1976                              "'flush_total_time_ns': %" PRId64
1977                              "} }",
1978                              bs->nr_bytes[BDRV_ACCT_READ],
1979                              bs->nr_bytes[BDRV_ACCT_WRITE],
1980                              bs->nr_ops[BDRV_ACCT_READ],
1981                              bs->nr_ops[BDRV_ACCT_WRITE],
1982                              bs->wr_highest_sector *
1983                              (uint64_t)BDRV_SECTOR_SIZE,
1984                              bs->nr_ops[BDRV_ACCT_FLUSH],
1985                              bs->total_time_ns[BDRV_ACCT_WRITE],
1986                              bs->total_time_ns[BDRV_ACCT_READ],
1987                              bs->total_time_ns[BDRV_ACCT_FLUSH]);
1988     dict  = qobject_to_qdict(res);
1989
1990     if (*bs->device_name) {
1991         qdict_put(dict, "device", qstring_from_str(bs->device_name));
1992     }
1993
1994     if (bs->file) {
1995         QObject *parent = bdrv_info_stats_bs(bs->file);
1996         qdict_put_obj(dict, "parent", parent);
1997     }
1998
1999     return res;
2000 }
2001
2002 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2003 {
2004     QObject *obj;
2005     QList *devices;
2006     BlockDriverState *bs;
2007
2008     devices = qlist_new();
2009
2010     QTAILQ_FOREACH(bs, &bdrv_states, list) {
2011         obj = bdrv_info_stats_bs(bs);
2012         qlist_append_obj(devices, obj);
2013     }
2014
2015     *ret_data = QOBJECT(devices);
2016 }
2017
2018 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2019 {
2020     if (bs->backing_hd && bs->backing_hd->encrypted)
2021         return bs->backing_file;
2022     else if (bs->encrypted)
2023         return bs->filename;
2024     else
2025         return NULL;
2026 }
2027
2028 void bdrv_get_backing_filename(BlockDriverState *bs,
2029                                char *filename, int filename_size)
2030 {
2031     if (!bs->backing_file) {
2032         pstrcpy(filename, filename_size, "");
2033     } else {
2034         pstrcpy(filename, filename_size, bs->backing_file);
2035     }
2036 }
2037
2038 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
2039                           const uint8_t *buf, int nb_sectors)
2040 {
2041     BlockDriver *drv = bs->drv;
2042     if (!drv)
2043         return -ENOMEDIUM;
2044     if (!drv->bdrv_write_compressed)
2045         return -ENOTSUP;
2046     if (bdrv_check_request(bs, sector_num, nb_sectors))
2047         return -EIO;
2048
2049     if (bs->dirty_bitmap) {
2050         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2051     }
2052
2053     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2054 }
2055
2056 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2057 {
2058     BlockDriver *drv = bs->drv;
2059     if (!drv)
2060         return -ENOMEDIUM;
2061     if (!drv->bdrv_get_info)
2062         return -ENOTSUP;
2063     memset(bdi, 0, sizeof(*bdi));
2064     return drv->bdrv_get_info(bs, bdi);
2065 }
2066
2067 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2068                       int64_t pos, int size)
2069 {
2070     BlockDriver *drv = bs->drv;
2071     if (!drv)
2072         return -ENOMEDIUM;
2073     if (drv->bdrv_save_vmstate)
2074         return drv->bdrv_save_vmstate(bs, buf, pos, size);
2075     if (bs->file)
2076         return bdrv_save_vmstate(bs->file, buf, pos, size);
2077     return -ENOTSUP;
2078 }
2079
2080 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2081                       int64_t pos, int size)
2082 {
2083     BlockDriver *drv = bs->drv;
2084     if (!drv)
2085         return -ENOMEDIUM;
2086     if (drv->bdrv_load_vmstate)
2087         return drv->bdrv_load_vmstate(bs, buf, pos, size);
2088     if (bs->file)
2089         return bdrv_load_vmstate(bs->file, buf, pos, size);
2090     return -ENOTSUP;
2091 }
2092
2093 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2094 {
2095     BlockDriver *drv = bs->drv;
2096
2097     if (!drv || !drv->bdrv_debug_event) {
2098         return;
2099     }
2100
2101     return drv->bdrv_debug_event(bs, event);
2102
2103 }
2104
2105 /**************************************************************/
2106 /* handling of snapshots */
2107
2108 int bdrv_can_snapshot(BlockDriverState *bs)
2109 {
2110     BlockDriver *drv = bs->drv;
2111     if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
2112         return 0;
2113     }
2114
2115     if (!drv->bdrv_snapshot_create) {
2116         if (bs->file != NULL) {
2117             return bdrv_can_snapshot(bs->file);
2118         }
2119         return 0;
2120     }
2121
2122     return 1;
2123 }
2124
2125 int bdrv_is_snapshot(BlockDriverState *bs)
2126 {
2127     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2128 }
2129
2130 BlockDriverState *bdrv_snapshots(void)
2131 {
2132     BlockDriverState *bs;
2133
2134     if (bs_snapshots) {
2135         return bs_snapshots;
2136     }
2137
2138     bs = NULL;
2139     while ((bs = bdrv_next(bs))) {
2140         if (bdrv_can_snapshot(bs)) {
2141             bs_snapshots = bs;
2142             return bs;
2143         }
2144     }
2145     return NULL;
2146 }
2147
2148 int bdrv_snapshot_create(BlockDriverState *bs,
2149                          QEMUSnapshotInfo *sn_info)
2150 {
2151     BlockDriver *drv = bs->drv;
2152     if (!drv)
2153         return -ENOMEDIUM;
2154     if (drv->bdrv_snapshot_create)
2155         return drv->bdrv_snapshot_create(bs, sn_info);
2156     if (bs->file)
2157         return bdrv_snapshot_create(bs->file, sn_info);
2158     return -ENOTSUP;
2159 }
2160
2161 int bdrv_snapshot_goto(BlockDriverState *bs,
2162                        const char *snapshot_id)
2163 {
2164     BlockDriver *drv = bs->drv;
2165     int ret, open_ret;
2166
2167     if (!drv)
2168         return -ENOMEDIUM;
2169     if (drv->bdrv_snapshot_goto)
2170         return drv->bdrv_snapshot_goto(bs, snapshot_id);
2171
2172     if (bs->file) {
2173         drv->bdrv_close(bs);
2174         ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2175         open_ret = drv->bdrv_open(bs, bs->open_flags);
2176         if (open_ret < 0) {
2177             bdrv_delete(bs->file);
2178             bs->drv = NULL;
2179             return open_ret;
2180         }
2181         return ret;
2182     }
2183
2184     return -ENOTSUP;
2185 }
2186
2187 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2188 {
2189     BlockDriver *drv = bs->drv;
2190     if (!drv)
2191         return -ENOMEDIUM;
2192     if (drv->bdrv_snapshot_delete)
2193         return drv->bdrv_snapshot_delete(bs, snapshot_id);
2194     if (bs->file)
2195         return bdrv_snapshot_delete(bs->file, snapshot_id);
2196     return -ENOTSUP;
2197 }
2198
2199 int bdrv_snapshot_list(BlockDriverState *bs,
2200                        QEMUSnapshotInfo **psn_info)
2201 {
2202     BlockDriver *drv = bs->drv;
2203     if (!drv)
2204         return -ENOMEDIUM;
2205     if (drv->bdrv_snapshot_list)
2206         return drv->bdrv_snapshot_list(bs, psn_info);
2207     if (bs->file)
2208         return bdrv_snapshot_list(bs->file, psn_info);
2209     return -ENOTSUP;
2210 }
2211
2212 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2213         const char *snapshot_name)
2214 {
2215     BlockDriver *drv = bs->drv;
2216     if (!drv) {
2217         return -ENOMEDIUM;
2218     }
2219     if (!bs->read_only) {
2220         return -EINVAL;
2221     }
2222     if (drv->bdrv_snapshot_load_tmp) {
2223         return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2224     }
2225     return -ENOTSUP;
2226 }
2227
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (at most @buf_size bytes) in a compact form.
 *
 * Values up to 999 are printed as plain integers.  Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal is shown
 * while the scaled value is below 10, a rounded integer otherwise.  Values
 * too large for T are still expressed in T.
 *
 * Returns @buf.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* Move to the next unit while the value would need four or more digits */
    while (idx < NB_SUFFIXES - 1 && size >= 1000 * unit) {
        unit *= 1024;
        idx++;
    }

    if (size < 10 * unit) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / unit,
                 suffixes[idx]);
    } else {
        /* Round to nearest by adding half a unit before dividing */
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 ((size + (unit >> 1)) / unit),
                 suffixes[idx]);
    }
    return buf;
}
2257
2258 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2259 {
2260     char buf1[128], date_buf[128], clock_buf[128];
2261 #ifdef _WIN32
2262     struct tm *ptm;
2263 #else
2264     struct tm tm;
2265 #endif
2266     time_t ti;
2267     int64_t secs;
2268
2269     if (!sn) {
2270         snprintf(buf, buf_size,
2271                  "%-10s%-20s%7s%20s%15s",
2272                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2273     } else {
2274         ti = sn->date_sec;
2275 #ifdef _WIN32
2276         ptm = localtime(&ti);
2277         strftime(date_buf, sizeof(date_buf),
2278                  "%Y-%m-%d %H:%M:%S", ptm);
2279 #else
2280         localtime_r(&ti, &tm);
2281         strftime(date_buf, sizeof(date_buf),
2282                  "%Y-%m-%d %H:%M:%S", &tm);
2283 #endif
2284         secs = sn->vm_clock_nsec / 1000000000;
2285         snprintf(clock_buf, sizeof(clock_buf),
2286                  "%02d:%02d:%02d.%03d",
2287                  (int)(secs / 3600),
2288                  (int)((secs / 60) % 60),
2289                  (int)(secs % 60),
2290                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
2291         snprintf(buf, buf_size,
2292                  "%-10s%-20s%7s%20s%15s",
2293                  sn->id_str, sn->name,
2294                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2295                  date_buf,
2296                  clock_buf);
2297     }
2298     return buf;
2299 }
2300
2301 /**************************************************************/
2302 /* async I/Os */
2303
2304 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2305                                  QEMUIOVector *qiov, int nb_sectors,
2306                                  BlockDriverCompletionFunc *cb, void *opaque)
2307 {
2308     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2309
2310     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2311                                  cb, opaque, false);
2312 }
2313
2314 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2315                                   QEMUIOVector *qiov, int nb_sectors,
2316                                   BlockDriverCompletionFunc *cb, void *opaque)
2317 {
2318     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2319
2320     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2321                                  cb, opaque, true);
2322 }
2323
2324
2325 typedef struct MultiwriteCB {
2326     int error;
2327     int num_requests;
2328     int num_callbacks;
2329     struct {
2330         BlockDriverCompletionFunc *cb;
2331         void *opaque;
2332         QEMUIOVector *free_qiov;
2333         void *free_buf;
2334     } callbacks[];
2335 } MultiwriteCB;
2336
2337 static void multiwrite_user_cb(MultiwriteCB *mcb)
2338 {
2339     int i;
2340
2341     for (i = 0; i < mcb->num_callbacks; i++) {
2342         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2343         if (mcb->callbacks[i].free_qiov) {
2344             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2345         }
2346         g_free(mcb->callbacks[i].free_qiov);
2347         qemu_vfree(mcb->callbacks[i].free_buf);
2348     }
2349 }
2350
2351 static void multiwrite_cb(void *opaque, int ret)
2352 {
2353     MultiwriteCB *mcb = opaque;
2354
2355     trace_multiwrite_cb(mcb, ret);
2356
2357     if (ret < 0 && !mcb->error) {
2358         mcb->error = ret;
2359     }
2360
2361     mcb->num_requests--;
2362     if (mcb->num_requests == 0) {
2363         multiwrite_user_cb(mcb);
2364         g_free(mcb);
2365     }
2366 }
2367
2368 static int multiwrite_req_compare(const void *a, const void *b)
2369 {
2370     const BlockRequest *req1 = a, *req2 = b;
2371
2372     /*
2373      * Note that we can't simply subtract req2->sector from req1->sector
2374      * here as that could overflow the return value.
2375      */
2376     if (req1->sector > req2->sector) {
2377         return 1;
2378     } else if (req1->sector < req2->sector) {
2379         return -1;
2380     } else {
2381         return 0;
2382     }
2383 }
2384
2385 /*
2386  * Takes a bunch of requests and tries to merge them. Returns the number of
2387  * requests that remain after merging.
2388  */
2389 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2390     int num_reqs, MultiwriteCB *mcb)
2391 {
2392     int i, outidx;
2393
2394     // Sort requests by start sector
2395     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2396
2397     // Check if adjacent requests touch the same clusters. If so, combine them,
2398     // filling up gaps with zero sectors.
2399     outidx = 0;
2400     for (i = 1; i < num_reqs; i++) {
2401         int merge = 0;
2402         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2403
2404         // This handles the cases that are valid for all block drivers, namely
2405         // exactly sequential writes and overlapping writes.
2406         if (reqs[i].sector <= oldreq_last) {
2407             merge = 1;
2408         }
2409
2410         // The block driver may decide that it makes sense to combine requests
2411         // even if there is a gap of some sectors between them. In this case,
2412         // the gap is filled with zeros (therefore only applicable for yet
2413         // unused space in format like qcow2).
2414         if (!merge && bs->drv->bdrv_merge_requests) {
2415             merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2416         }
2417
2418         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2419             merge = 0;
2420         }
2421
2422         if (merge) {
2423             size_t size;
2424             QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2425             qemu_iovec_init(qiov,
2426                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2427
2428             // Add the first request to the merged one. If the requests are
2429             // overlapping, drop the last sectors of the first request.
2430             size = (reqs[i].sector - reqs[outidx].sector) << 9;
2431             qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2432
2433             // We might need to add some zeros between the two requests
2434             if (reqs[i].sector > oldreq_last) {
2435                 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2436                 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2437                 memset(buf, 0, zero_bytes);
2438                 qemu_iovec_add(qiov, buf, zero_bytes);
2439                 mcb->callbacks[i].free_buf = buf;
2440             }
2441
2442             // Add the second request
2443             qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2444
2445             reqs[outidx].nb_sectors = qiov->size >> 9;
2446             reqs[outidx].qiov = qiov;
2447
2448             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2449         } else {
2450             outidx++;
2451             reqs[outidx].sector     = reqs[i].sector;
2452             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2453             reqs[outidx].qiov       = reqs[i].qiov;
2454         }
2455     }
2456
2457     return outidx + 1;
2458 }
2459
2460 /*
2461  * Submit multiple AIO write requests at once.
2462  *
2463  * On success, the function returns 0 and all requests in the reqs array have
2464  * been submitted. In error case this function returns -1, and any of the
2465  * requests may or may not be submitted yet. In particular, this means that the
2466  * callback will be called for some of the requests, for others it won't. The
2467  * caller must check the error field of the BlockRequest to wait for the right
2468  * callbacks (if error != 0, no callback will be called).
2469  *
2470  * The implementation may modify the contents of the reqs array, e.g. to merge
2471  * requests. However, the fields opaque and error are left unmodified as they
2472  * are used to signal failure for a single request to the caller.
2473  */
2474 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2475 {
2476     BlockDriverAIOCB *acb;
2477     MultiwriteCB *mcb;
2478     int i;
2479
2480     /* don't submit writes if we don't have a medium */
2481     if (bs->drv == NULL) {
2482         for (i = 0; i < num_reqs; i++) {
2483             reqs[i].error = -ENOMEDIUM;
2484         }
2485         return -1;
2486     }
2487
2488     if (num_reqs == 0) {
2489         return 0;
2490     }
2491
2492     // Create MultiwriteCB structure
2493     mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2494     mcb->num_requests = 0;
2495     mcb->num_callbacks = num_reqs;
2496
2497     for (i = 0; i < num_reqs; i++) {
2498         mcb->callbacks[i].cb = reqs[i].cb;
2499         mcb->callbacks[i].opaque = reqs[i].opaque;
2500     }
2501
2502     // Check for mergable requests
2503     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2504
2505     trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2506
2507     /*
2508      * Run the aio requests. As soon as one request can't be submitted
2509      * successfully, fail all requests that are not yet submitted (we must
2510      * return failure for all requests anyway)
2511      *
2512      * num_requests cannot be set to the right value immediately: If
2513      * bdrv_aio_writev fails for some request, num_requests would be too high
2514      * and therefore multiwrite_cb() would never recognize the multiwrite
2515      * request as completed. We also cannot use the loop variable i to set it
2516      * when the first request fails because the callback may already have been
2517      * called for previously submitted requests. Thus, num_requests must be
2518      * incremented for each request that is submitted.
2519      *
2520      * The problem that callbacks may be called early also means that we need
2521      * to take care that num_requests doesn't become 0 before all requests are
2522      * submitted - multiwrite_cb() would consider the multiwrite request
2523      * completed. A dummy request that is "completed" by a manual call to
2524      * multiwrite_cb() takes care of this.
2525      */
2526     mcb->num_requests = 1;
2527
2528     // Run the aio requests
2529     for (i = 0; i < num_reqs; i++) {
2530         mcb->num_requests++;
2531         acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2532             reqs[i].nb_sectors, multiwrite_cb, mcb);
2533
2534         if (acb == NULL) {
2535             // We can only fail the whole thing if no request has been
2536             // submitted yet. Otherwise we'll wait for the submitted AIOs to
2537             // complete and report the error in the callback.
2538             if (i == 0) {
2539                 trace_bdrv_aio_multiwrite_earlyfail(mcb);
2540                 goto fail;
2541             } else {
2542                 trace_bdrv_aio_multiwrite_latefail(mcb, i);
2543                 multiwrite_cb(mcb, -EIO);
2544                 break;
2545             }
2546         }
2547     }
2548
2549     /* Complete the dummy request */
2550     multiwrite_cb(mcb, 0);
2551
2552     return 0;
2553
2554 fail:
2555     for (i = 0; i < mcb->num_callbacks; i++) {
2556         reqs[i].error = -EIO;
2557     }
2558     g_free(mcb);
2559     return -1;
2560 }
2561
2562 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2563 {
2564     acb->pool->cancel(acb);
2565 }
2566
2567
2568 /**************************************************************/
2569 /* async block device emulation */
2570
2571 typedef struct BlockDriverAIOCBSync {
2572     BlockDriverAIOCB common;
2573     QEMUBH *bh;
2574     int ret;
2575     /* vector translation state */
2576     QEMUIOVector *qiov;
2577     uint8_t *bounce;
2578     int is_write;
2579 } BlockDriverAIOCBSync;
2580
2581 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2582 {
2583     BlockDriverAIOCBSync *acb =
2584         container_of(blockacb, BlockDriverAIOCBSync, common);
2585     qemu_bh_delete(acb->bh);
2586     acb->bh = NULL;
2587     qemu_aio_release(acb);
2588 }
2589
2590 static AIOPool bdrv_em_aio_pool = {
2591     .aiocb_size         = sizeof(BlockDriverAIOCBSync),
2592     .cancel             = bdrv_aio_cancel_em,
2593 };
2594
2595 static void bdrv_aio_bh_cb(void *opaque)
2596 {
2597     BlockDriverAIOCBSync *acb = opaque;
2598
2599     if (!acb->is_write)
2600         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2601     qemu_vfree(acb->bounce);
2602     acb->common.cb(acb->common.opaque, acb->ret);
2603     qemu_bh_delete(acb->bh);
2604     acb->bh = NULL;
2605     qemu_aio_release(acb);
2606 }
2607
2608 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2609                                             int64_t sector_num,
2610                                             QEMUIOVector *qiov,
2611                                             int nb_sectors,
2612                                             BlockDriverCompletionFunc *cb,
2613                                             void *opaque,
2614                                             int is_write)
2615
2616 {
2617     BlockDriverAIOCBSync *acb;
2618
2619     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2620     acb->is_write = is_write;
2621     acb->qiov = qiov;
2622     acb->bounce = qemu_blockalign(bs, qiov->size);
2623
2624     if (!acb->bh)
2625         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2626
2627     if (is_write) {
2628         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2629         acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2630     } else {
2631         acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2632     }
2633
2634     qemu_bh_schedule(acb->bh);
2635
2636     return &acb->common;
2637 }
2638
2639 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2640         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2641         BlockDriverCompletionFunc *cb, void *opaque)
2642 {
2643     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2644 }
2645
2646 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2647         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2648         BlockDriverCompletionFunc *cb, void *opaque)
2649 {
2650     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2651 }
2652
2653
2654 typedef struct BlockDriverAIOCBCoroutine {
2655     BlockDriverAIOCB common;
2656     BlockRequest req;
2657     bool is_write;
2658     QEMUBH* bh;
2659 } BlockDriverAIOCBCoroutine;
2660
2661 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2662 {
2663     qemu_aio_flush();
2664 }
2665
2666 static AIOPool bdrv_em_co_aio_pool = {
2667     .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
2668     .cancel             = bdrv_aio_co_cancel_em,
2669 };
2670
2671 static void bdrv_co_em_bh(void *opaque)
2672 {
2673     BlockDriverAIOCBCoroutine *acb = opaque;
2674
2675     acb->common.cb(acb->common.opaque, acb->req.error);
2676     qemu_bh_delete(acb->bh);
2677     qemu_aio_release(acb);
2678 }
2679
2680 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2681 static void coroutine_fn bdrv_co_do_rw(void *opaque)
2682 {
2683     BlockDriverAIOCBCoroutine *acb = opaque;
2684     BlockDriverState *bs = acb->common.bs;
2685
2686     if (!acb->is_write) {
2687         acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2688             acb->req.nb_sectors, acb->req.qiov);
2689     } else {
2690         acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2691             acb->req.nb_sectors, acb->req.qiov);
2692     }
2693
2694     acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2695     qemu_bh_schedule(acb->bh);
2696 }
2697
2698 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2699                                                int64_t sector_num,
2700                                                QEMUIOVector *qiov,
2701                                                int nb_sectors,
2702                                                BlockDriverCompletionFunc *cb,
2703                                                void *opaque,
2704                                                bool is_write)
2705 {
2706     Coroutine *co;
2707     BlockDriverAIOCBCoroutine *acb;
2708
2709     acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2710     acb->req.sector = sector_num;
2711     acb->req.nb_sectors = nb_sectors;
2712     acb->req.qiov = qiov;
2713     acb->is_write = is_write;
2714
2715     co = qemu_coroutine_create(bdrv_co_do_rw);
2716     qemu_coroutine_enter(co, acb);
2717
2718     return &acb->common;
2719 }
2720
2721 static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
2722 {
2723     BlockDriverAIOCBCoroutine *acb = opaque;
2724     BlockDriverState *bs = acb->common.bs;
2725
2726     acb->req.error = bdrv_co_flush(bs);
2727     acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2728     qemu_bh_schedule(acb->bh);
2729 }
2730
2731 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2732         BlockDriverCompletionFunc *cb, void *opaque)
2733 {
2734     trace_bdrv_aio_flush(bs, opaque);
2735
2736     Coroutine *co;
2737     BlockDriverAIOCBCoroutine *acb;
2738
2739     acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2740     co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
2741     qemu_coroutine_enter(co, acb);
2742
2743     return &acb->common;
2744 }
2745
2746 static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
2747 {
2748     BlockDriverAIOCBCoroutine *acb = opaque;
2749     BlockDriverState *bs = acb->common.bs;
2750
2751     acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
2752     acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2753     qemu_bh_schedule(acb->bh);
2754 }
2755
2756 BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
2757         int64_t sector_num, int nb_sectors,
2758         BlockDriverCompletionFunc *cb, void *opaque)
2759 {
2760     Coroutine *co;
2761     BlockDriverAIOCBCoroutine *acb;
2762
2763     trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
2764
2765     acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2766     acb->req.sector = sector_num;
2767     acb->req.nb_sectors = nb_sectors;
2768     co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
2769     qemu_coroutine_enter(co, acb);
2770
2771     return &acb->common;
2772 }
2773
2774 void bdrv_init(void)
2775 {
2776     module_call_init(MODULE_INIT_BLOCK);
2777 }
2778
2779 void bdrv_init_with_whitelist(void)
2780 {
2781     use_bdrv_whitelist = 1;
2782     bdrv_init();
2783 }
2784
2785 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2786                    BlockDriverCompletionFunc *cb, void *opaque)
2787 {
2788     BlockDriverAIOCB *acb;
2789
2790     if (pool->free_aiocb) {
2791         acb = pool->free_aiocb;
2792         pool->free_aiocb = acb->next;
2793     } else {
2794         acb = g_malloc0(pool->aiocb_size);
2795         acb->pool = pool;
2796     }
2797     acb->bs = bs;
2798     acb->cb = cb;
2799     acb->opaque = opaque;
2800     return acb;
2801 }
2802
2803 void qemu_aio_release(void *p)
2804 {
2805     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2806     AIOPool *pool = acb->pool;
2807     acb->next = pool->free_aiocb;
2808     pool->free_aiocb = acb;
2809 }
2810
2811 /**************************************************************/
2812 /* Coroutine block device emulation */
2813
2814 typedef struct CoroutineIOCompletion {
2815     Coroutine *coroutine;
2816     int ret;
2817 } CoroutineIOCompletion;
2818
2819 static void bdrv_co_io_em_complete(void *opaque, int ret)
2820 {
2821     CoroutineIOCompletion *co = opaque;
2822
2823     co->ret = ret;
2824     qemu_coroutine_enter(co->coroutine, NULL);
2825 }
2826
2827 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2828                                       int nb_sectors, QEMUIOVector *iov,
2829                                       bool is_write)
2830 {
2831     CoroutineIOCompletion co = {
2832         .coroutine = qemu_coroutine_self(),
2833     };
2834     BlockDriverAIOCB *acb;
2835
2836     if (is_write) {
2837         acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2838                                        bdrv_co_io_em_complete, &co);
2839     } else {
2840         acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
2841                                       bdrv_co_io_em_complete, &co);
2842     }
2843
2844     trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
2845     if (!acb) {
2846         return -EIO;
2847     }
2848     qemu_coroutine_yield();
2849
2850     return co.ret;
2851 }
2852
2853 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
2854                                          int64_t sector_num, int nb_sectors,
2855                                          QEMUIOVector *iov)
2856 {
2857     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
2858 }
2859
2860 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
2861                                          int64_t sector_num, int nb_sectors,
2862                                          QEMUIOVector *iov)
2863 {
2864     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
2865 }
2866
2867 static void coroutine_fn bdrv_flush_co_entry(void *opaque)
2868 {
2869     RwCo *rwco = opaque;
2870
2871     rwco->ret = bdrv_co_flush(rwco->bs);
2872 }
2873
2874 int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
2875 {
2876     if (bs->open_flags & BDRV_O_NO_FLUSH) {
2877         return 0;
2878     } else if (!bs->drv) {
2879         return 0;
2880     } else if (bs->drv->bdrv_co_flush) {
2881         return bs->drv->bdrv_co_flush(bs);
2882     } else if (bs->drv->bdrv_aio_flush) {
2883         BlockDriverAIOCB *acb;
2884         CoroutineIOCompletion co = {
2885             .coroutine = qemu_coroutine_self(),
2886         };
2887
2888         acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2889         if (acb == NULL) {
2890             return -EIO;
2891         } else {
2892             qemu_coroutine_yield();
2893             return co.ret;
2894         }
2895     } else if (bs->drv->bdrv_flush) {
2896         return bs->drv->bdrv_flush(bs);
2897     } else {
2898         /*
2899          * Some block drivers always operate in either writethrough or unsafe
2900          * mode and don't support bdrv_flush therefore. Usually qemu doesn't
2901          * know how the server works (because the behaviour is hardcoded or
2902          * depends on server-side configuration), so we can't ensure that
2903          * everything is safe on disk. Returning an error doesn't work because
2904          * that would break guests even if the server operates in writethrough
2905          * mode.
2906          *
2907          * Let's hope the user knows what he's doing.
2908          */
2909         return 0;
2910     }
2911 }
2912
2913 int bdrv_flush(BlockDriverState *bs)
2914 {
2915     Coroutine *co;
2916     RwCo rwco = {
2917         .bs = bs,
2918         .ret = NOT_DONE,
2919     };
2920
2921     if (qemu_in_coroutine()) {
2922         /* Fast-path if already in coroutine context */
2923         bdrv_flush_co_entry(&rwco);
2924     } else {
2925         co = qemu_coroutine_create(bdrv_flush_co_entry);
2926         qemu_coroutine_enter(co, &rwco);
2927         while (rwco.ret == NOT_DONE) {
2928             qemu_aio_wait();
2929         }
2930     }
2931
2932     return rwco.ret;
2933 }
2934
2935 static void coroutine_fn bdrv_discard_co_entry(void *opaque)
2936 {
2937     RwCo *rwco = opaque;
2938
2939     rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
2940 }
2941
2942 int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
2943                                  int nb_sectors)
2944 {
2945     if (!bs->drv) {
2946         return -ENOMEDIUM;
2947     } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
2948         return -EIO;
2949     } else if (bs->read_only) {
2950         return -EROFS;
2951     } else if (bs->drv->bdrv_co_discard) {
2952         return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
2953     } else if (bs->drv->bdrv_aio_discard) {
2954         BlockDriverAIOCB *acb;
2955         CoroutineIOCompletion co = {
2956             .coroutine = qemu_coroutine_self(),
2957         };
2958
2959         acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
2960                                         bdrv_co_io_em_complete, &co);
2961         if (acb == NULL) {
2962             return -EIO;
2963         } else {
2964             qemu_coroutine_yield();
2965             return co.ret;
2966         }
2967     } else if (bs->drv->bdrv_discard) {
2968         return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
2969     } else {
2970         return 0;
2971     }
2972 }
2973
2974 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
2975 {
2976     Coroutine *co;
2977     RwCo rwco = {
2978         .bs = bs,
2979         .sector_num = sector_num,
2980         .nb_sectors = nb_sectors,
2981         .ret = NOT_DONE,
2982     };
2983
2984     if (qemu_in_coroutine()) {
2985         /* Fast-path if already in coroutine context */
2986         bdrv_discard_co_entry(&rwco);
2987     } else {
2988         co = qemu_coroutine_create(bdrv_discard_co_entry);
2989         qemu_coroutine_enter(co, &rwco);
2990         while (rwco.ret == NOT_DONE) {
2991             qemu_aio_wait();
2992         }
2993     }
2994
2995     return rwco.ret;
2996 }
2997
2998 /**************************************************************/
2999 /* removable device support */
3000
3001 /**
3002  * Return TRUE if the media is present
3003  */
3004 int bdrv_is_inserted(BlockDriverState *bs)
3005 {
3006     BlockDriver *drv = bs->drv;
3007
3008     if (!drv)
3009         return 0;
3010     if (!drv->bdrv_is_inserted)
3011         return 1;
3012     return drv->bdrv_is_inserted(bs);
3013 }
3014
3015 /**
3016  * Return whether the media changed since the last call to this
3017  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3018  */
3019 int bdrv_media_changed(BlockDriverState *bs)
3020 {
3021     BlockDriver *drv = bs->drv;
3022
3023     if (drv && drv->bdrv_media_changed) {
3024         return drv->bdrv_media_changed(bs);
3025     }
3026     return -ENOTSUP;
3027 }
3028
3029 /**
3030  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3031  */
3032 void bdrv_eject(BlockDriverState *bs, int eject_flag)
3033 {
3034     BlockDriver *drv = bs->drv;
3035
3036     if (drv && drv->bdrv_eject) {
3037         drv->bdrv_eject(bs, eject_flag);
3038     }
3039 }
3040
3041 /**
3042  * Lock or unlock the media (if it is locked, the user won't be able
3043  * to eject it manually).
3044  */
3045 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3046 {
3047     BlockDriver *drv = bs->drv;
3048
3049     trace_bdrv_lock_medium(bs, locked);
3050
3051     if (drv && drv->bdrv_lock_medium) {
3052         drv->bdrv_lock_medium(bs, locked);
3053     }
3054 }
3055
3056 /* needed for generic scsi interface */
3057
3058 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3059 {
3060     BlockDriver *drv = bs->drv;
3061
3062     if (drv && drv->bdrv_ioctl)
3063         return drv->bdrv_ioctl(bs, req, buf);
3064     return -ENOTSUP;
3065 }
3066
3067 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3068         unsigned long int req, void *buf,
3069         BlockDriverCompletionFunc *cb, void *opaque)
3070 {
3071     BlockDriver *drv = bs->drv;
3072
3073     if (drv && drv->bdrv_aio_ioctl)
3074         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3075     return NULL;
3076 }
3077
3078 void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3079 {
3080     bs->buffer_alignment = align;
3081 }
3082
3083 void *qemu_blockalign(BlockDriverState *bs, size_t size)
3084 {
3085     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3086 }
3087
3088 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3089 {
3090     int64_t bitmap_size;
3091
3092     bs->dirty_count = 0;
3093     if (enable) {
3094         if (!bs->dirty_bitmap) {
3095             bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3096                     BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3097             bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
3098
3099             bs->dirty_bitmap = g_malloc0(bitmap_size);
3100         }
3101     } else {
3102         if (bs->dirty_bitmap) {
3103             g_free(bs->dirty_bitmap);
3104             bs->dirty_bitmap = NULL;
3105         }
3106     }
3107 }
3108
3109 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3110 {
3111     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
3112
3113     if (bs->dirty_bitmap &&
3114         (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
3115         return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3116             (1UL << (chunk % (sizeof(unsigned long) * 8))));
3117     } else {
3118         return 0;
3119     }
3120 }
3121
3122 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3123                       int nr_sectors)
3124 {
3125     set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3126 }
3127
3128 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3129 {
3130     return bs->dirty_count;
3131 }
3132
3133 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3134 {
3135     assert(bs->in_use != in_use);
3136     bs->in_use = in_use;
3137 }
3138
3139 int bdrv_in_use(BlockDriverState *bs)
3140 {
3141     return bs->in_use;
3142 }
3143
3144 void bdrv_iostatus_enable(BlockDriverState *bs)
3145 {
3146     bs->iostatus = BDRV_IOS_OK;
3147 }
3148
3149 /* The I/O status is only enabled if the drive explicitly
3150  * enables it _and_ the VM is configured to stop on errors */
3151 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3152 {
3153     return (bs->iostatus != BDRV_IOS_INVAL &&
3154            (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3155             bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
3156             bs->on_read_error == BLOCK_ERR_STOP_ANY));
3157 }
3158
3159 void bdrv_iostatus_disable(BlockDriverState *bs)
3160 {
3161     bs->iostatus = BDRV_IOS_INVAL;
3162 }
3163
3164 void bdrv_iostatus_reset(BlockDriverState *bs)
3165 {
3166     if (bdrv_iostatus_is_enabled(bs)) {
3167         bs->iostatus = BDRV_IOS_OK;
3168     }
3169 }
3170
3171 /* XXX: Today this is set by device models because it makes the implementation
3172    quite simple. However, the block layer knows about the error, so it's
3173    possible to implement this without device models being involved */
3174 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3175 {
3176     if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
3177         assert(error >= 0);
3178         bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
3179     }
3180 }
3181
3182 void
3183 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3184         enum BlockAcctType type)
3185 {
3186     assert(type < BDRV_MAX_IOTYPE);
3187
3188     cookie->bytes = bytes;
3189     cookie->start_time_ns = get_clock();
3190     cookie->type = type;
3191 }
3192
3193 void
3194 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3195 {
3196     assert(cookie->type < BDRV_MAX_IOTYPE);
3197
3198     bs->nr_bytes[cookie->type] += cookie->bytes;
3199     bs->nr_ops[cookie->type]++;
3200     bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
3201 }
3202
3203 int bdrv_img_create(const char *filename, const char *fmt,
3204                     const char *base_filename, const char *base_fmt,
3205                     char *options, uint64_t img_size, int flags)
3206 {
3207     QEMUOptionParameter *param = NULL, *create_options = NULL;
3208     QEMUOptionParameter *backing_fmt, *backing_file, *size;
3209     BlockDriverState *bs = NULL;
3210     BlockDriver *drv, *proto_drv;
3211     BlockDriver *backing_drv = NULL;
3212     int ret = 0;
3213
3214     /* Find driver and parse its options */
3215     drv = bdrv_find_format(fmt);
3216     if (!drv) {
3217         error_report("Unknown file format '%s'", fmt);
3218         ret = -EINVAL;
3219         goto out;
3220     }
3221
3222     proto_drv = bdrv_find_protocol(filename);
3223     if (!proto_drv) {
3224         error_report("Unknown protocol '%s'", filename);
3225         ret = -EINVAL;
3226         goto out;
3227     }
3228
3229     create_options = append_option_parameters(create_options,
3230                                               drv->create_options);
3231     create_options = append_option_parameters(create_options,
3232                                               proto_drv->create_options);
3233
3234     /* Create parameter list with default values */
3235     param = parse_option_parameters("", create_options, param);
3236
3237     set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3238
3239     /* Parse -o options */
3240     if (options) {
3241         param = parse_option_parameters(options, create_options, param);
3242         if (param == NULL) {
3243             error_report("Invalid options for file format '%s'.", fmt);
3244             ret = -EINVAL;
3245             goto out;
3246         }
3247     }
3248
3249     if (base_filename) {
3250         if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3251                                  base_filename)) {
3252             error_report("Backing file not supported for file format '%s'",
3253                          fmt);
3254             ret = -EINVAL;
3255             goto out;
3256         }
3257     }
3258
3259     if (base_fmt) {
3260         if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3261             error_report("Backing file format not supported for file "
3262                          "format '%s'", fmt);
3263             ret = -EINVAL;
3264             goto out;
3265         }
3266     }
3267
3268     backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3269     if (backing_file && backing_file->value.s) {
3270         if (!strcmp(filename, backing_file->value.s)) {
3271             error_report("Error: Trying to create an image with the "
3272                          "same filename as the backing file");
3273             ret = -EINVAL;
3274             goto out;
3275         }
3276     }
3277
3278     backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3279     if (backing_fmt && backing_fmt->value.s) {
3280         backing_drv = bdrv_find_format(backing_fmt->value.s);
3281         if (!backing_drv) {
3282             error_report("Unknown backing file format '%s'",
3283                          backing_fmt->value.s);
3284             ret = -EINVAL;
3285             goto out;
3286         }
3287     }
3288
3289     // The size for the image must always be specified, with one exception:
3290     // If we are using a backing file, we can obtain the size from there
3291     size = get_option_parameter(param, BLOCK_OPT_SIZE);
3292     if (size && size->value.n == -1) {
3293         if (backing_file && backing_file->value.s) {
3294             uint64_t size;
3295             char buf[32];
3296
3297             bs = bdrv_new("");
3298
3299             ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3300             if (ret < 0) {
3301                 error_report("Could not open '%s'", backing_file->value.s);
3302                 goto out;
3303             }
3304             bdrv_get_geometry(bs, &size);
3305             size *= 512;
3306
3307             snprintf(buf, sizeof(buf), "%" PRId64, size);
3308             set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3309         } else {
3310             error_report("Image creation needs a size parameter");
3311             ret = -EINVAL;
3312             goto out;
3313         }
3314     }
3315
3316     printf("Formatting '%s', fmt=%s ", filename, fmt);
3317     print_option_parameters(param);
3318     puts("");
3319
3320     ret = bdrv_create(drv, filename, param);
3321
3322     if (ret < 0) {
3323         if (ret == -ENOTSUP) {
3324             error_report("Formatting or formatting option not supported for "
3325                          "file format '%s'", fmt);
3326         } else if (ret == -EFBIG) {
3327             error_report("The image size is too large for file format '%s'",
3328                          fmt);
3329         } else {
3330             error_report("%s: error while creating %s: %s", filename, fmt,
3331                          strerror(-ret));
3332         }
3333     }
3334
3335 out:
3336     free_option_parameters(create_options);
3337     free_option_parameters(param);
3338
3339     if (bs) {
3340         bdrv_delete(bs);
3341     }
3342
3343     return ret;
3344 }
This page took 0.200263 seconds and 4 git commands to generate.