]> Git Repo - qemu.git/blob - block.c
Merge remote-tracking branch 'stefanha/trivial-patches' into staging
[qemu.git] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "monitor.h"
28 #include "block_int.h"
29 #include "module.h"
30 #include "qemu-objects.h"
31 #include "qemu-coroutine.h"
32
33 #ifdef CONFIG_BSD
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/ioctl.h>
37 #include <sys/queue.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
42
43 #ifdef _WIN32
44 #include <windows.h>
45 #endif
46
/*
 * Forward declarations of the static emulation helpers.
 *
 * bdrv_register() installs several of these as fallbacks for the entry
 * points a driver leaves unimplemented (AIO emulated on top of coroutines,
 * synchronous I/O emulated on top of AIO, and so on).
 */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque);
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors);
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors);
static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
74
/* All named block devices; bdrv_new() appends a state here only when it is
   given a non-empty device name, bdrv_make_anon() removes it again */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* All block drivers added through bdrv_register() */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
86
87 #ifdef _WIN32
/* Return non-zero when filename begins with an ASCII drive letter that is
   immediately followed by a colon (for example "c:" or "D:/dir") */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        return 0;
    }
    /* only look at the second character once the first one qualified */
    return filename[1] == ':';
}
94
/* Return 1 when filename designates a whole drive: either a bare "X:" or a
   name starting with one of the device prefixes tested below; 0 otherwise */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL)) ? 1 : 0;
}
105 #endif
106
/* Tell whether path carries a "<protocol>:" part, i.e. contains a colon.
   On Windows a drive specification is never treated as a protocol. */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive_prefix(path) ||
        is_windows_drive(path)) {
        return 0;
    }
#endif

    if (strchr(path, ':') == NULL) {
        return 0;
    }
    return 1;
}
119
/* Return non-zero if path is absolute.  Anything up to and including the
   first ':' is skipped before the leading separator is looked for. */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *rest;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    rest = colon ? colon + 1 : path;
#ifdef _WIN32
    if (*rest == '\\') {
        return 1;
    }
#endif
    return (*rest == '/');
}
139
/* Build the location of filename in dest (at most dest_size bytes including
   the terminator).  An absolute filename is copied as is; otherwise it is
   appended to the directory part of base_path, where the directory part
   ends after the last path separator or after the first ':' (so URL-like
   base paths keep their prefix), whichever comes later. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *proto_end, *dir_end, *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* first byte after a "<protocol>:" prefix, if there is one */
    proto_end = strchr(base_path, ':');
    proto_end = proto_end ? proto_end + 1 : base_path;

    /* first byte after the last directory separator, if there is one */
    dir_end = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash = strrchr(base_path, '\\');
        if (!dir_end || bslash > dir_end) {
            dir_end = bslash;
        }
    }
#endif
    dir_end = dir_end ? dir_end + 1 : base_path;

    prefix_end = (dir_end > proto_end) ? dir_end : proto_end;
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
183
184 void bdrv_register(BlockDriver *bdrv)
185 {
186     if (bdrv->bdrv_co_readv) {
187         /* Emulate AIO by coroutines, and sync by AIO */
188         bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
189         bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
190         bdrv->bdrv_read = bdrv_read_em;
191         bdrv->bdrv_write = bdrv_write_em;
192      } else {
193         bdrv->bdrv_co_readv = bdrv_co_readv_em;
194         bdrv->bdrv_co_writev = bdrv_co_writev_em;
195
196         if (!bdrv->bdrv_aio_readv) {
197             /* add AIO emulation layer */
198             bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
199             bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
200         } else if (!bdrv->bdrv_read) {
201             /* add synchronous IO emulation layer */
202             bdrv->bdrv_read = bdrv_read_em;
203             bdrv->bdrv_write = bdrv_write_em;
204         }
205     }
206
207     if (!bdrv->bdrv_aio_flush)
208         bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
209
210     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
211 }
212
213 /* create a new block device (by default it is empty) */
214 BlockDriverState *bdrv_new(const char *device_name)
215 {
216     BlockDriverState *bs;
217
218     bs = g_malloc0(sizeof(BlockDriverState));
219     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
220     if (device_name[0] != '\0') {
221         QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
222     }
223     return bs;
224 }
225
226 BlockDriver *bdrv_find_format(const char *format_name)
227 {
228     BlockDriver *drv1;
229     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
230         if (!strcmp(drv1->format_name, format_name)) {
231             return drv1;
232         }
233     }
234     return NULL;
235 }
236
237 static int bdrv_is_whitelisted(BlockDriver *drv)
238 {
239     static const char *whitelist[] = {
240         CONFIG_BDRV_WHITELIST
241     };
242     const char **p;
243
244     if (!whitelist[0])
245         return 1;               /* no whitelist, anything goes */
246
247     for (p = whitelist; *p; p++) {
248         if (!strcmp(drv->format_name, *p)) {
249             return 1;
250         }
251     }
252     return 0;
253 }
254
255 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
256 {
257     BlockDriver *drv = bdrv_find_format(format_name);
258     return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
259 }
260
261 int bdrv_create(BlockDriver *drv, const char* filename,
262     QEMUOptionParameter *options)
263 {
264     if (!drv->bdrv_create)
265         return -ENOTSUP;
266
267     return drv->bdrv_create(filename, options);
268 }
269
270 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
271 {
272     BlockDriver *drv;
273
274     drv = bdrv_find_protocol(filename);
275     if (drv == NULL) {
276         return -ENOENT;
277     }
278
279     return bdrv_create(drv, filename, options);
280 }
281
282 #ifdef _WIN32
283 void get_tmp_filename(char *filename, int size)
284 {
285     char temp_dir[MAX_PATH];
286
287     GetTempPath(MAX_PATH, temp_dir);
288     GetTempFileName(temp_dir, "qem", 0, filename);
289 }
290 #else
/*
 * Create an empty temporary file and store its name in filename.
 *
 * filename: output buffer; size: its capacity in bytes.
 *
 * The file is placed in $TMPDIR, or in /tmp when TMPDIR is unset.  If the
 * file cannot be created (including the case where the template does not
 * fit into the buffer, which makes mkstemp() reject it), filename is set to
 * the empty string so that callers do not go on to use a name that was
 * never reserved.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    if (size <= 0) {
        return;
    }

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd < 0) {
        /* nothing was created; do not hand back the raw template */
        filename[0] = '\0';
        return;
    }
    close(fd);
}
303 #endif
304
305 /*
306  * Detect host devices. By convention, /dev/cdrom[N] is always
307  * recognized as a host CDROM.
308  */
309 static BlockDriver *find_hdev_driver(const char *filename)
310 {
311     int score_max = 0, score;
312     BlockDriver *drv = NULL, *d;
313
314     QLIST_FOREACH(d, &bdrv_drivers, list) {
315         if (d->bdrv_probe_device) {
316             score = d->bdrv_probe_device(filename);
317             if (score > score_max) {
318                 score_max = score;
319                 drv = d;
320             }
321         }
322     }
323
324     return drv;
325 }
326
327 BlockDriver *bdrv_find_protocol(const char *filename)
328 {
329     BlockDriver *drv1;
330     char protocol[128];
331     int len;
332     const char *p;
333
334     /* TODO Drivers without bdrv_file_open must be specified explicitly */
335
336     /*
337      * XXX(hch): we really should not let host device detection
338      * override an explicit protocol specification, but moving this
339      * later breaks access to device names with colons in them.
340      * Thanks to the brain-dead persistent naming schemes on udev-
341      * based Linux systems those actually are quite common.
342      */
343     drv1 = find_hdev_driver(filename);
344     if (drv1) {
345         return drv1;
346     }
347
348     if (!path_has_protocol(filename)) {
349         return bdrv_find_format("file");
350     }
351     p = strchr(filename, ':');
352     assert(p != NULL);
353     len = p - filename;
354     if (len > sizeof(protocol) - 1)
355         len = sizeof(protocol) - 1;
356     memcpy(protocol, filename, len);
357     protocol[len] = '\0';
358     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
359         if (drv1->protocol_name &&
360             !strcmp(drv1->protocol_name, protocol)) {
361             return drv1;
362         }
363     }
364     return NULL;
365 }
366
367 static int find_image_format(const char *filename, BlockDriver **pdrv)
368 {
369     int ret, score, score_max;
370     BlockDriver *drv1, *drv;
371     uint8_t buf[2048];
372     BlockDriverState *bs;
373
374     ret = bdrv_file_open(&bs, filename, 0);
375     if (ret < 0) {
376         *pdrv = NULL;
377         return ret;
378     }
379
380     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
381     if (bs->sg || !bdrv_is_inserted(bs)) {
382         bdrv_delete(bs);
383         drv = bdrv_find_format("raw");
384         if (!drv) {
385             ret = -ENOENT;
386         }
387         *pdrv = drv;
388         return ret;
389     }
390
391     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
392     bdrv_delete(bs);
393     if (ret < 0) {
394         *pdrv = NULL;
395         return ret;
396     }
397
398     score_max = 0;
399     drv = NULL;
400     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
401         if (drv1->bdrv_probe) {
402             score = drv1->bdrv_probe(buf, ret, filename);
403             if (score > score_max) {
404                 score_max = score;
405                 drv = drv1;
406             }
407         }
408     }
409     if (!drv) {
410         ret = -ENOENT;
411     }
412     *pdrv = drv;
413     return ret;
414 }
415
416 /**
417  * Set the current 'total_sectors' value
418  */
419 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
420 {
421     BlockDriver *drv = bs->drv;
422
423     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
424     if (bs->sg)
425         return 0;
426
427     /* query actual device if possible, otherwise just trust the hint */
428     if (drv->bdrv_getlength) {
429         int64_t length = drv->bdrv_getlength(bs);
430         if (length < 0) {
431             return length;
432         }
433         hint = length >> BDRV_SECTOR_BITS;
434     }
435
436     bs->total_sectors = hint;
437     return 0;
438 }
439
440 /**
441  * Set open flags for a given cache mode
442  *
443  * Return 0 on success, -1 if the cache mode was invalid.
444  */
445 int bdrv_parse_cache_flags(const char *mode, int *flags)
446 {
447     *flags &= ~BDRV_O_CACHE_MASK;
448
449     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
450         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
451     } else if (!strcmp(mode, "directsync")) {
452         *flags |= BDRV_O_NOCACHE;
453     } else if (!strcmp(mode, "writeback")) {
454         *flags |= BDRV_O_CACHE_WB;
455     } else if (!strcmp(mode, "unsafe")) {
456         *flags |= BDRV_O_CACHE_WB;
457         *flags |= BDRV_O_NO_FLUSH;
458     } else if (!strcmp(mode, "writethrough")) {
459         /* this is the default */
460     } else {
461         return -1;
462     }
463
464     return 0;
465 }
466
467 /*
468  * Common part for opening disk images and files
469  */
470 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
471     int flags, BlockDriver *drv)
472 {
473     int ret, open_flags;
474
475     assert(drv != NULL);
476
477     bs->file = NULL;
478     bs->total_sectors = 0;
479     bs->encrypted = 0;
480     bs->valid_key = 0;
481     bs->open_flags = flags;
482     /* buffer_alignment defaulted to 512, drivers can change this value */
483     bs->buffer_alignment = 512;
484
485     pstrcpy(bs->filename, sizeof(bs->filename), filename);
486
487     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
488         return -ENOTSUP;
489     }
490
491     bs->drv = drv;
492     bs->opaque = g_malloc0(drv->instance_size);
493
494     if (flags & BDRV_O_CACHE_WB)
495         bs->enable_write_cache = 1;
496
497     /*
498      * Clear flags that are internal to the block layer before opening the
499      * image.
500      */
501     open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
502
503     /*
504      * Snapshots should be writable.
505      */
506     if (bs->is_temporary) {
507         open_flags |= BDRV_O_RDWR;
508     }
509
510     /* Open the image, either directly or using a protocol */
511     if (drv->bdrv_file_open) {
512         ret = drv->bdrv_file_open(bs, filename, open_flags);
513     } else {
514         ret = bdrv_file_open(&bs->file, filename, open_flags);
515         if (ret >= 0) {
516             ret = drv->bdrv_open(bs, open_flags);
517         }
518     }
519
520     if (ret < 0) {
521         goto free_and_fail;
522     }
523
524     bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
525
526     ret = refresh_total_sectors(bs, bs->total_sectors);
527     if (ret < 0) {
528         goto free_and_fail;
529     }
530
531 #ifndef _WIN32
532     if (bs->is_temporary) {
533         unlink(filename);
534     }
535 #endif
536     return 0;
537
538 free_and_fail:
539     if (bs->file) {
540         bdrv_delete(bs->file);
541         bs->file = NULL;
542     }
543     g_free(bs->opaque);
544     bs->opaque = NULL;
545     bs->drv = NULL;
546     return ret;
547 }
548
549 /*
550  * Opens a file using a protocol (file, host_device, nbd, ...)
551  */
552 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
553 {
554     BlockDriverState *bs;
555     BlockDriver *drv;
556     int ret;
557
558     drv = bdrv_find_protocol(filename);
559     if (!drv) {
560         return -ENOENT;
561     }
562
563     bs = bdrv_new("");
564     ret = bdrv_open_common(bs, filename, flags, drv);
565     if (ret < 0) {
566         bdrv_delete(bs);
567         return ret;
568     }
569     bs->growable = 1;
570     *pbs = bs;
571     return 0;
572 }
573
574 /*
575  * Opens a disk image (raw, qcow2, vmdk, ...)
576  */
577 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
578               BlockDriver *drv)
579 {
580     int ret;
581
582     if (flags & BDRV_O_SNAPSHOT) {
583         BlockDriverState *bs1;
584         int64_t total_size;
585         int is_protocol = 0;
586         BlockDriver *bdrv_qcow2;
587         QEMUOptionParameter *options;
588         char tmp_filename[PATH_MAX];
589         char backing_filename[PATH_MAX];
590
591         /* if snapshot, we create a temporary backing file and open it
592            instead of opening 'filename' directly */
593
594         /* if there is a backing file, use it */
595         bs1 = bdrv_new("");
596         ret = bdrv_open(bs1, filename, 0, drv);
597         if (ret < 0) {
598             bdrv_delete(bs1);
599             return ret;
600         }
601         total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
602
603         if (bs1->drv && bs1->drv->protocol_name)
604             is_protocol = 1;
605
606         bdrv_delete(bs1);
607
608         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
609
610         /* Real path is meaningless for protocols */
611         if (is_protocol)
612             snprintf(backing_filename, sizeof(backing_filename),
613                      "%s", filename);
614         else if (!realpath(filename, backing_filename))
615             return -errno;
616
617         bdrv_qcow2 = bdrv_find_format("qcow2");
618         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
619
620         set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
621         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
622         if (drv) {
623             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
624                 drv->format_name);
625         }
626
627         ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
628         free_option_parameters(options);
629         if (ret < 0) {
630             return ret;
631         }
632
633         filename = tmp_filename;
634         drv = bdrv_qcow2;
635         bs->is_temporary = 1;
636     }
637
638     /* Find the right image format driver */
639     if (!drv) {
640         ret = find_image_format(filename, &drv);
641     }
642
643     if (!drv) {
644         goto unlink_and_fail;
645     }
646
647     /* Open the image */
648     ret = bdrv_open_common(bs, filename, flags, drv);
649     if (ret < 0) {
650         goto unlink_and_fail;
651     }
652
653     /* If there is a backing file, use it */
654     if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
655         char backing_filename[PATH_MAX];
656         int back_flags;
657         BlockDriver *back_drv = NULL;
658
659         bs->backing_hd = bdrv_new("");
660
661         if (path_has_protocol(bs->backing_file)) {
662             pstrcpy(backing_filename, sizeof(backing_filename),
663                     bs->backing_file);
664         } else {
665             path_combine(backing_filename, sizeof(backing_filename),
666                          filename, bs->backing_file);
667         }
668
669         if (bs->backing_format[0] != '\0') {
670             back_drv = bdrv_find_format(bs->backing_format);
671         }
672
673         /* backing files always opened read-only */
674         back_flags =
675             flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
676
677         ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
678         if (ret < 0) {
679             bdrv_close(bs);
680             return ret;
681         }
682         if (bs->is_temporary) {
683             bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
684         } else {
685             /* base image inherits from "parent" */
686             bs->backing_hd->keep_read_only = bs->keep_read_only;
687         }
688     }
689
690     if (!bdrv_key_required(bs)) {
691         /* call the change callback */
692         bs->media_changed = 1;
693         if (bs->change_cb)
694             bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
695     }
696
697     return 0;
698
699 unlink_and_fail:
700     if (bs->is_temporary) {
701         unlink(filename);
702     }
703     return ret;
704 }
705
706 void bdrv_close(BlockDriverState *bs)
707 {
708     if (bs->drv) {
709         if (bs == bs_snapshots) {
710             bs_snapshots = NULL;
711         }
712         if (bs->backing_hd) {
713             bdrv_delete(bs->backing_hd);
714             bs->backing_hd = NULL;
715         }
716         bs->drv->bdrv_close(bs);
717         g_free(bs->opaque);
718 #ifdef _WIN32
719         if (bs->is_temporary) {
720             unlink(bs->filename);
721         }
722 #endif
723         bs->opaque = NULL;
724         bs->drv = NULL;
725
726         if (bs->file != NULL) {
727             bdrv_close(bs->file);
728         }
729
730         /* call the change callback */
731         bs->media_changed = 1;
732         if (bs->change_cb)
733             bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
734     }
735 }
736
737 void bdrv_close_all(void)
738 {
739     BlockDriverState *bs;
740
741     QTAILQ_FOREACH(bs, &bdrv_states, list) {
742         bdrv_close(bs);
743     }
744 }
745
746 /* make a BlockDriverState anonymous by removing from bdrv_state list.
747    Also, NULL terminate the device_name to prevent double remove */
748 void bdrv_make_anon(BlockDriverState *bs)
749 {
750     if (bs->device_name[0] != '\0') {
751         QTAILQ_REMOVE(&bdrv_states, bs, list);
752     }
753     bs->device_name[0] = '\0';
754 }
755
756 void bdrv_delete(BlockDriverState *bs)
757 {
758     assert(!bs->peer);
759
760     /* remove from list, if necessary */
761     bdrv_make_anon(bs);
762
763     bdrv_close(bs);
764     if (bs->file != NULL) {
765         bdrv_delete(bs->file);
766     }
767
768     assert(bs != bs_snapshots);
769     g_free(bs);
770 }
771
772 int bdrv_attach(BlockDriverState *bs, DeviceState *qdev)
773 {
774     if (bs->peer) {
775         return -EBUSY;
776     }
777     bs->peer = qdev;
778     return 0;
779 }
780
781 void bdrv_detach(BlockDriverState *bs, DeviceState *qdev)
782 {
783     assert(bs->peer == qdev);
784     bs->peer = NULL;
785     bs->change_cb = NULL;
786     bs->change_opaque = NULL;
787 }
788
789 DeviceState *bdrv_get_attached(BlockDriverState *bs)
790 {
791     return bs->peer;
792 }
793
794 /*
795  * Run consistency checks on an image
796  *
797  * Returns 0 if the check could be completed (it doesn't mean that the image is
798  * free of errors) or -errno when an internal error occurred. The results of the
799  * check are stored in res.
800  */
801 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
802 {
803     if (bs->drv->bdrv_check == NULL) {
804         return -ENOTSUP;
805     }
806
807     memset(res, 0, sizeof(*res));
808     return bs->drv->bdrv_check(bs, res);
809 }
810
811 #define COMMIT_BUF_SECTORS 2048
812
813 /* commit COW file into the raw image */
814 int bdrv_commit(BlockDriverState *bs)
815 {
816     BlockDriver *drv = bs->drv;
817     BlockDriver *backing_drv;
818     int64_t sector, total_sectors;
819     int n, ro, open_flags;
820     int ret = 0, rw_ret = 0;
821     uint8_t *buf;
822     char filename[1024];
823     BlockDriverState *bs_rw, *bs_ro;
824
825     if (!drv)
826         return -ENOMEDIUM;
827     
828     if (!bs->backing_hd) {
829         return -ENOTSUP;
830     }
831
832     if (bs->backing_hd->keep_read_only) {
833         return -EACCES;
834     }
835
836     backing_drv = bs->backing_hd->drv;
837     ro = bs->backing_hd->read_only;
838     strncpy(filename, bs->backing_hd->filename, sizeof(filename));
839     open_flags =  bs->backing_hd->open_flags;
840
841     if (ro) {
842         /* re-open as RW */
843         bdrv_delete(bs->backing_hd);
844         bs->backing_hd = NULL;
845         bs_rw = bdrv_new("");
846         rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
847             backing_drv);
848         if (rw_ret < 0) {
849             bdrv_delete(bs_rw);
850             /* try to re-open read-only */
851             bs_ro = bdrv_new("");
852             ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
853                 backing_drv);
854             if (ret < 0) {
855                 bdrv_delete(bs_ro);
856                 /* drive not functional anymore */
857                 bs->drv = NULL;
858                 return ret;
859             }
860             bs->backing_hd = bs_ro;
861             return rw_ret;
862         }
863         bs->backing_hd = bs_rw;
864     }
865
866     total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
867     buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
868
869     for (sector = 0; sector < total_sectors; sector += n) {
870         if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
871
872             if (bdrv_read(bs, sector, buf, n) != 0) {
873                 ret = -EIO;
874                 goto ro_cleanup;
875             }
876
877             if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
878                 ret = -EIO;
879                 goto ro_cleanup;
880             }
881         }
882     }
883
884     if (drv->bdrv_make_empty) {
885         ret = drv->bdrv_make_empty(bs);
886         bdrv_flush(bs);
887     }
888
889     /*
890      * Make sure all data we wrote to the backing device is actually
891      * stable on disk.
892      */
893     if (bs->backing_hd)
894         bdrv_flush(bs->backing_hd);
895
896 ro_cleanup:
897     g_free(buf);
898
899     if (ro) {
900         /* re-open as RO */
901         bdrv_delete(bs->backing_hd);
902         bs->backing_hd = NULL;
903         bs_ro = bdrv_new("");
904         ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
905             backing_drv);
906         if (ret < 0) {
907             bdrv_delete(bs_ro);
908             /* drive not functional anymore */
909             bs->drv = NULL;
910             return ret;
911         }
912         bs->backing_hd = bs_ro;
913         bs->backing_hd->keep_read_only = 0;
914     }
915
916     return ret;
917 }
918
919 void bdrv_commit_all(void)
920 {
921     BlockDriverState *bs;
922
923     QTAILQ_FOREACH(bs, &bdrv_states, list) {
924         bdrv_commit(bs);
925     }
926 }
927
928 /*
929  * Return values:
930  * 0        - success
931  * -EINVAL  - backing format specified, but no file
932  * -ENOSPC  - can't update the backing file because no space is left in the
933  *            image file header
934  * -ENOTSUP - format driver doesn't support changing the backing file
935  */
936 int bdrv_change_backing_file(BlockDriverState *bs,
937     const char *backing_file, const char *backing_fmt)
938 {
939     BlockDriver *drv = bs->drv;
940
941     if (drv->bdrv_change_backing_file != NULL) {
942         return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
943     } else {
944         return -ENOTSUP;
945     }
946 }
947
948 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
949                                    size_t size)
950 {
951     int64_t len;
952
953     if (!bdrv_is_inserted(bs))
954         return -ENOMEDIUM;
955
956     if (bs->growable)
957         return 0;
958
959     len = bdrv_getlength(bs);
960
961     if (offset < 0)
962         return -EIO;
963
964     if ((offset > len) || (len - offset < size))
965         return -EIO;
966
967     return 0;
968 }
969
970 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
971                               int nb_sectors)
972 {
973     return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
974                                    nb_sectors * BDRV_SECTOR_SIZE);
975 }
976
977 static inline bool bdrv_has_async_rw(BlockDriver *drv)
978 {
979     return drv->bdrv_co_readv != bdrv_co_readv_em
980         || drv->bdrv_aio_readv != bdrv_aio_readv_em;
981 }
982
983 static inline bool bdrv_has_async_flush(BlockDriver *drv)
984 {
985     return drv->bdrv_aio_flush != bdrv_aio_flush_em;
986 }
987
988 /* return < 0 if error. See bdrv_write() for the return codes */
989 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
990               uint8_t *buf, int nb_sectors)
991 {
992     BlockDriver *drv = bs->drv;
993
994     if (!drv)
995         return -ENOMEDIUM;
996
997     if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
998         QEMUIOVector qiov;
999         struct iovec iov = {
1000             .iov_base = (void *)buf,
1001             .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1002         };
1003
1004         qemu_iovec_init_external(&qiov, &iov, 1);
1005         return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
1006     }
1007
1008     if (bdrv_check_request(bs, sector_num, nb_sectors))
1009         return -EIO;
1010
1011     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
1012 }
1013
1014 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
1015                              int nb_sectors, int dirty)
1016 {
1017     int64_t start, end;
1018     unsigned long val, idx, bit;
1019
1020     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
1021     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
1022
1023     for (; start <= end; start++) {
1024         idx = start / (sizeof(unsigned long) * 8);
1025         bit = start % (sizeof(unsigned long) * 8);
1026         val = bs->dirty_bitmap[idx];
1027         if (dirty) {
1028             if (!(val & (1UL << bit))) {
1029                 bs->dirty_count++;
1030                 val |= 1UL << bit;
1031             }
1032         } else {
1033             if (val & (1UL << bit)) {
1034                 bs->dirty_count--;
1035                 val &= ~(1UL << bit);
1036             }
1037         }
1038         bs->dirty_bitmap[idx] = val;
1039     }
1040 }
1041
1042 /* Return < 0 if error. Important errors are:
1043   -EIO         generic I/O error (may happen for all errors)
1044   -ENOMEDIUM   No media inserted.
1045   -EINVAL      Invalid sector number or nb_sectors
1046   -EACCES      Trying to write a read-only device
1047 */
1048 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1049                const uint8_t *buf, int nb_sectors)
1050 {
1051     BlockDriver *drv = bs->drv;
1052
1053     if (!bs->drv)
1054         return -ENOMEDIUM;
1055
1056     if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
1057         QEMUIOVector qiov;
1058         struct iovec iov = {
1059             .iov_base = (void *)buf,
1060             .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1061         };
1062
1063         qemu_iovec_init_external(&qiov, &iov, 1);
1064         return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1065     }
1066
1067     if (bs->read_only)
1068         return -EACCES;
1069     if (bdrv_check_request(bs, sector_num, nb_sectors))
1070         return -EIO;
1071
1072     if (bs->dirty_bitmap) {
1073         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1074     }
1075
1076     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1077         bs->wr_highest_sector = sector_num + nb_sectors - 1;
1078     }
1079
1080     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
1081 }
1082
/*
 * Read count1 bytes starting at byte position 'offset' into buf.
 *
 * The request does not need to be sector aligned.  It is served in up to
 * three steps, all through bdrv_read():
 *   1. a partial leading sector, bounced through tmp_buf;
 *   2. the run of whole sectors, read directly into buf;
 *   3. a partial trailing sector, bounced through tmp_buf.
 *
 * Returns count1 on success, or the negative value returned by the failing
 * bdrv_read() call.  On failure, part of buf may already have been filled.
 */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    /* 'count' is the number of bytes still to be copied */
    count = count1;
    /* first read to align to sector start */
    /* len = bytes from offset up to the next sector boundary (0 if aligned) */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        /* copy only the requested tail of the first sector */
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        /* arithmetic on 'void *' relies on the GNU C extension */
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        /* only the first 'count' bytes of the last sector were requested */
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}
1127
/*
 * Write count1 bytes from buf to byte position 'offset'.
 *
 * The request does not need to be sector aligned.  Partial sectors at the
 * start and at the end are handled by read-modify-write through tmp_buf
 * (bdrv_read(), patch, bdrv_write()); the whole sectors in between are
 * written directly from buf.
 *
 * Returns count1 on success, or the negative value returned by the failing
 * bdrv_read()/bdrv_write() call.  On failure, earlier parts of the request
 * may already have been written.
 */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    /* 'count' is the number of bytes still to be written */
    count = count1;
    /* first write to align to sector start */
    /* len = bytes from offset up to the next sector boundary (0 if aligned) */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        /* fetch the existing sector so the untouched bytes are preserved */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        /* arithmetic on 'void *' relies on the GNU C extension */
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        /* read-modify-write: only the first 'count' bytes are replaced */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
1176
1177 /*
1178  * Writes to the file and ensures that no writes are reordered across this
1179  * request (acts as a barrier)
1180  *
1181  * Returns 0 on success, -errno in error cases.
1182  */
1183 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1184     const void *buf, int count)
1185 {
1186     int ret;
1187
1188     ret = bdrv_pwrite(bs, offset, buf, count);
1189     if (ret < 0) {
1190         return ret;
1191     }
1192
1193     /* No flush needed for cache modes that use O_DSYNC */
1194     if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
1195         bdrv_flush(bs);
1196     }
1197
1198     return 0;
1199 }
1200
1201 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1202     int nb_sectors, QEMUIOVector *qiov)
1203 {
1204     BlockDriver *drv = bs->drv;
1205
1206     trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1207
1208     if (!drv) {
1209         return -ENOMEDIUM;
1210     }
1211     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1212         return -EIO;
1213     }
1214
1215     return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1216 }
1217
1218 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1219     int nb_sectors, QEMUIOVector *qiov)
1220 {
1221     BlockDriver *drv = bs->drv;
1222
1223     trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1224
1225     if (!bs->drv) {
1226         return -ENOMEDIUM;
1227     }
1228     if (bs->read_only) {
1229         return -EACCES;
1230     }
1231     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1232         return -EIO;
1233     }
1234
1235     if (bs->dirty_bitmap) {
1236         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1237     }
1238
1239     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1240         bs->wr_highest_sector = sector_num + nb_sectors - 1;
1241     }
1242
1243     return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1244 }
1245
1246 /**
1247  * Truncate file to 'offset' bytes (needed only for file protocols)
1248  */
1249 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1250 {
1251     BlockDriver *drv = bs->drv;
1252     int ret;
1253     if (!drv)
1254         return -ENOMEDIUM;
1255     if (!drv->bdrv_truncate)
1256         return -ENOTSUP;
1257     if (bs->read_only)
1258         return -EACCES;
1259     if (bdrv_in_use(bs))
1260         return -EBUSY;
1261     ret = drv->bdrv_truncate(bs, offset);
1262     if (ret == 0) {
1263         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1264         if (bs->change_cb) {
1265             bs->change_cb(bs->change_opaque, CHANGE_SIZE);
1266         }
1267     }
1268     return ret;
1269 }
1270
1271 /**
1272  * Length of a allocated file in bytes. Sparse files are counted by actual
1273  * allocated space. Return < 0 if error or unknown.
1274  */
1275 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1276 {
1277     BlockDriver *drv = bs->drv;
1278     if (!drv) {
1279         return -ENOMEDIUM;
1280     }
1281     if (drv->bdrv_get_allocated_file_size) {
1282         return drv->bdrv_get_allocated_file_size(bs);
1283     }
1284     if (bs->file) {
1285         return bdrv_get_allocated_file_size(bs->file);
1286     }
1287     return -ENOTSUP;
1288 }
1289
1290 /**
1291  * Length of a file in bytes. Return < 0 if error or unknown.
1292  */
1293 int64_t bdrv_getlength(BlockDriverState *bs)
1294 {
1295     BlockDriver *drv = bs->drv;
1296     if (!drv)
1297         return -ENOMEDIUM;
1298
1299     if (bs->growable || bs->removable) {
1300         if (drv->bdrv_getlength) {
1301             return drv->bdrv_getlength(bs);
1302         }
1303     }
1304     return bs->total_sectors * BDRV_SECTOR_SIZE;
1305 }
1306
1307 /* return 0 as number of sectors if no device present or error */
1308 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1309 {
1310     int64_t length;
1311     length = bdrv_getlength(bs);
1312     if (length < 0)
1313         length = 0;
1314     else
1315         length = length >> BDRV_SECTOR_BITS;
1316     *nb_sectors_ptr = length;
1317 }
1318
/*
 * One entry of the MSDOS partition table, as read by guess_disk_lchs() from
 * offset 0x1be of the first sector.  The struct is packed so that it can be
 * overlaid directly on the sector buffer; the 32-bit fields are little
 * endian in the buffer (guess_disk_lchs() converts nr_sects with
 * le32_to_cpu()).
 */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;
1331
1332 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1333 static int guess_disk_lchs(BlockDriverState *bs,
1334                            int *pcylinders, int *pheads, int *psectors)
1335 {
1336     uint8_t buf[BDRV_SECTOR_SIZE];
1337     int ret, i, heads, sectors, cylinders;
1338     struct partition *p;
1339     uint32_t nr_sects;
1340     uint64_t nb_sectors;
1341
1342     bdrv_get_geometry(bs, &nb_sectors);
1343
1344     ret = bdrv_read(bs, 0, buf, 1);
1345     if (ret < 0)
1346         return -1;
1347     /* test msdos magic */
1348     if (buf[510] != 0x55 || buf[511] != 0xaa)
1349         return -1;
1350     for(i = 0; i < 4; i++) {
1351         p = ((struct partition *)(buf + 0x1be)) + i;
1352         nr_sects = le32_to_cpu(p->nr_sects);
1353         if (nr_sects && p->end_head) {
1354             /* We make the assumption that the partition terminates on
1355                a cylinder boundary */
1356             heads = p->end_head + 1;
1357             sectors = p->end_sector & 63;
1358             if (sectors == 0)
1359                 continue;
1360             cylinders = nb_sectors / (heads * sectors);
1361             if (cylinders < 1 || cylinders > 16383)
1362                 continue;
1363             *pheads = heads;
1364             *psectors = sectors;
1365             *pcylinders = cylinders;
1366 #if 0
1367             printf("guessed geometry: LCHS=%d %d %d\n",
1368                    cylinders, heads, sectors);
1369 #endif
1370             return 0;
1371         }
1372     }
1373     return -1;
1374 }
1375
/*
 * Determine a cylinders/heads/sectors geometry for bs and return it in
 * *pcyls, *pheads and *psecs.
 *
 * Order of preference:
 *   1. an existing geometry hint (non-zero cylinder count) is used as is;
 *   2. otherwise the geometry guessed from the MSDOS partition table is
 *      used, provided it has at most 16 heads;
 *   3. otherwise a default geometry with 16 heads and 63 sectors is derived
 *      from the device size, with the cylinder count clamped to 2..16383.
 *
 * In cases 2 and 3 the result is stored back as the geometry hint, and the
 * translation hint may be updated when it is currently
 * BIOS_ATA_TRANSLATION_AUTO.
 */
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
    } else {
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
            if (heads > 16) {
                /* if heads > 16, it means that a BIOS LBA
                   translation was active, so the default
                   hardware geometry is OK */
                lba_detected = 1;
                /* jumps into the else branch below */
                goto default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            /* only reached with lba_detected set via the goto above */
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        /* remember the result so later calls take the hint path */
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}
1434
1435 void bdrv_set_geometry_hint(BlockDriverState *bs,
1436                             int cyls, int heads, int secs)
1437 {
1438     bs->cyls = cyls;
1439     bs->heads = heads;
1440     bs->secs = secs;
1441 }
1442
1443 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1444 {
1445     bs->translation = translation;
1446 }
1447
1448 void bdrv_get_geometry_hint(BlockDriverState *bs,
1449                             int *pcyls, int *pheads, int *psecs)
1450 {
1451     *pcyls = bs->cyls;
1452     *pheads = bs->heads;
1453     *psecs = bs->secs;
1454 }
1455
/* Recognize floppy formats */
/*
 * Description of one known floppy format.  bdrv_get_floppy_geometry_hint()
 * computes a format's size in sectors as
 * (max_head + 1) * max_track * last_sect.
 */
typedef struct FDFormat {
    FDriveType drive;       /* drive type this format belongs to */
    uint8_t last_sect;      /* number of sectors per track */
    uint8_t max_track;      /* number of tracks */
    uint8_t max_head;       /* highest head index (number of heads - 1) */
} FDFormat;
1463
/*
 * Table of known floppy formats, scanned in order by
 * bdrv_get_floppy_geometry_hint().  Each entry is
 * { drive type, sectors per track, tracks, highest head index }.
 * The list is terminated by an entry whose drive is FDRIVE_DRV_NONE.
 */
static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, },
    { FDRIVE_DRV_144, 20, 80, 1, },
    { FDRIVE_DRV_144, 21, 80, 1, },
    { FDRIVE_DRV_144, 21, 82, 1, },
    { FDRIVE_DRV_144, 21, 83, 1, },
    { FDRIVE_DRV_144, 22, 80, 1, },
    { FDRIVE_DRV_144, 23, 80, 1, },
    { FDRIVE_DRV_144, 24, 80, 1, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, },
    { FDRIVE_DRV_288, 39, 80, 1, },
    { FDRIVE_DRV_288, 40, 80, 1, },
    { FDRIVE_DRV_288, 44, 80, 1, },
    { FDRIVE_DRV_288, 48, 80, 1, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, },
    { FDRIVE_DRV_144, 10, 80, 1, },
    { FDRIVE_DRV_144, 10, 82, 1, },
    { FDRIVE_DRV_144, 10, 83, 1, },
    { FDRIVE_DRV_144, 13, 80, 1, },
    { FDRIVE_DRV_144, 14, 80, 1, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, },
    { FDRIVE_DRV_120, 18, 80, 1, },
    { FDRIVE_DRV_120, 18, 82, 1, },
    { FDRIVE_DRV_120, 18, 83, 1, },
    { FDRIVE_DRV_120, 20, 80, 1, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, },
    { FDRIVE_DRV_120, 11, 80, 1, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, },
    { FDRIVE_DRV_120,  9, 40, 0, },
    { FDRIVE_DRV_120, 10, 41, 1, },
    { FDRIVE_DRV_120, 10, 42, 1, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, },
    { FDRIVE_DRV_120,  8, 40, 0, },
    /* 360 kB must match 5"1/4 better than 3"1/2... */
    { FDRIVE_DRV_144,  9, 80, 0, },
    /* end */
    /* the -1 values are converted to 255 by the uint8_t fields; the
       terminator is recognized by its drive type only */
    { FDRIVE_DRV_NONE, -1, -1, 0, },
};
1510
1511 void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1512                                    int *max_track, int *last_sect,
1513                                    FDriveType drive_in, FDriveType *drive)
1514 {
1515     const FDFormat *parse;
1516     uint64_t nb_sectors, size;
1517     int i, first_match, match;
1518
1519     bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1520     if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1521         /* User defined disk */
1522     } else {
1523         bdrv_get_geometry(bs, &nb_sectors);
1524         match = -1;
1525         first_match = -1;
1526         for (i = 0; ; i++) {
1527             parse = &fd_formats[i];
1528             if (parse->drive == FDRIVE_DRV_NONE) {
1529                 break;
1530             }
1531             if (drive_in == parse->drive ||
1532                 drive_in == FDRIVE_DRV_NONE) {
1533                 size = (parse->max_head + 1) * parse->max_track *
1534                     parse->last_sect;
1535                 if (nb_sectors == size) {
1536                     match = i;
1537                     break;
1538                 }
1539                 if (first_match == -1) {
1540                     first_match = i;
1541                 }
1542             }
1543         }
1544         if (match == -1) {
1545             if (first_match == -1) {
1546                 match = 1;
1547             } else {
1548                 match = first_match;
1549             }
1550             parse = &fd_formats[match];
1551         }
1552         *nb_heads = parse->max_head + 1;
1553         *max_track = parse->max_track;
1554         *last_sect = parse->last_sect;
1555         *drive = parse->drive;
1556     }
1557 }
1558
1559 int bdrv_get_translation_hint(BlockDriverState *bs)
1560 {
1561     return bs->translation;
1562 }
1563
1564 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1565                        BlockErrorAction on_write_error)
1566 {
1567     bs->on_read_error = on_read_error;
1568     bs->on_write_error = on_write_error;
1569 }
1570
1571 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1572 {
1573     return is_read ? bs->on_read_error : bs->on_write_error;
1574 }
1575
1576 void bdrv_set_removable(BlockDriverState *bs, int removable)
1577 {
1578     bs->removable = removable;
1579     if (removable && bs == bs_snapshots) {
1580         bs_snapshots = NULL;
1581     }
1582 }
1583
1584 int bdrv_is_removable(BlockDriverState *bs)
1585 {
1586     return bs->removable;
1587 }
1588
1589 int bdrv_is_read_only(BlockDriverState *bs)
1590 {
1591     return bs->read_only;
1592 }
1593
1594 int bdrv_is_sg(BlockDriverState *bs)
1595 {
1596     return bs->sg;
1597 }
1598
1599 int bdrv_enable_write_cache(BlockDriverState *bs)
1600 {
1601     return bs->enable_write_cache;
1602 }
1603
1604 /* XXX: no longer used */
1605 void bdrv_set_change_cb(BlockDriverState *bs,
1606                         void (*change_cb)(void *opaque, int reason),
1607                         void *opaque)
1608 {
1609     bs->change_cb = change_cb;
1610     bs->change_opaque = opaque;
1611 }
1612
1613 int bdrv_is_encrypted(BlockDriverState *bs)
1614 {
1615     if (bs->backing_hd && bs->backing_hd->encrypted)
1616         return 1;
1617     return bs->encrypted;
1618 }
1619
1620 int bdrv_key_required(BlockDriverState *bs)
1621 {
1622     BlockDriverState *backing_hd = bs->backing_hd;
1623
1624     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1625         return 1;
1626     return (bs->encrypted && !bs->valid_key);
1627 }
1628
1629 int bdrv_set_key(BlockDriverState *bs, const char *key)
1630 {
1631     int ret;
1632     if (bs->backing_hd && bs->backing_hd->encrypted) {
1633         ret = bdrv_set_key(bs->backing_hd, key);
1634         if (ret < 0)
1635             return ret;
1636         if (!bs->encrypted)
1637             return 0;
1638     }
1639     if (!bs->encrypted) {
1640         return -EINVAL;
1641     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1642         return -ENOMEDIUM;
1643     }
1644     ret = bs->drv->bdrv_set_key(bs, key);
1645     if (ret < 0) {
1646         bs->valid_key = 0;
1647     } else if (!bs->valid_key) {
1648         bs->valid_key = 1;
1649         /* call the change callback now, we skipped it on open */
1650         bs->media_changed = 1;
1651         if (bs->change_cb)
1652             bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
1653     }
1654     return ret;
1655 }
1656
1657 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1658 {
1659     if (!bs->drv) {
1660         buf[0] = '\0';
1661     } else {
1662         pstrcpy(buf, buf_size, bs->drv->format_name);
1663     }
1664 }
1665
1666 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1667                          void *opaque)
1668 {
1669     BlockDriver *drv;
1670
1671     QLIST_FOREACH(drv, &bdrv_drivers, list) {
1672         it(opaque, drv->format_name);
1673     }
1674 }
1675
1676 BlockDriverState *bdrv_find(const char *name)
1677 {
1678     BlockDriverState *bs;
1679
1680     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1681         if (!strcmp(name, bs->device_name)) {
1682             return bs;
1683         }
1684     }
1685     return NULL;
1686 }
1687
1688 BlockDriverState *bdrv_next(BlockDriverState *bs)
1689 {
1690     if (!bs) {
1691         return QTAILQ_FIRST(&bdrv_states);
1692     }
1693     return QTAILQ_NEXT(bs, list);
1694 }
1695
1696 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1697 {
1698     BlockDriverState *bs;
1699
1700     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1701         it(opaque, bs);
1702     }
1703 }
1704
1705 const char *bdrv_get_device_name(BlockDriverState *bs)
1706 {
1707     return bs->device_name;
1708 }
1709
1710 int bdrv_flush(BlockDriverState *bs)
1711 {
1712     if (bs->open_flags & BDRV_O_NO_FLUSH) {
1713         return 0;
1714     }
1715
1716     if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
1717         return bdrv_co_flush_em(bs);
1718     }
1719
1720     if (bs->drv && bs->drv->bdrv_flush) {
1721         return bs->drv->bdrv_flush(bs);
1722     }
1723
1724     /*
1725      * Some block drivers always operate in either writethrough or unsafe mode
1726      * and don't support bdrv_flush therefore. Usually qemu doesn't know how
1727      * the server works (because the behaviour is hardcoded or depends on
1728      * server-side configuration), so we can't ensure that everything is safe
1729      * on disk. Returning an error doesn't work because that would break guests
1730      * even if the server operates in writethrough mode.
1731      *
1732      * Let's hope the user knows what he's doing.
1733      */
1734     return 0;
1735 }
1736
1737 void bdrv_flush_all(void)
1738 {
1739     BlockDriverState *bs;
1740
1741     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1742         if (bs->drv && !bdrv_is_read_only(bs) &&
1743             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
1744             bdrv_flush(bs);
1745         }
1746     }
1747 }
1748
1749 int bdrv_has_zero_init(BlockDriverState *bs)
1750 {
1751     assert(bs->drv);
1752
1753     if (bs->drv->bdrv_has_zero_init) {
1754         return bs->drv->bdrv_has_zero_init(bs);
1755     }
1756
1757     return 1;
1758 }
1759
1760 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1761 {
1762     if (!bs->drv) {
1763         return -ENOMEDIUM;
1764     }
1765     if (!bs->drv->bdrv_discard) {
1766         return 0;
1767     }
1768     return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1769 }
1770
1771 /*
1772  * Returns true iff the specified sector is present in the disk image. Drivers
1773  * not implementing the functionality are assumed to not support backing files,
1774  * hence all their sectors are reported as allocated.
1775  *
1776  * 'pnum' is set to the number of sectors (including and immediately following
1777  * the specified sector) that are known to be in the same
1778  * allocated/unallocated state.
1779  *
1780  * 'nb_sectors' is the max value 'pnum' should be set to.
1781  */
1782 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1783         int *pnum)
1784 {
1785     int64_t n;
1786     if (!bs->drv->bdrv_is_allocated) {
1787         if (sector_num >= bs->total_sectors) {
1788             *pnum = 0;
1789             return 0;
1790         }
1791         n = bs->total_sectors - sector_num;
1792         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1793         return 1;
1794     }
1795     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1796 }
1797
1798 void bdrv_mon_event(const BlockDriverState *bdrv,
1799                     BlockMonEventAction action, int is_read)
1800 {
1801     QObject *data;
1802     const char *action_str;
1803
1804     switch (action) {
1805     case BDRV_ACTION_REPORT:
1806         action_str = "report";
1807         break;
1808     case BDRV_ACTION_IGNORE:
1809         action_str = "ignore";
1810         break;
1811     case BDRV_ACTION_STOP:
1812         action_str = "stop";
1813         break;
1814     default:
1815         abort();
1816     }
1817
1818     data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1819                               bdrv->device_name,
1820                               action_str,
1821                               is_read ? "read" : "write");
1822     monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1823
1824     qobject_decref(data);
1825 }
1826
1827 static void bdrv_print_dict(QObject *obj, void *opaque)
1828 {
1829     QDict *bs_dict;
1830     Monitor *mon = opaque;
1831
1832     bs_dict = qobject_to_qdict(obj);
1833
1834     monitor_printf(mon, "%s: removable=%d",
1835                         qdict_get_str(bs_dict, "device"),
1836                         qdict_get_bool(bs_dict, "removable"));
1837
1838     if (qdict_get_bool(bs_dict, "removable")) {
1839         monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1840     }
1841
1842     if (qdict_haskey(bs_dict, "inserted")) {
1843         QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1844
1845         monitor_printf(mon, " file=");
1846         monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1847         if (qdict_haskey(qdict, "backing_file")) {
1848             monitor_printf(mon, " backing_file=");
1849             monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1850         }
1851         monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1852                             qdict_get_bool(qdict, "ro"),
1853                             qdict_get_str(qdict, "drv"),
1854                             qdict_get_bool(qdict, "encrypted"));
1855     } else {
1856         monitor_printf(mon, " [not inserted]");
1857     }
1858
1859     monitor_printf(mon, "\n");
1860 }
1861
1862 void bdrv_info_print(Monitor *mon, const QObject *data)
1863 {
1864     qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1865 }
1866
1867 void bdrv_info(Monitor *mon, QObject **ret_data)
1868 {
1869     QList *bs_list;
1870     BlockDriverState *bs;
1871
1872     bs_list = qlist_new();
1873
1874     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1875         QObject *bs_obj;
1876
1877         bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
1878                                     "'removable': %i, 'locked': %i }",
1879                                     bs->device_name, bs->removable,
1880                                     bs->locked);
1881
1882         if (bs->drv) {
1883             QObject *obj;
1884             QDict *bs_dict = qobject_to_qdict(bs_obj);
1885
1886             obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1887                                      "'encrypted': %i }",
1888                                      bs->filename, bs->read_only,
1889                                      bs->drv->format_name,
1890                                      bdrv_is_encrypted(bs));
1891             if (bs->backing_file[0] != '\0') {
1892                 QDict *qdict = qobject_to_qdict(obj);
1893                 qdict_put(qdict, "backing_file",
1894                           qstring_from_str(bs->backing_file));
1895             }
1896
1897             qdict_put_obj(bs_dict, "inserted", obj);
1898         }
1899         qlist_append_obj(bs_list, bs_obj);
1900     }
1901
1902     *ret_data = QOBJECT(bs_list);
1903 }
1904
1905 static void bdrv_stats_iter(QObject *data, void *opaque)
1906 {
1907     QDict *qdict;
1908     Monitor *mon = opaque;
1909
1910     qdict = qobject_to_qdict(data);
1911     monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1912
1913     qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1914     monitor_printf(mon, " rd_bytes=%" PRId64
1915                         " wr_bytes=%" PRId64
1916                         " rd_operations=%" PRId64
1917                         " wr_operations=%" PRId64
1918                         " flush_operations=%" PRId64
1919                         " wr_total_time_ns=%" PRId64
1920                         " rd_total_time_ns=%" PRId64
1921                         " flush_total_time_ns=%" PRId64
1922                         "\n",
1923                         qdict_get_int(qdict, "rd_bytes"),
1924                         qdict_get_int(qdict, "wr_bytes"),
1925                         qdict_get_int(qdict, "rd_operations"),
1926                         qdict_get_int(qdict, "wr_operations"),
1927                         qdict_get_int(qdict, "flush_operations"),
1928                         qdict_get_int(qdict, "wr_total_time_ns"),
1929                         qdict_get_int(qdict, "rd_total_time_ns"),
1930                         qdict_get_int(qdict, "flush_total_time_ns"));
1931 }
1932
1933 void bdrv_stats_print(Monitor *mon, const QObject *data)
1934 {
1935     qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1936 }
1937
1938 static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
1939 {
1940     QObject *res;
1941     QDict *dict;
1942
1943     res = qobject_from_jsonf("{ 'stats': {"
1944                              "'rd_bytes': %" PRId64 ","
1945                              "'wr_bytes': %" PRId64 ","
1946                              "'rd_operations': %" PRId64 ","
1947                              "'wr_operations': %" PRId64 ","
1948                              "'wr_highest_offset': %" PRId64 ","
1949                              "'flush_operations': %" PRId64 ","
1950                              "'wr_total_time_ns': %" PRId64 ","
1951                              "'rd_total_time_ns': %" PRId64 ","
1952                              "'flush_total_time_ns': %" PRId64
1953                              "} }",
1954                              bs->nr_bytes[BDRV_ACCT_READ],
1955                              bs->nr_bytes[BDRV_ACCT_WRITE],
1956                              bs->nr_ops[BDRV_ACCT_READ],
1957                              bs->nr_ops[BDRV_ACCT_WRITE],
1958                              bs->wr_highest_sector *
1959                              (uint64_t)BDRV_SECTOR_SIZE,
1960                              bs->nr_ops[BDRV_ACCT_FLUSH],
1961                              bs->total_time_ns[BDRV_ACCT_WRITE],
1962                              bs->total_time_ns[BDRV_ACCT_READ],
1963                              bs->total_time_ns[BDRV_ACCT_FLUSH]);
1964     dict  = qobject_to_qdict(res);
1965
1966     if (*bs->device_name) {
1967         qdict_put(dict, "device", qstring_from_str(bs->device_name));
1968     }
1969
1970     if (bs->file) {
1971         QObject *parent = bdrv_info_stats_bs(bs->file);
1972         qdict_put_obj(dict, "parent", parent);
1973     }
1974
1975     return res;
1976 }
1977
1978 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
1979 {
1980     QObject *obj;
1981     QList *devices;
1982     BlockDriverState *bs;
1983
1984     devices = qlist_new();
1985
1986     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1987         obj = bdrv_info_stats_bs(bs);
1988         qlist_append_obj(devices, obj);
1989     }
1990
1991     *ret_data = QOBJECT(devices);
1992 }
1993
1994 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1995 {
1996     if (bs->backing_hd && bs->backing_hd->encrypted)
1997         return bs->backing_file;
1998     else if (bs->encrypted)
1999         return bs->filename;
2000     else
2001         return NULL;
2002 }
2003
2004 void bdrv_get_backing_filename(BlockDriverState *bs,
2005                                char *filename, int filename_size)
2006 {
2007     if (!bs->backing_file) {
2008         pstrcpy(filename, filename_size, "");
2009     } else {
2010         pstrcpy(filename, filename_size, bs->backing_file);
2011     }
2012 }
2013
2014 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
2015                           const uint8_t *buf, int nb_sectors)
2016 {
2017     BlockDriver *drv = bs->drv;
2018     if (!drv)
2019         return -ENOMEDIUM;
2020     if (!drv->bdrv_write_compressed)
2021         return -ENOTSUP;
2022     if (bdrv_check_request(bs, sector_num, nb_sectors))
2023         return -EIO;
2024
2025     if (bs->dirty_bitmap) {
2026         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2027     }
2028
2029     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2030 }
2031
2032 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2033 {
2034     BlockDriver *drv = bs->drv;
2035     if (!drv)
2036         return -ENOMEDIUM;
2037     if (!drv->bdrv_get_info)
2038         return -ENOTSUP;
2039     memset(bdi, 0, sizeof(*bdi));
2040     return drv->bdrv_get_info(bs, bdi);
2041 }
2042
2043 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2044                       int64_t pos, int size)
2045 {
2046     BlockDriver *drv = bs->drv;
2047     if (!drv)
2048         return -ENOMEDIUM;
2049     if (drv->bdrv_save_vmstate)
2050         return drv->bdrv_save_vmstate(bs, buf, pos, size);
2051     if (bs->file)
2052         return bdrv_save_vmstate(bs->file, buf, pos, size);
2053     return -ENOTSUP;
2054 }
2055
2056 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2057                       int64_t pos, int size)
2058 {
2059     BlockDriver *drv = bs->drv;
2060     if (!drv)
2061         return -ENOMEDIUM;
2062     if (drv->bdrv_load_vmstate)
2063         return drv->bdrv_load_vmstate(bs, buf, pos, size);
2064     if (bs->file)
2065         return bdrv_load_vmstate(bs->file, buf, pos, size);
2066     return -ENOTSUP;
2067 }
2068
2069 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2070 {
2071     BlockDriver *drv = bs->drv;
2072
2073     if (!drv || !drv->bdrv_debug_event) {
2074         return;
2075     }
2076
2077     return drv->bdrv_debug_event(bs, event);
2078
2079 }
2080
2081 /**************************************************************/
2082 /* handling of snapshots */
2083
2084 int bdrv_can_snapshot(BlockDriverState *bs)
2085 {
2086     BlockDriver *drv = bs->drv;
2087     if (!drv || bdrv_is_removable(bs) || bdrv_is_read_only(bs)) {
2088         return 0;
2089     }
2090
2091     if (!drv->bdrv_snapshot_create) {
2092         if (bs->file != NULL) {
2093             return bdrv_can_snapshot(bs->file);
2094         }
2095         return 0;
2096     }
2097
2098     return 1;
2099 }
2100
2101 int bdrv_is_snapshot(BlockDriverState *bs)
2102 {
2103     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2104 }
2105
2106 BlockDriverState *bdrv_snapshots(void)
2107 {
2108     BlockDriverState *bs;
2109
2110     if (bs_snapshots) {
2111         return bs_snapshots;
2112     }
2113
2114     bs = NULL;
2115     while ((bs = bdrv_next(bs))) {
2116         if (bdrv_can_snapshot(bs)) {
2117             bs_snapshots = bs;
2118             return bs;
2119         }
2120     }
2121     return NULL;
2122 }
2123
2124 int bdrv_snapshot_create(BlockDriverState *bs,
2125                          QEMUSnapshotInfo *sn_info)
2126 {
2127     BlockDriver *drv = bs->drv;
2128     if (!drv)
2129         return -ENOMEDIUM;
2130     if (drv->bdrv_snapshot_create)
2131         return drv->bdrv_snapshot_create(bs, sn_info);
2132     if (bs->file)
2133         return bdrv_snapshot_create(bs->file, sn_info);
2134     return -ENOTSUP;
2135 }
2136
2137 int bdrv_snapshot_goto(BlockDriverState *bs,
2138                        const char *snapshot_id)
2139 {
2140     BlockDriver *drv = bs->drv;
2141     int ret, open_ret;
2142
2143     if (!drv)
2144         return -ENOMEDIUM;
2145     if (drv->bdrv_snapshot_goto)
2146         return drv->bdrv_snapshot_goto(bs, snapshot_id);
2147
2148     if (bs->file) {
2149         drv->bdrv_close(bs);
2150         ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2151         open_ret = drv->bdrv_open(bs, bs->open_flags);
2152         if (open_ret < 0) {
2153             bdrv_delete(bs->file);
2154             bs->drv = NULL;
2155             return open_ret;
2156         }
2157         return ret;
2158     }
2159
2160     return -ENOTSUP;
2161 }
2162
2163 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2164 {
2165     BlockDriver *drv = bs->drv;
2166     if (!drv)
2167         return -ENOMEDIUM;
2168     if (drv->bdrv_snapshot_delete)
2169         return drv->bdrv_snapshot_delete(bs, snapshot_id);
2170     if (bs->file)
2171         return bdrv_snapshot_delete(bs->file, snapshot_id);
2172     return -ENOTSUP;
2173 }
2174
2175 int bdrv_snapshot_list(BlockDriverState *bs,
2176                        QEMUSnapshotInfo **psn_info)
2177 {
2178     BlockDriver *drv = bs->drv;
2179     if (!drv)
2180         return -ENOMEDIUM;
2181     if (drv->bdrv_snapshot_list)
2182         return drv->bdrv_snapshot_list(bs, psn_info);
2183     if (bs->file)
2184         return bdrv_snapshot_list(bs->file, psn_info);
2185     return -ENOTSUP;
2186 }
2187
2188 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2189         const char *snapshot_name)
2190 {
2191     BlockDriver *drv = bs->drv;
2192     if (!drv) {
2193         return -ENOMEDIUM;
2194     }
2195     if (!bs->read_only) {
2196         return -EINVAL;
2197     }
2198     if (drv->bdrv_snapshot_load_tmp) {
2199         return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2200     }
2201     return -ENOTSUP;
2202 }
2203
#define NB_SUFFIXES 4

/*
 * Format size into buf (at most buf_size bytes) in a short human readable
 * form and return buf.
 *
 * Values up to 999 are printed as a plain integer. Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal digit is
 * shown while the scaled value is below 10, otherwise the value is rounded
 * to the nearest integer. Values too large for the T range are still
 * printed with the T suffix.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* Pick the smallest unit that keeps the value below 1000 units. */
    while (idx < NB_SUFFIXES - 1 && size >= 1000 * unit) {
        unit = unit * 1024;
        idx++;
    }

    if (size < 10 * unit) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / unit,
                 suffixes[idx]);
    } else {
        /* Adding half a unit before dividing rounds to nearest. */
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 ((size + (unit >> 1)) / unit),
                 suffixes[idx]);
    }
    return buf;
}
2233
2234 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2235 {
2236     char buf1[128], date_buf[128], clock_buf[128];
2237 #ifdef _WIN32
2238     struct tm *ptm;
2239 #else
2240     struct tm tm;
2241 #endif
2242     time_t ti;
2243     int64_t secs;
2244
2245     if (!sn) {
2246         snprintf(buf, buf_size,
2247                  "%-10s%-20s%7s%20s%15s",
2248                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2249     } else {
2250         ti = sn->date_sec;
2251 #ifdef _WIN32
2252         ptm = localtime(&ti);
2253         strftime(date_buf, sizeof(date_buf),
2254                  "%Y-%m-%d %H:%M:%S", ptm);
2255 #else
2256         localtime_r(&ti, &tm);
2257         strftime(date_buf, sizeof(date_buf),
2258                  "%Y-%m-%d %H:%M:%S", &tm);
2259 #endif
2260         secs = sn->vm_clock_nsec / 1000000000;
2261         snprintf(clock_buf, sizeof(clock_buf),
2262                  "%02d:%02d:%02d.%03d",
2263                  (int)(secs / 3600),
2264                  (int)((secs / 60) % 60),
2265                  (int)(secs % 60),
2266                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
2267         snprintf(buf, buf_size,
2268                  "%-10s%-20s%7s%20s%15s",
2269                  sn->id_str, sn->name,
2270                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2271                  date_buf,
2272                  clock_buf);
2273     }
2274     return buf;
2275 }
2276
2277 /**************************************************************/
2278 /* async I/Os */
2279
2280 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2281                                  QEMUIOVector *qiov, int nb_sectors,
2282                                  BlockDriverCompletionFunc *cb, void *opaque)
2283 {
2284     BlockDriver *drv = bs->drv;
2285
2286     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2287
2288     if (!drv)
2289         return NULL;
2290     if (bdrv_check_request(bs, sector_num, nb_sectors))
2291         return NULL;
2292
2293     return drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
2294                                cb, opaque);
2295 }
2296
/*
 * Context for block_complete_cb(): remembers the caller's completion
 * callback together with the written range so the range can be marked dirty
 * when the write completes.
 */
typedef struct BlockCompleteData {
    BlockDriverCompletionFunc *cb;  /* caller's completion callback */
    void *opaque;                   /* argument passed to cb */
    BlockDriverState *bs;           /* device whose dirty bitmap is updated */
    int64_t sector_num;             /* first sector of the write */
    int nb_sectors;                 /* number of sectors written */
} BlockCompleteData;
2304
2305 static void block_complete_cb(void *opaque, int ret)
2306 {
2307     BlockCompleteData *b = opaque;
2308
2309     if (b->bs->dirty_bitmap) {
2310         set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2311     }
2312     b->cb(b->opaque, ret);
2313     g_free(b);
2314 }
2315
2316 static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2317                                              int64_t sector_num,
2318                                              int nb_sectors,
2319                                              BlockDriverCompletionFunc *cb,
2320                                              void *opaque)
2321 {
2322     BlockCompleteData *blkdata = g_malloc0(sizeof(BlockCompleteData));
2323
2324     blkdata->bs = bs;
2325     blkdata->cb = cb;
2326     blkdata->opaque = opaque;
2327     blkdata->sector_num = sector_num;
2328     blkdata->nb_sectors = nb_sectors;
2329
2330     return blkdata;
2331 }
2332
2333 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2334                                   QEMUIOVector *qiov, int nb_sectors,
2335                                   BlockDriverCompletionFunc *cb, void *opaque)
2336 {
2337     BlockDriver *drv = bs->drv;
2338     BlockDriverAIOCB *ret;
2339     BlockCompleteData *blk_cb_data;
2340
2341     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2342
2343     if (!drv)
2344         return NULL;
2345     if (bs->read_only)
2346         return NULL;
2347     if (bdrv_check_request(bs, sector_num, nb_sectors))
2348         return NULL;
2349
2350     if (bs->dirty_bitmap) {
2351         blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
2352                                          opaque);
2353         cb = &block_complete_cb;
2354         opaque = blk_cb_data;
2355     }
2356
2357     ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
2358                                cb, opaque);
2359
2360     if (ret) {
2361         if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2362             bs->wr_highest_sector = sector_num + nb_sectors - 1;
2363         }
2364     }
2365
2366     return ret;
2367 }
2368
2369
/*
 * Shared completion state of one bdrv_aio_multiwrite() call.
 */
typedef struct MultiwriteCB {
    int error;          /* first negative result seen by multiwrite_cb() */
    int num_requests;   /* submitted requests that have not completed yet */
    int num_callbacks;  /* number of entries in callbacks[] */
    struct {
        BlockDriverCompletionFunc *cb;  /* caller's completion callback */
        void *opaque;                   /* argument passed to cb */
        QEMUIOVector *free_qiov;        /* merged qiov to destroy and free */
        void *free_buf;                 /* zero-fill buffer to free */
    } callbacks[];
} MultiwriteCB;
2381
2382 static void multiwrite_user_cb(MultiwriteCB *mcb)
2383 {
2384     int i;
2385
2386     for (i = 0; i < mcb->num_callbacks; i++) {
2387         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2388         if (mcb->callbacks[i].free_qiov) {
2389             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2390         }
2391         g_free(mcb->callbacks[i].free_qiov);
2392         qemu_vfree(mcb->callbacks[i].free_buf);
2393     }
2394 }
2395
2396 static void multiwrite_cb(void *opaque, int ret)
2397 {
2398     MultiwriteCB *mcb = opaque;
2399
2400     trace_multiwrite_cb(mcb, ret);
2401
2402     if (ret < 0 && !mcb->error) {
2403         mcb->error = ret;
2404     }
2405
2406     mcb->num_requests--;
2407     if (mcb->num_requests == 0) {
2408         multiwrite_user_cb(mcb);
2409         g_free(mcb);
2410     }
2411 }
2412
2413 static int multiwrite_req_compare(const void *a, const void *b)
2414 {
2415     const BlockRequest *req1 = a, *req2 = b;
2416
2417     /*
2418      * Note that we can't simply subtract req2->sector from req1->sector
2419      * here as that could overflow the return value.
2420      */
2421     if (req1->sector > req2->sector) {
2422         return 1;
2423     } else if (req1->sector < req2->sector) {
2424         return -1;
2425     } else {
2426         return 0;
2427     }
2428 }
2429
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 *
 * reqs is sorted in place by start sector and then compacted: the surviving
 * requests end up in reqs[0 .. return value - 1]. Every qiov and zero-fill
 * buffer allocated for a merge is recorded in mcb->callbacks[i] (i being the
 * index of the request that was merged in), so that multiwrite_user_cb()
 * releases it once the multiwrite has completed.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
    int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        // First sector after the end of the request currently being built
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // This handles the cases that are valid for all block drivers, namely
        // exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // The block driver may decide that it makes sense to combine requests
        // even if there is a gap of some sectors between them. In this case,
        // the gap is filled with zeros (therefore only applicable for yet
        // unused space in format like qcow2).
        if (!merge && bs->drv->bdrv_merge_requests) {
            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
        }

        // Never build a vector with more than IOV_MAX entries; the extra one
        // accounts for a possible zero-fill element.
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            // NOTE(review): only the part of the first request that lies
            // before reqs[i].sector is kept here, followed by all of
            // reqs[i]. If reqs[i] ends before the first request does, the
            // first request's tail appears to be lost - confirm whether
            // callers can submit such requests.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);

            // We might need to add some zeros between the two requests
            if (reqs[i].sector > oldreq_last) {
                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
                memset(buf, 0, zero_bytes);
                qemu_iovec_add(qiov, buf, zero_bytes);
                mcb->callbacks[i].free_buf = buf;
            }

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            // Remember the merged qiov so it is destroyed on completion
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            // No merge: request i becomes the next output request
            outidx++;
            reqs[outidx].sector     = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov       = reqs[i].qiov;
        }
    }

    return outidx + 1;
}
2504
2505 /*
2506  * Submit multiple AIO write requests at once.
2507  *
2508  * On success, the function returns 0 and all requests in the reqs array have
2509  * been submitted. In error case this function returns -1, and any of the
2510  * requests may or may not be submitted yet. In particular, this means that the
2511  * callback will be called for some of the requests, for others it won't. The
2512  * caller must check the error field of the BlockRequest to wait for the right
2513  * callbacks (if error != 0, no callback will be called).
2514  *
2515  * The implementation may modify the contents of the reqs array, e.g. to merge
2516  * requests. However, the fields opaque and error are left unmodified as they
2517  * are used to signal failure for a single request to the caller.
2518  */
2519 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2520 {
2521     BlockDriverAIOCB *acb;
2522     MultiwriteCB *mcb;
2523     int i;
2524
2525     /* don't submit writes if we don't have a medium */
2526     if (bs->drv == NULL) {
2527         for (i = 0; i < num_reqs; i++) {
2528             reqs[i].error = -ENOMEDIUM;
2529         }
2530         return -1;
2531     }
2532
2533     if (num_reqs == 0) {
2534         return 0;
2535     }
2536
2537     // Create MultiwriteCB structure
2538     mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2539     mcb->num_requests = 0;
2540     mcb->num_callbacks = num_reqs;
2541
2542     for (i = 0; i < num_reqs; i++) {
2543         mcb->callbacks[i].cb = reqs[i].cb;
2544         mcb->callbacks[i].opaque = reqs[i].opaque;
2545     }
2546
2547     // Check for mergable requests
2548     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2549
2550     trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2551
2552     /*
2553      * Run the aio requests. As soon as one request can't be submitted
2554      * successfully, fail all requests that are not yet submitted (we must
2555      * return failure for all requests anyway)
2556      *
2557      * num_requests cannot be set to the right value immediately: If
2558      * bdrv_aio_writev fails for some request, num_requests would be too high
2559      * and therefore multiwrite_cb() would never recognize the multiwrite
2560      * request as completed. We also cannot use the loop variable i to set it
2561      * when the first request fails because the callback may already have been
2562      * called for previously submitted requests. Thus, num_requests must be
2563      * incremented for each request that is submitted.
2564      *
2565      * The problem that callbacks may be called early also means that we need
2566      * to take care that num_requests doesn't become 0 before all requests are
2567      * submitted - multiwrite_cb() would consider the multiwrite request
2568      * completed. A dummy request that is "completed" by a manual call to
2569      * multiwrite_cb() takes care of this.
2570      */
2571     mcb->num_requests = 1;
2572
2573     // Run the aio requests
2574     for (i = 0; i < num_reqs; i++) {
2575         mcb->num_requests++;
2576         acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2577             reqs[i].nb_sectors, multiwrite_cb, mcb);
2578
2579         if (acb == NULL) {
2580             // We can only fail the whole thing if no request has been
2581             // submitted yet. Otherwise we'll wait for the submitted AIOs to
2582             // complete and report the error in the callback.
2583             if (i == 0) {
2584                 trace_bdrv_aio_multiwrite_earlyfail(mcb);
2585                 goto fail;
2586             } else {
2587                 trace_bdrv_aio_multiwrite_latefail(mcb, i);
2588                 multiwrite_cb(mcb, -EIO);
2589                 break;
2590             }
2591         }
2592     }
2593
2594     /* Complete the dummy request */
2595     multiwrite_cb(mcb, 0);
2596
2597     return 0;
2598
2599 fail:
2600     for (i = 0; i < mcb->num_callbacks; i++) {
2601         reqs[i].error = -EIO;
2602     }
2603     g_free(mcb);
2604     return -1;
2605 }
2606
2607 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2608         BlockDriverCompletionFunc *cb, void *opaque)
2609 {
2610     BlockDriver *drv = bs->drv;
2611
2612     trace_bdrv_aio_flush(bs, opaque);
2613
2614     if (bs->open_flags & BDRV_O_NO_FLUSH) {
2615         return bdrv_aio_noop_em(bs, cb, opaque);
2616     }
2617
2618     if (!drv)
2619         return NULL;
2620     return drv->bdrv_aio_flush(bs, cb, opaque);
2621 }
2622
2623 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2624 {
2625     acb->pool->cancel(acb);
2626 }
2627
2628
2629 /**************************************************************/
2630 /* async block device emulation */
2631
/*
 * AIOCB used by the emulation of asynchronous requests on top of the
 * synchronous bdrv_read()/bdrv_write()/bdrv_flush() calls. The operation
 * runs at submission time; its result is delivered later from a bottom
 * half.
 */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;         /* bottom half that runs bdrv_aio_bh_cb() */
    int ret;            /* result reported to the completion callback */
    /* vector translation state */
    QEMUIOVector *qiov; /* caller's vector; filled from bounce after a read */
    uint8_t *bounce;    /* linear buffer the synchronous call operates on */
    int is_write;       /* non-zero: do not copy bounce back into qiov */
} BlockDriverAIOCBSync;
2641
2642 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2643 {
2644     BlockDriverAIOCBSync *acb =
2645         container_of(blockacb, BlockDriverAIOCBSync, common);
2646     qemu_bh_delete(acb->bh);
2647     acb->bh = NULL;
2648     qemu_aio_release(acb);
2649 }
2650
/* AIOCB pool for the requests emulated with BlockDriverAIOCBSync. */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};
2655
2656 static void bdrv_aio_bh_cb(void *opaque)
2657 {
2658     BlockDriverAIOCBSync *acb = opaque;
2659
2660     if (!acb->is_write)
2661         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2662     qemu_vfree(acb->bounce);
2663     acb->common.cb(acb->common.opaque, acb->ret);
2664     qemu_bh_delete(acb->bh);
2665     acb->bh = NULL;
2666     qemu_aio_release(acb);
2667 }
2668
2669 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2670                                             int64_t sector_num,
2671                                             QEMUIOVector *qiov,
2672                                             int nb_sectors,
2673                                             BlockDriverCompletionFunc *cb,
2674                                             void *opaque,
2675                                             int is_write)
2676
2677 {
2678     BlockDriverAIOCBSync *acb;
2679
2680     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2681     acb->is_write = is_write;
2682     acb->qiov = qiov;
2683     acb->bounce = qemu_blockalign(bs, qiov->size);
2684
2685     if (!acb->bh)
2686         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2687
2688     if (is_write) {
2689         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2690         acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2691     } else {
2692         acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2693     }
2694
2695     qemu_bh_schedule(acb->bh);
2696
2697     return &acb->common;
2698 }
2699
2700 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2701         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2702         BlockDriverCompletionFunc *cb, void *opaque)
2703 {
2704     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2705 }
2706
2707 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2708         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2709         BlockDriverCompletionFunc *cb, void *opaque)
2710 {
2711     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2712 }
2713
2714
/*
 * AIOCB used when an asynchronous request is emulated by running the
 * driver's coroutine read/write function (see bdrv_co_rw()).
 */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    BlockRequest req;   /* sector, nb_sectors, qiov; error holds the result */
    bool is_write;      /* true for writes, false for reads */
    QEMUBH* bh;         /* bottom half that delivers the completion */
} BlockDriverAIOCBCoroutine;
2721
2722 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2723 {
2724     qemu_aio_flush();
2725 }
2726
/* AIOCB pool for the requests emulated with BlockDriverAIOCBCoroutine. */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
    .cancel             = bdrv_aio_co_cancel_em,
};
2731
2732 static void bdrv_co_rw_bh(void *opaque)
2733 {
2734     BlockDriverAIOCBCoroutine *acb = opaque;
2735
2736     acb->common.cb(acb->common.opaque, acb->req.error);
2737     qemu_bh_delete(acb->bh);
2738     qemu_aio_release(acb);
2739 }
2740
2741 static void coroutine_fn bdrv_co_rw(void *opaque)
2742 {
2743     BlockDriverAIOCBCoroutine *acb = opaque;
2744     BlockDriverState *bs = acb->common.bs;
2745
2746     if (!acb->is_write) {
2747         acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
2748             acb->req.nb_sectors, acb->req.qiov);
2749     } else {
2750         acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
2751             acb->req.nb_sectors, acb->req.qiov);
2752     }
2753
2754     acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
2755     qemu_bh_schedule(acb->bh);
2756 }
2757
2758 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2759                                                int64_t sector_num,
2760                                                QEMUIOVector *qiov,
2761                                                int nb_sectors,
2762                                                BlockDriverCompletionFunc *cb,
2763                                                void *opaque,
2764                                                bool is_write)
2765 {
2766     Coroutine *co;
2767     BlockDriverAIOCBCoroutine *acb;
2768
2769     acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2770     acb->req.sector = sector_num;
2771     acb->req.nb_sectors = nb_sectors;
2772     acb->req.qiov = qiov;
2773     acb->is_write = is_write;
2774
2775     co = qemu_coroutine_create(bdrv_co_rw);
2776     qemu_coroutine_enter(co, acb);
2777
2778     return &acb->common;
2779 }
2780
2781 static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
2782         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2783         BlockDriverCompletionFunc *cb, void *opaque)
2784 {
2785     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2786                                  false);
2787 }
2788
2789 static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
2790         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2791         BlockDriverCompletionFunc *cb, void *opaque)
2792 {
2793     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2794                                  true);
2795 }
2796
2797 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2798         BlockDriverCompletionFunc *cb, void *opaque)
2799 {
2800     BlockDriverAIOCBSync *acb;
2801
2802     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2803     acb->is_write = 1; /* don't bounce in the completion hadler */
2804     acb->qiov = NULL;
2805     acb->bounce = NULL;
2806     acb->ret = 0;
2807
2808     if (!acb->bh)
2809         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2810
2811     bdrv_flush(bs);
2812     qemu_bh_schedule(acb->bh);
2813     return &acb->common;
2814 }
2815
2816 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2817         BlockDriverCompletionFunc *cb, void *opaque)
2818 {
2819     BlockDriverAIOCBSync *acb;
2820
2821     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2822     acb->is_write = 1; /* don't bounce in the completion handler */
2823     acb->qiov = NULL;
2824     acb->bounce = NULL;
2825     acb->ret = 0;
2826
2827     if (!acb->bh) {
2828         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2829     }
2830
2831     qemu_bh_schedule(acb->bh);
2832     return &acb->common;
2833 }
2834
2835 /**************************************************************/
2836 /* sync block device emulation */
2837
/*
 * Completion callback for the synchronous emulation: stores ret in the int
 * that opaque points to.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2842
2843 #define NOT_DONE 0x7fffffff
2844
2845 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
2846                         uint8_t *buf, int nb_sectors)
2847 {
2848     int async_ret;
2849     BlockDriverAIOCB *acb;
2850     struct iovec iov;
2851     QEMUIOVector qiov;
2852
2853     async_ret = NOT_DONE;
2854     iov.iov_base = (void *)buf;
2855     iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2856     qemu_iovec_init_external(&qiov, &iov, 1);
2857     acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2858         bdrv_rw_em_cb, &async_ret);
2859     if (acb == NULL) {
2860         async_ret = -1;
2861         goto fail;
2862     }
2863
2864     while (async_ret == NOT_DONE) {
2865         qemu_aio_wait();
2866     }
2867
2868
2869 fail:
2870     return async_ret;
2871 }
2872
2873 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2874                          const uint8_t *buf, int nb_sectors)
2875 {
2876     int async_ret;
2877     BlockDriverAIOCB *acb;
2878     struct iovec iov;
2879     QEMUIOVector qiov;
2880
2881     async_ret = NOT_DONE;
2882     iov.iov_base = (void *)buf;
2883     iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2884     qemu_iovec_init_external(&qiov, &iov, 1);
2885     acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2886         bdrv_rw_em_cb, &async_ret);
2887     if (acb == NULL) {
2888         async_ret = -1;
2889         goto fail;
2890     }
2891     while (async_ret == NOT_DONE) {
2892         qemu_aio_wait();
2893     }
2894
2895 fail:
2896     return async_ret;
2897 }
2898
2899 void bdrv_init(void)
2900 {
2901     module_call_init(MODULE_INIT_BLOCK);
2902 }
2903
2904 void bdrv_init_with_whitelist(void)
2905 {
2906     use_bdrv_whitelist = 1;
2907     bdrv_init();
2908 }
2909
2910 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2911                    BlockDriverCompletionFunc *cb, void *opaque)
2912 {
2913     BlockDriverAIOCB *acb;
2914
2915     if (pool->free_aiocb) {
2916         acb = pool->free_aiocb;
2917         pool->free_aiocb = acb->next;
2918     } else {
2919         acb = g_malloc0(pool->aiocb_size);
2920         acb->pool = pool;
2921     }
2922     acb->bs = bs;
2923     acb->cb = cb;
2924     acb->opaque = opaque;
2925     return acb;
2926 }
2927
2928 void qemu_aio_release(void *p)
2929 {
2930     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2931     AIOPool *pool = acb->pool;
2932     acb->next = pool->free_aiocb;
2933     pool->free_aiocb = acb;
2934 }
2935
2936 /**************************************************************/
2937 /* Coroutine block device emulation */
2938
/*
 * Completion context shared between a coroutine that issued an AIO request
 * and bdrv_co_io_em_complete().
 */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;   /* coroutine to re-enter on completion */
    int ret;                /* result of the request */
} CoroutineIOCompletion;
2943
2944 static void bdrv_co_io_em_complete(void *opaque, int ret)
2945 {
2946     CoroutineIOCompletion *co = opaque;
2947
2948     co->ret = ret;
2949     qemu_coroutine_enter(co->coroutine, NULL);
2950 }
2951
2952 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2953                                       int nb_sectors, QEMUIOVector *iov,
2954                                       bool is_write)
2955 {
2956     CoroutineIOCompletion co = {
2957         .coroutine = qemu_coroutine_self(),
2958     };
2959     BlockDriverAIOCB *acb;
2960
2961     if (is_write) {
2962         acb = bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2963                               bdrv_co_io_em_complete, &co);
2964     } else {
2965         acb = bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
2966                              bdrv_co_io_em_complete, &co);
2967     }
2968
2969     trace_bdrv_co_io(is_write, acb);
2970     if (!acb) {
2971         return -EIO;
2972     }
2973     qemu_coroutine_yield();
2974
2975     return co.ret;
2976 }
2977
2978 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
2979                                          int64_t sector_num, int nb_sectors,
2980                                          QEMUIOVector *iov)
2981 {
2982     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
2983 }
2984
2985 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
2986                                          int64_t sector_num, int nb_sectors,
2987                                          QEMUIOVector *iov)
2988 {
2989     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
2990 }
2991
2992 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
2993 {
2994     CoroutineIOCompletion co = {
2995         .coroutine = qemu_coroutine_self(),
2996     };
2997     BlockDriverAIOCB *acb;
2998
2999     acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3000     if (!acb) {
3001         return -EIO;
3002     }
3003     qemu_coroutine_yield();
3004     return co.ret;
3005 }
3006
3007 /**************************************************************/
3008 /* removable device support */
3009
3010 /**
3011  * Return TRUE if the media is present
3012  */
3013 int bdrv_is_inserted(BlockDriverState *bs)
3014 {
3015     BlockDriver *drv = bs->drv;
3016     int ret;
3017     if (!drv)
3018         return 0;
3019     if (!drv->bdrv_is_inserted)
3020         return !bs->tray_open;
3021     ret = drv->bdrv_is_inserted(bs);
3022     return ret;
3023 }
3024
3025 /**
3026  * Return TRUE if the media changed since the last call to this
3027  * function. It is currently only used for floppy disks
3028  */
3029 int bdrv_media_changed(BlockDriverState *bs)
3030 {
3031     BlockDriver *drv = bs->drv;
3032     int ret;
3033
3034     if (!drv || !drv->bdrv_media_changed)
3035         ret = -ENOTSUP;
3036     else
3037         ret = drv->bdrv_media_changed(bs);
3038     if (ret == -ENOTSUP)
3039         ret = bs->media_changed;
3040     bs->media_changed = 0;
3041     return ret;
3042 }
3043
3044 /**
3045  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3046  */
3047 int bdrv_eject(BlockDriverState *bs, int eject_flag)
3048 {
3049     BlockDriver *drv = bs->drv;
3050
3051     if (eject_flag && bs->locked) {
3052         return -EBUSY;
3053     }
3054
3055     if (drv && drv->bdrv_eject) {
3056         drv->bdrv_eject(bs, eject_flag);
3057     }
3058     bs->tray_open = eject_flag;
3059     return 0;
3060 }
3061
3062 int bdrv_is_locked(BlockDriverState *bs)
3063 {
3064     return bs->locked;
3065 }
3066
3067 /**
3068  * Lock or unlock the media (if it is locked, the user won't be able
3069  * to eject it manually).
3070  */
3071 void bdrv_set_locked(BlockDriverState *bs, int locked)
3072 {
3073     BlockDriver *drv = bs->drv;
3074
3075     trace_bdrv_set_locked(bs, locked);
3076
3077     bs->locked = locked;
3078     if (drv && drv->bdrv_set_locked) {
3079         drv->bdrv_set_locked(bs, locked);
3080     }
3081 }
3082
3083 /* needed for generic scsi interface */
3084
3085 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3086 {
3087     BlockDriver *drv = bs->drv;
3088
3089     if (drv && drv->bdrv_ioctl)
3090         return drv->bdrv_ioctl(bs, req, buf);
3091     return -ENOTSUP;
3092 }
3093
3094 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3095         unsigned long int req, void *buf,
3096         BlockDriverCompletionFunc *cb, void *opaque)
3097 {
3098     BlockDriver *drv = bs->drv;
3099
3100     if (drv && drv->bdrv_aio_ioctl)
3101         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3102     return NULL;
3103 }
3104
3105
3106
3107 void *qemu_blockalign(BlockDriverState *bs, size_t size)
3108 {
3109     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3110 }
3111
3112 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3113 {
3114     int64_t bitmap_size;
3115
3116     bs->dirty_count = 0;
3117     if (enable) {
3118         if (!bs->dirty_bitmap) {
3119             bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3120                     BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3121             bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
3122
3123             bs->dirty_bitmap = g_malloc0(bitmap_size);
3124         }
3125     } else {
3126         if (bs->dirty_bitmap) {
3127             g_free(bs->dirty_bitmap);
3128             bs->dirty_bitmap = NULL;
3129         }
3130     }
3131 }
3132
3133 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3134 {
3135     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
3136
3137     if (bs->dirty_bitmap &&
3138         (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
3139         return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3140             (1UL << (chunk % (sizeof(unsigned long) * 8))));
3141     } else {
3142         return 0;
3143     }
3144 }
3145
3146 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3147                       int nr_sectors)
3148 {
3149     set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3150 }
3151
3152 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3153 {
3154     return bs->dirty_count;
3155 }
3156
3157 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3158 {
3159     assert(bs->in_use != in_use);
3160     bs->in_use = in_use;
3161 }
3162
3163 int bdrv_in_use(BlockDriverState *bs)
3164 {
3165     return bs->in_use;
3166 }
3167
3168 void
3169 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3170         enum BlockAcctType type)
3171 {
3172     assert(type < BDRV_MAX_IOTYPE);
3173
3174     cookie->bytes = bytes;
3175     cookie->start_time_ns = get_clock();
3176     cookie->type = type;
3177 }
3178
3179 void
3180 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3181 {
3182     assert(cookie->type < BDRV_MAX_IOTYPE);
3183
3184     bs->nr_bytes[cookie->type] += cookie->bytes;
3185     bs->nr_ops[cookie->type]++;
3186     bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
3187 }
3188
3189 int bdrv_img_create(const char *filename, const char *fmt,
3190                     const char *base_filename, const char *base_fmt,
3191                     char *options, uint64_t img_size, int flags)
3192 {
3193     QEMUOptionParameter *param = NULL, *create_options = NULL;
3194     QEMUOptionParameter *backing_fmt, *backing_file, *size;
3195     BlockDriverState *bs = NULL;
3196     BlockDriver *drv, *proto_drv;
3197     BlockDriver *backing_drv = NULL;
3198     int ret = 0;
3199
3200     /* Find driver and parse its options */
3201     drv = bdrv_find_format(fmt);
3202     if (!drv) {
3203         error_report("Unknown file format '%s'", fmt);
3204         ret = -EINVAL;
3205         goto out;
3206     }
3207
3208     proto_drv = bdrv_find_protocol(filename);
3209     if (!proto_drv) {
3210         error_report("Unknown protocol '%s'", filename);
3211         ret = -EINVAL;
3212         goto out;
3213     }
3214
3215     create_options = append_option_parameters(create_options,
3216                                               drv->create_options);
3217     create_options = append_option_parameters(create_options,
3218                                               proto_drv->create_options);
3219
3220     /* Create parameter list with default values */
3221     param = parse_option_parameters("", create_options, param);
3222
3223     set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3224
3225     /* Parse -o options */
3226     if (options) {
3227         param = parse_option_parameters(options, create_options, param);
3228         if (param == NULL) {
3229             error_report("Invalid options for file format '%s'.", fmt);
3230             ret = -EINVAL;
3231             goto out;
3232         }
3233     }
3234
3235     if (base_filename) {
3236         if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3237                                  base_filename)) {
3238             error_report("Backing file not supported for file format '%s'",
3239                          fmt);
3240             ret = -EINVAL;
3241             goto out;
3242         }
3243     }
3244
3245     if (base_fmt) {
3246         if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3247             error_report("Backing file format not supported for file "
3248                          "format '%s'", fmt);
3249             ret = -EINVAL;
3250             goto out;
3251         }
3252     }
3253
3254     backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3255     if (backing_file && backing_file->value.s) {
3256         if (!strcmp(filename, backing_file->value.s)) {
3257             error_report("Error: Trying to create an image with the "
3258                          "same filename as the backing file");
3259             ret = -EINVAL;
3260             goto out;
3261         }
3262     }
3263
3264     backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3265     if (backing_fmt && backing_fmt->value.s) {
3266         backing_drv = bdrv_find_format(backing_fmt->value.s);
3267         if (!backing_drv) {
3268             error_report("Unknown backing file format '%s'",
3269                          backing_fmt->value.s);
3270             ret = -EINVAL;
3271             goto out;
3272         }
3273     }
3274
3275     // The size for the image must always be specified, with one exception:
3276     // If we are using a backing file, we can obtain the size from there
3277     size = get_option_parameter(param, BLOCK_OPT_SIZE);
3278     if (size && size->value.n == -1) {
3279         if (backing_file && backing_file->value.s) {
3280             uint64_t size;
3281             char buf[32];
3282
3283             bs = bdrv_new("");
3284
3285             ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3286             if (ret < 0) {
3287                 error_report("Could not open '%s'", backing_file->value.s);
3288                 goto out;
3289             }
3290             bdrv_get_geometry(bs, &size);
3291             size *= 512;
3292
3293             snprintf(buf, sizeof(buf), "%" PRId64, size);
3294             set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3295         } else {
3296             error_report("Image creation needs a size parameter");
3297             ret = -EINVAL;
3298             goto out;
3299         }
3300     }
3301
3302     printf("Formatting '%s', fmt=%s ", filename, fmt);
3303     print_option_parameters(param);
3304     puts("");
3305
3306     ret = bdrv_create(drv, filename, param);
3307
3308     if (ret < 0) {
3309         if (ret == -ENOTSUP) {
3310             error_report("Formatting or formatting option not supported for "
3311                          "file format '%s'", fmt);
3312         } else if (ret == -EFBIG) {
3313             error_report("The image size is too large for file format '%s'",
3314                          fmt);
3315         } else {
3316             error_report("%s: error while creating %s: %s", filename, fmt,
3317                          strerror(-ret));
3318         }
3319     }
3320
3321 out:
3322     free_option_parameters(create_options);
3323     free_option_parameters(param);
3324
3325     if (bs) {
3326         bdrv_delete(bs);
3327     }
3328
3329     return ret;
3330 }
This page took 0.199996 seconds and 4 git commands to generate.