]> Git Repo - qemu.git/blob - block.c
Move main signal handler setup to OS-specific files.
[qemu.git] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "monitor.h"
27 #include "block_int.h"
28 #include "module.h"
29 #include "qemu-objects.h"
30
31 #ifdef CONFIG_BSD
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/ioctl.h>
35 #include <sys/queue.h>
36 #ifndef __DragonFly__
37 #include <sys/disk.h>
38 #endif
39 #endif
40
41 #ifdef _WIN32
42 #include <windows.h>
43 #endif
44
/*
 * Forward declarations of the static emulation helpers.  bdrv_register()
 * installs the readv/writev/flush AIO helpers and the read/write helpers
 * into drivers that do not supply their own implementation.
 */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque);
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors);
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors);

/* Block device states; bdrv_new() appends every device with a non-empty name */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* Registered block drivers; bdrv_register() inserts new drivers at the head */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
68
/*
 * Tell whether 'path' is absolute.
 *
 * Everything up to and including the first ':' is skipped, so names of the
 * form "prefix:/dir" are absolute as well.  What remains is absolute when
 * it begins with a directory separator.
 *
 * Returns 1 for an absolute path, 0 otherwise.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;

#ifdef _WIN32
    return (start[0] == '/' || start[0] == '\\');
#else
    return (start[0] == '/');
#endif
}
88
/*
 * Build in 'dest' (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the path designated by 'filename'.
 *
 * An absolute 'filename' is copied as is.  Otherwise it is appended to the
 * directory part of 'base_path', i.e. everything up to and including the
 * later of its first ':' and its last directory separator.  URLs are
 * supported through the ':' handling.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *dir_end;
    int dir_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* first character following the first ':' (or the start of the path) */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* first character following the last separator (or the start) */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* the directory prefix ends at whichever of the two lies further right */
    dir_end = (after_sep > after_colon) ? after_sep : after_colon;

    dir_len = dir_end - base_path;
    if (dir_len > dest_size - 1) {
        dir_len = dest_size - 1;
    }
    memcpy(dest, base_path, dir_len);
    dest[dir_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
132
133 void bdrv_register(BlockDriver *bdrv)
134 {
135     if (!bdrv->bdrv_aio_readv) {
136         /* add AIO emulation layer */
137         bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
138         bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
139     } else if (!bdrv->bdrv_read) {
140         /* add synchronous IO emulation layer */
141         bdrv->bdrv_read = bdrv_read_em;
142         bdrv->bdrv_write = bdrv_write_em;
143     }
144
145     if (!bdrv->bdrv_aio_flush)
146         bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
147
148     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
149 }
150
151 /* create a new block device (by default it is empty) */
152 BlockDriverState *bdrv_new(const char *device_name)
153 {
154     BlockDriverState *bs;
155
156     bs = qemu_mallocz(sizeof(BlockDriverState));
157     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
158     if (device_name[0] != '\0') {
159         QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
160     }
161     return bs;
162 }
163
164 BlockDriver *bdrv_find_format(const char *format_name)
165 {
166     BlockDriver *drv1;
167     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
168         if (!strcmp(drv1->format_name, format_name)) {
169             return drv1;
170         }
171     }
172     return NULL;
173 }
174
175 static int bdrv_is_whitelisted(BlockDriver *drv)
176 {
177     static const char *whitelist[] = {
178         CONFIG_BDRV_WHITELIST
179     };
180     const char **p;
181
182     if (!whitelist[0])
183         return 1;               /* no whitelist, anything goes */
184
185     for (p = whitelist; *p; p++) {
186         if (!strcmp(drv->format_name, *p)) {
187             return 1;
188         }
189     }
190     return 0;
191 }
192
193 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
194 {
195     BlockDriver *drv = bdrv_find_format(format_name);
196     return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
197 }
198
199 int bdrv_create(BlockDriver *drv, const char* filename,
200     QEMUOptionParameter *options)
201 {
202     if (!drv->bdrv_create)
203         return -ENOTSUP;
204
205     return drv->bdrv_create(filename, options);
206 }
207
208 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
209 {
210     BlockDriver *drv;
211
212     drv = bdrv_find_protocol(filename);
213     if (drv == NULL) {
214         drv = bdrv_find_format("file");
215     }
216
217     return bdrv_create(drv, filename, options);
218 }
219
#ifdef _WIN32
/*
 * Create a uniquely named temporary file and store its name in 'filename'
 * (at most 'size' bytes including the terminating NUL).
 *
 * GetTempFileName() may write up to MAX_PATH characters, so the name is
 * generated in a local buffer and then copied with the caller's size limit
 * instead of being written straight into 'filename'.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];
    char temp_file[MAX_PATH];

    /* keep the buffers well defined even if the Win32 calls fail */
    temp_dir[0] = '\0';
    temp_file[0] = '\0';

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, temp_file);
    pstrcpy(filename, size, temp_file);
}
#else
/*
 * Create a uniquely named temporary file under $TMPDIR (default "/tmp")
 * and store its name in 'filename' (at most 'size' bytes including the
 * terminating NUL).  The file is created by mkstemp() and closed again;
 * only the name is handed back.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure; do not pass that to close() */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
242
243 #ifdef _WIN32
/*
 * Return 1 when 'filename' starts with an ASCII letter followed by ':'
 * (e.g. "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char first = filename[0];
    const int is_letter = (first >= 'a' && first <= 'z') ||
                          (first >= 'A' && first <= 'Z');

    /* filename[1] is only inspected once the first character is a letter */
    return is_letter && filename[1] == ':';
}
250
/*
 * Return 1 when 'filename' designates a Windows drive: either exactly a
 * drive letter plus ':' ("d:"), or a name starting with "\\.\" or "//./".
 * Returns 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* a bare "X:" with nothing after the colon */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
261 #endif
262
263 /*
264  * Detect host devices. By convention, /dev/cdrom[N] is always
265  * recognized as a host CDROM.
266  */
267 static BlockDriver *find_hdev_driver(const char *filename)
268 {
269     int score_max = 0, score;
270     BlockDriver *drv = NULL, *d;
271
272     QLIST_FOREACH(d, &bdrv_drivers, list) {
273         if (d->bdrv_probe_device) {
274             score = d->bdrv_probe_device(filename);
275             if (score > score_max) {
276                 score_max = score;
277                 drv = d;
278             }
279         }
280     }
281
282     return drv;
283 }
284
285 BlockDriver *bdrv_find_protocol(const char *filename)
286 {
287     BlockDriver *drv1;
288     char protocol[128];
289     int len;
290     const char *p;
291     int is_drive;
292
293     /* TODO Drivers without bdrv_file_open must be specified explicitly */
294
295 #ifdef _WIN32
296     is_drive = is_windows_drive(filename) ||
297         is_windows_drive_prefix(filename);
298 #else
299     is_drive = 0;
300 #endif
301     p = strchr(filename, ':');
302     if (!p || is_drive) {
303         drv1 = find_hdev_driver(filename);
304         if (!drv1) {
305             drv1 = bdrv_find_format("file");
306         }
307         return drv1;
308     }
309     len = p - filename;
310     if (len > sizeof(protocol) - 1)
311         len = sizeof(protocol) - 1;
312     memcpy(protocol, filename, len);
313     protocol[len] = '\0';
314     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
315         if (drv1->protocol_name &&
316             !strcmp(drv1->protocol_name, protocol)) {
317             return drv1;
318         }
319     }
320     return NULL;
321 }
322
323 static BlockDriver *find_image_format(const char *filename)
324 {
325     int ret, score, score_max;
326     BlockDriver *drv1, *drv;
327     uint8_t buf[2048];
328     BlockDriverState *bs;
329
330     ret = bdrv_file_open(&bs, filename, 0);
331     if (ret < 0)
332         return NULL;
333
334     /* Return the raw BlockDriver * to scsi-generic devices */
335     if (bs->sg) {
336         bdrv_delete(bs);
337         return bdrv_find_format("raw");
338     }
339
340     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
341     bdrv_delete(bs);
342     if (ret < 0) {
343         return NULL;
344     }
345
346     score_max = 0;
347     drv = NULL;
348     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
349         if (drv1->bdrv_probe) {
350             score = drv1->bdrv_probe(buf, ret, filename);
351             if (score > score_max) {
352                 score_max = score;
353                 drv = drv1;
354             }
355         }
356     }
357     return drv;
358 }
359
360 /**
361  * Set the current 'total_sectors' value
362  */
363 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
364 {
365     BlockDriver *drv = bs->drv;
366
367     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
368     if (bs->sg)
369         return 0;
370
371     /* query actual device if possible, otherwise just trust the hint */
372     if (drv->bdrv_getlength) {
373         int64_t length = drv->bdrv_getlength(bs);
374         if (length < 0) {
375             return length;
376         }
377         hint = length >> BDRV_SECTOR_BITS;
378     }
379
380     bs->total_sectors = hint;
381     return 0;
382 }
383
384 /*
385  * Common part for opening disk images and files
386  */
387 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
388     int flags, BlockDriver *drv)
389 {
390     int ret, open_flags;
391
392     assert(drv != NULL);
393
394     bs->file = NULL;
395     bs->total_sectors = 0;
396     bs->is_temporary = 0;
397     bs->encrypted = 0;
398     bs->valid_key = 0;
399     bs->open_flags = flags;
400     /* buffer_alignment defaulted to 512, drivers can change this value */
401     bs->buffer_alignment = 512;
402
403     pstrcpy(bs->filename, sizeof(bs->filename), filename);
404
405     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
406         return -ENOTSUP;
407     }
408
409     bs->drv = drv;
410     bs->opaque = qemu_mallocz(drv->instance_size);
411
412     /*
413      * Yes, BDRV_O_NOCACHE aka O_DIRECT means we have to present a
414      * write cache to the guest.  We do need the fdatasync to flush
415      * out transactions for block allocations, and we maybe have a
416      * volatile write cache in our backing device to deal with.
417      */
418     if (flags & (BDRV_O_CACHE_WB|BDRV_O_NOCACHE))
419         bs->enable_write_cache = 1;
420
421     /*
422      * Clear flags that are internal to the block layer before opening the
423      * image.
424      */
425     open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
426
427     /*
428      * Snapshots should be writeable.
429      */
430     if (bs->is_temporary) {
431         open_flags |= BDRV_O_RDWR;
432     }
433
434     /* Open the image, either directly or using a protocol */
435     if (drv->bdrv_file_open) {
436         ret = drv->bdrv_file_open(bs, filename, open_flags);
437     } else {
438         ret = bdrv_file_open(&bs->file, filename, open_flags);
439         if (ret >= 0) {
440             ret = drv->bdrv_open(bs, open_flags);
441         }
442     }
443
444     if (ret < 0) {
445         goto free_and_fail;
446     }
447
448     bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
449
450     ret = refresh_total_sectors(bs, bs->total_sectors);
451     if (ret < 0) {
452         goto free_and_fail;
453     }
454
455 #ifndef _WIN32
456     if (bs->is_temporary) {
457         unlink(filename);
458     }
459 #endif
460     return 0;
461
462 free_and_fail:
463     if (bs->file) {
464         bdrv_delete(bs->file);
465         bs->file = NULL;
466     }
467     qemu_free(bs->opaque);
468     bs->opaque = NULL;
469     bs->drv = NULL;
470     return ret;
471 }
472
473 /*
474  * Opens a file using a protocol (file, host_device, nbd, ...)
475  */
476 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
477 {
478     BlockDriverState *bs;
479     BlockDriver *drv;
480     int ret;
481
482     drv = bdrv_find_protocol(filename);
483     if (!drv) {
484         return -ENOENT;
485     }
486
487     bs = bdrv_new("");
488     ret = bdrv_open_common(bs, filename, flags, drv);
489     if (ret < 0) {
490         bdrv_delete(bs);
491         return ret;
492     }
493     bs->growable = 1;
494     *pbs = bs;
495     return 0;
496 }
497
498 /*
499  * Opens a disk image (raw, qcow2, vmdk, ...)
500  */
501 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
502               BlockDriver *drv)
503 {
504     int ret;
505
506     if (flags & BDRV_O_SNAPSHOT) {
507         BlockDriverState *bs1;
508         int64_t total_size;
509         int is_protocol = 0;
510         BlockDriver *bdrv_qcow2;
511         QEMUOptionParameter *options;
512         char tmp_filename[PATH_MAX];
513         char backing_filename[PATH_MAX];
514
515         /* if snapshot, we create a temporary backing file and open it
516            instead of opening 'filename' directly */
517
518         /* if there is a backing file, use it */
519         bs1 = bdrv_new("");
520         ret = bdrv_open(bs1, filename, 0, drv);
521         if (ret < 0) {
522             bdrv_delete(bs1);
523             return ret;
524         }
525         total_size = bdrv_getlength(bs1) >> BDRV_SECTOR_BITS;
526
527         if (bs1->drv && bs1->drv->protocol_name)
528             is_protocol = 1;
529
530         bdrv_delete(bs1);
531
532         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
533
534         /* Real path is meaningless for protocols */
535         if (is_protocol)
536             snprintf(backing_filename, sizeof(backing_filename),
537                      "%s", filename);
538         else if (!realpath(filename, backing_filename))
539             return -errno;
540
541         bdrv_qcow2 = bdrv_find_format("qcow2");
542         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
543
544         set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size * 512);
545         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
546         if (drv) {
547             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
548                 drv->format_name);
549         }
550
551         ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
552         free_option_parameters(options);
553         if (ret < 0) {
554             return ret;
555         }
556
557         filename = tmp_filename;
558         drv = bdrv_qcow2;
559         bs->is_temporary = 1;
560     }
561
562     /* Find the right image format driver */
563     if (!drv) {
564         drv = find_image_format(filename);
565     }
566
567     if (!drv) {
568         ret = -ENOENT;
569         goto unlink_and_fail;
570     }
571
572     /* Open the image */
573     ret = bdrv_open_common(bs, filename, flags, drv);
574     if (ret < 0) {
575         goto unlink_and_fail;
576     }
577
578     /* If there is a backing file, use it */
579     if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
580         char backing_filename[PATH_MAX];
581         int back_flags;
582         BlockDriver *back_drv = NULL;
583
584         bs->backing_hd = bdrv_new("");
585         path_combine(backing_filename, sizeof(backing_filename),
586                      filename, bs->backing_file);
587         if (bs->backing_format[0] != '\0')
588             back_drv = bdrv_find_format(bs->backing_format);
589
590         /* backing files always opened read-only */
591         back_flags =
592             flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
593
594         ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
595         if (ret < 0) {
596             bdrv_close(bs);
597             return ret;
598         }
599         if (bs->is_temporary) {
600             bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
601         } else {
602             /* base image inherits from "parent" */
603             bs->backing_hd->keep_read_only = bs->keep_read_only;
604         }
605     }
606
607     if (!bdrv_key_required(bs)) {
608         /* call the change callback */
609         bs->media_changed = 1;
610         if (bs->change_cb)
611             bs->change_cb(bs->change_opaque);
612     }
613
614     return 0;
615
616 unlink_and_fail:
617     if (bs->is_temporary) {
618         unlink(filename);
619     }
620     return ret;
621 }
622
623 void bdrv_close(BlockDriverState *bs)
624 {
625     if (bs->drv) {
626         if (bs->backing_hd) {
627             bdrv_delete(bs->backing_hd);
628             bs->backing_hd = NULL;
629         }
630         bs->drv->bdrv_close(bs);
631         qemu_free(bs->opaque);
632 #ifdef _WIN32
633         if (bs->is_temporary) {
634             unlink(bs->filename);
635         }
636 #endif
637         bs->opaque = NULL;
638         bs->drv = NULL;
639
640         if (bs->file != NULL) {
641             bdrv_close(bs->file);
642         }
643
644         /* call the change callback */
645         bs->media_changed = 1;
646         if (bs->change_cb)
647             bs->change_cb(bs->change_opaque);
648     }
649 }
650
651 void bdrv_delete(BlockDriverState *bs)
652 {
653     /* remove from list, if necessary */
654     if (bs->device_name[0] != '\0') {
655         QTAILQ_REMOVE(&bdrv_states, bs, list);
656     }
657
658     bdrv_close(bs);
659     if (bs->file != NULL) {
660         bdrv_delete(bs->file);
661     }
662
663     qemu_free(bs);
664 }
665
666 /*
667  * Run consistency checks on an image
668  *
669  * Returns the number of errors or -errno when an internal error occurs
670  */
671 int bdrv_check(BlockDriverState *bs)
672 {
673     if (bs->drv->bdrv_check == NULL) {
674         return -ENOTSUP;
675     }
676
677     return bs->drv->bdrv_check(bs);
678 }
679
680 /* commit COW file into the raw image */
681 int bdrv_commit(BlockDriverState *bs)
682 {
683     BlockDriver *drv = bs->drv;
684     int64_t i, total_sectors;
685     int n, j, ro, open_flags;
686     int ret = 0, rw_ret = 0;
687     unsigned char sector[512];
688     char filename[1024];
689     BlockDriverState *bs_rw, *bs_ro;
690
691     if (!drv)
692         return -ENOMEDIUM;
693     
694     if (!bs->backing_hd) {
695         return -ENOTSUP;
696     }
697
698     if (bs->backing_hd->keep_read_only) {
699         return -EACCES;
700     }
701     
702     ro = bs->backing_hd->read_only;
703     strncpy(filename, bs->backing_hd->filename, sizeof(filename));
704     open_flags =  bs->backing_hd->open_flags;
705
706     if (ro) {
707         /* re-open as RW */
708         bdrv_delete(bs->backing_hd);
709         bs->backing_hd = NULL;
710         bs_rw = bdrv_new("");
711         rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR, drv);
712         if (rw_ret < 0) {
713             bdrv_delete(bs_rw);
714             /* try to re-open read-only */
715             bs_ro = bdrv_new("");
716             ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, drv);
717             if (ret < 0) {
718                 bdrv_delete(bs_ro);
719                 /* drive not functional anymore */
720                 bs->drv = NULL;
721                 return ret;
722             }
723             bs->backing_hd = bs_ro;
724             return rw_ret;
725         }
726         bs->backing_hd = bs_rw;
727     }
728
729     total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
730     for (i = 0; i < total_sectors;) {
731         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
732             for(j = 0; j < n; j++) {
733                 if (bdrv_read(bs, i, sector, 1) != 0) {
734                     ret = -EIO;
735                     goto ro_cleanup;
736                 }
737
738                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
739                     ret = -EIO;
740                     goto ro_cleanup;
741                 }
742                 i++;
743             }
744         } else {
745             i += n;
746         }
747     }
748
749     if (drv->bdrv_make_empty) {
750         ret = drv->bdrv_make_empty(bs);
751         bdrv_flush(bs);
752     }
753
754     /*
755      * Make sure all data we wrote to the backing device is actually
756      * stable on disk.
757      */
758     if (bs->backing_hd)
759         bdrv_flush(bs->backing_hd);
760
761 ro_cleanup:
762
763     if (ro) {
764         /* re-open as RO */
765         bdrv_delete(bs->backing_hd);
766         bs->backing_hd = NULL;
767         bs_ro = bdrv_new("");
768         ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, drv);
769         if (ret < 0) {
770             bdrv_delete(bs_ro);
771             /* drive not functional anymore */
772             bs->drv = NULL;
773             return ret;
774         }
775         bs->backing_hd = bs_ro;
776         bs->backing_hd->keep_read_only = 0;
777     }
778
779     return ret;
780 }
781
782 /*
783  * Return values:
784  * 0        - success
785  * -EINVAL  - backing format specified, but no file
786  * -ENOSPC  - can't update the backing file because no space is left in the
787  *            image file header
788  * -ENOTSUP - format driver doesn't support changing the backing file
789  */
790 int bdrv_change_backing_file(BlockDriverState *bs,
791     const char *backing_file, const char *backing_fmt)
792 {
793     BlockDriver *drv = bs->drv;
794
795     if (drv->bdrv_change_backing_file != NULL) {
796         return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
797     } else {
798         return -ENOTSUP;
799     }
800 }
801
802 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
803                                    size_t size)
804 {
805     int64_t len;
806
807     if (!bdrv_is_inserted(bs))
808         return -ENOMEDIUM;
809
810     if (bs->growable)
811         return 0;
812
813     len = bdrv_getlength(bs);
814
815     if (offset < 0)
816         return -EIO;
817
818     if ((offset > len) || (len - offset < size))
819         return -EIO;
820
821     return 0;
822 }
823
824 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
825                               int nb_sectors)
826 {
827     return bdrv_check_byte_request(bs, sector_num * 512, nb_sectors * 512);
828 }
829
830 /* return < 0 if error. See bdrv_write() for the return codes */
831 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
832               uint8_t *buf, int nb_sectors)
833 {
834     BlockDriver *drv = bs->drv;
835
836     if (!drv)
837         return -ENOMEDIUM;
838     if (bdrv_check_request(bs, sector_num, nb_sectors))
839         return -EIO;
840
841     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
842 }
843
844 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
845                              int nb_sectors, int dirty)
846 {
847     int64_t start, end;
848     unsigned long val, idx, bit;
849
850     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
851     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
852
853     for (; start <= end; start++) {
854         idx = start / (sizeof(unsigned long) * 8);
855         bit = start % (sizeof(unsigned long) * 8);
856         val = bs->dirty_bitmap[idx];
857         if (dirty) {
858             if (!(val & (1 << bit))) {
859                 bs->dirty_count++;
860                 val |= 1 << bit;
861             }
862         } else {
863             if (val & (1 << bit)) {
864                 bs->dirty_count--;
865                 val &= ~(1 << bit);
866             }
867         }
868         bs->dirty_bitmap[idx] = val;
869     }
870 }
871
872 /* Return < 0 if error. Important errors are:
873   -EIO         generic I/O error (may happen for all errors)
874   -ENOMEDIUM   No media inserted.
875   -EINVAL      Invalid sector number or nb_sectors
876   -EACCES      Trying to write a read-only device
877 */
878 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
879                const uint8_t *buf, int nb_sectors)
880 {
881     BlockDriver *drv = bs->drv;
882     if (!bs->drv)
883         return -ENOMEDIUM;
884     if (bs->read_only)
885         return -EACCES;
886     if (bdrv_check_request(bs, sector_num, nb_sectors))
887         return -EIO;
888
889     if (bs->dirty_bitmap) {
890         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
891     }
892
893     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
894         bs->wr_highest_sector = sector_num + nb_sectors - 1;
895     }
896
897     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
898 }
899
900 int bdrv_pread(BlockDriverState *bs, int64_t offset,
901                void *buf, int count1)
902 {
903     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
904     int len, nb_sectors, count;
905     int64_t sector_num;
906     int ret;
907
908     count = count1;
909     /* first read to align to sector start */
910     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
911     if (len > count)
912         len = count;
913     sector_num = offset >> BDRV_SECTOR_BITS;
914     if (len > 0) {
915         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
916             return ret;
917         memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
918         count -= len;
919         if (count == 0)
920             return count1;
921         sector_num++;
922         buf += len;
923     }
924
925     /* read the sectors "in place" */
926     nb_sectors = count >> BDRV_SECTOR_BITS;
927     if (nb_sectors > 0) {
928         if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
929             return ret;
930         sector_num += nb_sectors;
931         len = nb_sectors << BDRV_SECTOR_BITS;
932         buf += len;
933         count -= len;
934     }
935
936     /* add data from the last sector */
937     if (count > 0) {
938         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
939             return ret;
940         memcpy(buf, tmp_buf, count);
941     }
942     return count1;
943 }
944
945 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
946                 const void *buf, int count1)
947 {
948     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
949     int len, nb_sectors, count;
950     int64_t sector_num;
951     int ret;
952
953     count = count1;
954     /* first write to align to sector start */
955     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
956     if (len > count)
957         len = count;
958     sector_num = offset >> BDRV_SECTOR_BITS;
959     if (len > 0) {
960         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
961             return ret;
962         memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
963         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
964             return ret;
965         count -= len;
966         if (count == 0)
967             return count1;
968         sector_num++;
969         buf += len;
970     }
971
972     /* write the sectors "in place" */
973     nb_sectors = count >> BDRV_SECTOR_BITS;
974     if (nb_sectors > 0) {
975         if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
976             return ret;
977         sector_num += nb_sectors;
978         len = nb_sectors << BDRV_SECTOR_BITS;
979         buf += len;
980         count -= len;
981     }
982
983     /* add data from the last sector */
984     if (count > 0) {
985         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
986             return ret;
987         memcpy(tmp_buf, buf, count);
988         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
989             return ret;
990     }
991     return count1;
992 }
993
994 /**
995  * Truncate file to 'offset' bytes (needed only for file protocols)
996  */
997 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
998 {
999     BlockDriver *drv = bs->drv;
1000     int ret;
1001     if (!drv)
1002         return -ENOMEDIUM;
1003     if (!drv->bdrv_truncate)
1004         return -ENOTSUP;
1005     if (bs->read_only)
1006         return -EACCES;
1007     ret = drv->bdrv_truncate(bs, offset);
1008     if (ret == 0) {
1009         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1010     }
1011     return ret;
1012 }
1013
1014 /**
1015  * Length of a file in bytes. Return < 0 if error or unknown.
1016  */
1017 int64_t bdrv_getlength(BlockDriverState *bs)
1018 {
1019     BlockDriver *drv = bs->drv;
1020     if (!drv)
1021         return -ENOMEDIUM;
1022
1023     /* Fixed size devices use the total_sectors value for speed instead of
1024        issuing a length query (like lseek) on each call.  Also, legacy block
1025        drivers don't provide a bdrv_getlength function and must use
1026        total_sectors. */
1027     if (!bs->growable || !drv->bdrv_getlength) {
1028         return bs->total_sectors * BDRV_SECTOR_SIZE;
1029     }
1030     return drv->bdrv_getlength(bs);
1031 }
1032
1033 /* return 0 as number of sectors if no device present or error */
1034 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1035 {
1036     int64_t length;
1037     length = bdrv_getlength(bs);
1038     if (length < 0)
1039         length = 0;
1040     else
1041         length = length >> BDRV_SECTOR_BITS;
1042     *nb_sectors_ptr = length;
1043 }
1044
/*
 * One entry of the MSDOS partition table as read by guess_disk_lchs().
 * The structure is packed so that it can be overlaid on the raw sector.
 */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} __attribute__((packed));
1057
1058 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1059 static int guess_disk_lchs(BlockDriverState *bs,
1060                            int *pcylinders, int *pheads, int *psectors)
1061 {
1062     uint8_t buf[512];
1063     int ret, i, heads, sectors, cylinders;
1064     struct partition *p;
1065     uint32_t nr_sects;
1066     uint64_t nb_sectors;
1067
1068     bdrv_get_geometry(bs, &nb_sectors);
1069
1070     ret = bdrv_read(bs, 0, buf, 1);
1071     if (ret < 0)
1072         return -1;
1073     /* test msdos magic */
1074     if (buf[510] != 0x55 || buf[511] != 0xaa)
1075         return -1;
1076     for(i = 0; i < 4; i++) {
1077         p = ((struct partition *)(buf + 0x1be)) + i;
1078         nr_sects = le32_to_cpu(p->nr_sects);
1079         if (nr_sects && p->end_head) {
1080             /* We make the assumption that the partition terminates on
1081                a cylinder boundary */
1082             heads = p->end_head + 1;
1083             sectors = p->end_sector & 63;
1084             if (sectors == 0)
1085                 continue;
1086             cylinders = nb_sectors / (heads * sectors);
1087             if (cylinders < 1 || cylinders > 16383)
1088                 continue;
1089             *pheads = heads;
1090             *psectors = sectors;
1091             *pcylinders = cylinders;
1092 #if 0
1093             printf("guessed geometry: LCHS=%d %d %d\n",
1094                    cylinders, heads, sectors);
1095 #endif
1096             return 0;
1097         }
1098     }
1099     return -1;
1100 }
1101
1102 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1103 {
1104     int translation, lba_detected = 0;
1105     int cylinders, heads, secs;
1106     uint64_t nb_sectors;
1107
1108     /* if a geometry hint is available, use it */
1109     bdrv_get_geometry(bs, &nb_sectors);
1110     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1111     translation = bdrv_get_translation_hint(bs);
1112     if (cylinders != 0) {
1113         *pcyls = cylinders;
1114         *pheads = heads;
1115         *psecs = secs;
1116     } else {
1117         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1118             if (heads > 16) {
1119                 /* if heads > 16, it means that a BIOS LBA
1120                    translation was active, so the default
1121                    hardware geometry is OK */
1122                 lba_detected = 1;
1123                 goto default_geometry;
1124             } else {
1125                 *pcyls = cylinders;
1126                 *pheads = heads;
1127                 *psecs = secs;
1128                 /* disable any translation to be in sync with
1129                    the logical geometry */
1130                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1131                     bdrv_set_translation_hint(bs,
1132                                               BIOS_ATA_TRANSLATION_NONE);
1133                 }
1134             }
1135         } else {
1136         default_geometry:
1137             /* if no geometry, use a standard physical disk geometry */
1138             cylinders = nb_sectors / (16 * 63);
1139
1140             if (cylinders > 16383)
1141                 cylinders = 16383;
1142             else if (cylinders < 2)
1143                 cylinders = 2;
1144             *pcyls = cylinders;
1145             *pheads = 16;
1146             *psecs = 63;
1147             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1148                 if ((*pcyls * *pheads) <= 131072) {
1149                     bdrv_set_translation_hint(bs,
1150                                               BIOS_ATA_TRANSLATION_LARGE);
1151                 } else {
1152                     bdrv_set_translation_hint(bs,
1153                                               BIOS_ATA_TRANSLATION_LBA);
1154                 }
1155             }
1156         }
1157         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1158     }
1159 }
1160
1161 void bdrv_set_geometry_hint(BlockDriverState *bs,
1162                             int cyls, int heads, int secs)
1163 {
1164     bs->cyls = cyls;
1165     bs->heads = heads;
1166     bs->secs = secs;
1167 }
1168
1169 void bdrv_set_type_hint(BlockDriverState *bs, int type)
1170 {
1171     bs->type = type;
1172     bs->removable = ((type == BDRV_TYPE_CDROM ||
1173                       type == BDRV_TYPE_FLOPPY));
1174 }
1175
1176 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1177 {
1178     bs->translation = translation;
1179 }
1180
1181 void bdrv_get_geometry_hint(BlockDriverState *bs,
1182                             int *pcyls, int *pheads, int *psecs)
1183 {
1184     *pcyls = bs->cyls;
1185     *pheads = bs->heads;
1186     *psecs = bs->secs;
1187 }
1188
1189 int bdrv_get_type_hint(BlockDriverState *bs)
1190 {
1191     return bs->type;
1192 }
1193
1194 int bdrv_get_translation_hint(BlockDriverState *bs)
1195 {
1196     return bs->translation;
1197 }
1198
1199 int bdrv_is_removable(BlockDriverState *bs)
1200 {
1201     return bs->removable;
1202 }
1203
1204 int bdrv_is_read_only(BlockDriverState *bs)
1205 {
1206     return bs->read_only;
1207 }
1208
1209 int bdrv_is_sg(BlockDriverState *bs)
1210 {
1211     return bs->sg;
1212 }
1213
1214 int bdrv_enable_write_cache(BlockDriverState *bs)
1215 {
1216     return bs->enable_write_cache;
1217 }
1218
1219 /* XXX: no longer used */
1220 void bdrv_set_change_cb(BlockDriverState *bs,
1221                         void (*change_cb)(void *opaque), void *opaque)
1222 {
1223     bs->change_cb = change_cb;
1224     bs->change_opaque = opaque;
1225 }
1226
1227 int bdrv_is_encrypted(BlockDriverState *bs)
1228 {
1229     if (bs->backing_hd && bs->backing_hd->encrypted)
1230         return 1;
1231     return bs->encrypted;
1232 }
1233
1234 int bdrv_key_required(BlockDriverState *bs)
1235 {
1236     BlockDriverState *backing_hd = bs->backing_hd;
1237
1238     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1239         return 1;
1240     return (bs->encrypted && !bs->valid_key);
1241 }
1242
1243 int bdrv_set_key(BlockDriverState *bs, const char *key)
1244 {
1245     int ret;
1246     if (bs->backing_hd && bs->backing_hd->encrypted) {
1247         ret = bdrv_set_key(bs->backing_hd, key);
1248         if (ret < 0)
1249             return ret;
1250         if (!bs->encrypted)
1251             return 0;
1252     }
1253     if (!bs->encrypted) {
1254         return -EINVAL;
1255     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1256         return -ENOMEDIUM;
1257     }
1258     ret = bs->drv->bdrv_set_key(bs, key);
1259     if (ret < 0) {
1260         bs->valid_key = 0;
1261     } else if (!bs->valid_key) {
1262         bs->valid_key = 1;
1263         /* call the change callback now, we skipped it on open */
1264         bs->media_changed = 1;
1265         if (bs->change_cb)
1266             bs->change_cb(bs->change_opaque);
1267     }
1268     return ret;
1269 }
1270
1271 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1272 {
1273     if (!bs->drv) {
1274         buf[0] = '\0';
1275     } else {
1276         pstrcpy(buf, buf_size, bs->drv->format_name);
1277     }
1278 }
1279
1280 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1281                          void *opaque)
1282 {
1283     BlockDriver *drv;
1284
1285     QLIST_FOREACH(drv, &bdrv_drivers, list) {
1286         it(opaque, drv->format_name);
1287     }
1288 }
1289
1290 BlockDriverState *bdrv_find(const char *name)
1291 {
1292     BlockDriverState *bs;
1293
1294     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1295         if (!strcmp(name, bs->device_name)) {
1296             return bs;
1297         }
1298     }
1299     return NULL;
1300 }
1301
1302 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1303 {
1304     BlockDriverState *bs;
1305
1306     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1307         it(opaque, bs);
1308     }
1309 }
1310
1311 const char *bdrv_get_device_name(BlockDriverState *bs)
1312 {
1313     return bs->device_name;
1314 }
1315
1316 void bdrv_flush(BlockDriverState *bs)
1317 {
1318     if (bs->open_flags & BDRV_O_NO_FLUSH) {
1319         return;
1320     }
1321
1322     if (bs->drv && bs->drv->bdrv_flush)
1323         bs->drv->bdrv_flush(bs);
1324 }
1325
1326 void bdrv_flush_all(void)
1327 {
1328     BlockDriverState *bs;
1329
1330     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1331         if (bs->drv && !bdrv_is_read_only(bs) &&
1332             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
1333             bdrv_flush(bs);
1334         }
1335     }
1336 }
1337
1338 int bdrv_has_zero_init(BlockDriverState *bs)
1339 {
1340     assert(bs->drv);
1341
1342     if (bs->drv->no_zero_init) {
1343         return 0;
1344     } else if (bs->file) {
1345         return bdrv_has_zero_init(bs->file);
1346     }
1347
1348     return 1;
1349 }
1350
1351 /*
1352  * Returns true iff the specified sector is present in the disk image. Drivers
1353  * not implementing the functionality are assumed to not support backing files,
1354  * hence all their sectors are reported as allocated.
1355  *
1356  * 'pnum' is set to the number of sectors (including and immediately following
1357  * the specified sector) that are known to be in the same
1358  * allocated/unallocated state.
1359  *
1360  * 'nb_sectors' is the max value 'pnum' should be set to.
1361  */
1362 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1363         int *pnum)
1364 {
1365     int64_t n;
1366     if (!bs->drv->bdrv_is_allocated) {
1367         if (sector_num >= bs->total_sectors) {
1368             *pnum = 0;
1369             return 0;
1370         }
1371         n = bs->total_sectors - sector_num;
1372         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1373         return 1;
1374     }
1375     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1376 }
1377
1378 void bdrv_mon_event(const BlockDriverState *bdrv,
1379                     BlockMonEventAction action, int is_read)
1380 {
1381     QObject *data;
1382     const char *action_str;
1383
1384     switch (action) {
1385     case BDRV_ACTION_REPORT:
1386         action_str = "report";
1387         break;
1388     case BDRV_ACTION_IGNORE:
1389         action_str = "ignore";
1390         break;
1391     case BDRV_ACTION_STOP:
1392         action_str = "stop";
1393         break;
1394     default:
1395         abort();
1396     }
1397
1398     data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1399                               bdrv->device_name,
1400                               action_str,
1401                               is_read ? "read" : "write");
1402     monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1403
1404     qobject_decref(data);
1405 }
1406
1407 static void bdrv_print_dict(QObject *obj, void *opaque)
1408 {
1409     QDict *bs_dict;
1410     Monitor *mon = opaque;
1411
1412     bs_dict = qobject_to_qdict(obj);
1413
1414     monitor_printf(mon, "%s: type=%s removable=%d",
1415                         qdict_get_str(bs_dict, "device"),
1416                         qdict_get_str(bs_dict, "type"),
1417                         qdict_get_bool(bs_dict, "removable"));
1418
1419     if (qdict_get_bool(bs_dict, "removable")) {
1420         monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1421     }
1422
1423     if (qdict_haskey(bs_dict, "inserted")) {
1424         QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1425
1426         monitor_printf(mon, " file=");
1427         monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1428         if (qdict_haskey(qdict, "backing_file")) {
1429             monitor_printf(mon, " backing_file=");
1430             monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1431         }
1432         monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1433                             qdict_get_bool(qdict, "ro"),
1434                             qdict_get_str(qdict, "drv"),
1435                             qdict_get_bool(qdict, "encrypted"));
1436     } else {
1437         monitor_printf(mon, " [not inserted]");
1438     }
1439
1440     monitor_printf(mon, "\n");
1441 }
1442
1443 void bdrv_info_print(Monitor *mon, const QObject *data)
1444 {
1445     qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1446 }
1447
1448 void bdrv_info(Monitor *mon, QObject **ret_data)
1449 {
1450     QList *bs_list;
1451     BlockDriverState *bs;
1452
1453     bs_list = qlist_new();
1454
1455     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1456         QObject *bs_obj;
1457         const char *type = "unknown";
1458
1459         switch(bs->type) {
1460         case BDRV_TYPE_HD:
1461             type = "hd";
1462             break;
1463         case BDRV_TYPE_CDROM:
1464             type = "cdrom";
1465             break;
1466         case BDRV_TYPE_FLOPPY:
1467             type = "floppy";
1468             break;
1469         }
1470
1471         bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': %s, "
1472                                     "'removable': %i, 'locked': %i }",
1473                                     bs->device_name, type, bs->removable,
1474                                     bs->locked);
1475
1476         if (bs->drv) {
1477             QObject *obj;
1478             QDict *bs_dict = qobject_to_qdict(bs_obj);
1479
1480             obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1481                                      "'encrypted': %i }",
1482                                      bs->filename, bs->read_only,
1483                                      bs->drv->format_name,
1484                                      bdrv_is_encrypted(bs));
1485             if (bs->backing_file[0] != '\0') {
1486                 QDict *qdict = qobject_to_qdict(obj);
1487                 qdict_put(qdict, "backing_file",
1488                           qstring_from_str(bs->backing_file));
1489             }
1490
1491             qdict_put_obj(bs_dict, "inserted", obj);
1492         }
1493         qlist_append_obj(bs_list, bs_obj);
1494     }
1495
1496     *ret_data = QOBJECT(bs_list);
1497 }
1498
1499 static void bdrv_stats_iter(QObject *data, void *opaque)
1500 {
1501     QDict *qdict;
1502     Monitor *mon = opaque;
1503
1504     qdict = qobject_to_qdict(data);
1505     monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1506
1507     qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1508     monitor_printf(mon, " rd_bytes=%" PRId64
1509                         " wr_bytes=%" PRId64
1510                         " rd_operations=%" PRId64
1511                         " wr_operations=%" PRId64
1512                         "\n",
1513                         qdict_get_int(qdict, "rd_bytes"),
1514                         qdict_get_int(qdict, "wr_bytes"),
1515                         qdict_get_int(qdict, "rd_operations"),
1516                         qdict_get_int(qdict, "wr_operations"));
1517 }
1518
1519 void bdrv_stats_print(Monitor *mon, const QObject *data)
1520 {
1521     qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1522 }
1523
1524 static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
1525 {
1526     QObject *res;
1527     QDict *dict;
1528
1529     res = qobject_from_jsonf("{ 'stats': {"
1530                              "'rd_bytes': %" PRId64 ","
1531                              "'wr_bytes': %" PRId64 ","
1532                              "'rd_operations': %" PRId64 ","
1533                              "'wr_operations': %" PRId64 ","
1534                              "'wr_highest_offset': %" PRId64
1535                              "} }",
1536                              bs->rd_bytes, bs->wr_bytes,
1537                              bs->rd_ops, bs->wr_ops,
1538                              bs->wr_highest_sector * 512);
1539     dict  = qobject_to_qdict(res);
1540
1541     if (*bs->device_name) {
1542         qdict_put(dict, "device", qstring_from_str(bs->device_name));
1543     }
1544
1545     if (bs->file) {
1546         QObject *parent = bdrv_info_stats_bs(bs->file);
1547         qdict_put_obj(dict, "parent", parent);
1548     }
1549
1550     return res;
1551 }
1552
1553 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
1554 {
1555     QObject *obj;
1556     QList *devices;
1557     BlockDriverState *bs;
1558
1559     devices = qlist_new();
1560
1561     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1562         obj = bdrv_info_stats_bs(bs);
1563         qlist_append_obj(devices, obj);
1564     }
1565
1566     *ret_data = QOBJECT(devices);
1567 }
1568
1569 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1570 {
1571     if (bs->backing_hd && bs->backing_hd->encrypted)
1572         return bs->backing_file;
1573     else if (bs->encrypted)
1574         return bs->filename;
1575     else
1576         return NULL;
1577 }
1578
1579 void bdrv_get_backing_filename(BlockDriverState *bs,
1580                                char *filename, int filename_size)
1581 {
1582     if (!bs->backing_file) {
1583         pstrcpy(filename, filename_size, "");
1584     } else {
1585         pstrcpy(filename, filename_size, bs->backing_file);
1586     }
1587 }
1588
1589 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1590                           const uint8_t *buf, int nb_sectors)
1591 {
1592     BlockDriver *drv = bs->drv;
1593     if (!drv)
1594         return -ENOMEDIUM;
1595     if (!drv->bdrv_write_compressed)
1596         return -ENOTSUP;
1597     if (bdrv_check_request(bs, sector_num, nb_sectors))
1598         return -EIO;
1599
1600     if (bs->dirty_bitmap) {
1601         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1602     }
1603
1604     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1605 }
1606
1607 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1608 {
1609     BlockDriver *drv = bs->drv;
1610     if (!drv)
1611         return -ENOMEDIUM;
1612     if (!drv->bdrv_get_info)
1613         return -ENOTSUP;
1614     memset(bdi, 0, sizeof(*bdi));
1615     return drv->bdrv_get_info(bs, bdi);
1616 }
1617
1618 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1619                       int64_t pos, int size)
1620 {
1621     BlockDriver *drv = bs->drv;
1622     if (!drv)
1623         return -ENOMEDIUM;
1624     if (!drv->bdrv_save_vmstate)
1625         return -ENOTSUP;
1626     return drv->bdrv_save_vmstate(bs, buf, pos, size);
1627 }
1628
1629 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
1630                       int64_t pos, int size)
1631 {
1632     BlockDriver *drv = bs->drv;
1633     if (!drv)
1634         return -ENOMEDIUM;
1635     if (!drv->bdrv_load_vmstate)
1636         return -ENOTSUP;
1637     return drv->bdrv_load_vmstate(bs, buf, pos, size);
1638 }
1639
1640 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
1641 {
1642     BlockDriver *drv = bs->drv;
1643
1644     if (!drv || !drv->bdrv_debug_event) {
1645         return;
1646     }
1647
1648     return drv->bdrv_debug_event(bs, event);
1649
1650 }
1651
1652 /**************************************************************/
1653 /* handling of snapshots */
1654
1655 int bdrv_snapshot_create(BlockDriverState *bs,
1656                          QEMUSnapshotInfo *sn_info)
1657 {
1658     BlockDriver *drv = bs->drv;
1659     if (!drv)
1660         return -ENOMEDIUM;
1661     if (!drv->bdrv_snapshot_create)
1662         return -ENOTSUP;
1663     return drv->bdrv_snapshot_create(bs, sn_info);
1664 }
1665
1666 int bdrv_snapshot_goto(BlockDriverState *bs,
1667                        const char *snapshot_id)
1668 {
1669     BlockDriver *drv = bs->drv;
1670     if (!drv)
1671         return -ENOMEDIUM;
1672     if (!drv->bdrv_snapshot_goto)
1673         return -ENOTSUP;
1674     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1675 }
1676
1677 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1678 {
1679     BlockDriver *drv = bs->drv;
1680     if (!drv)
1681         return -ENOMEDIUM;
1682     if (!drv->bdrv_snapshot_delete)
1683         return -ENOTSUP;
1684     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1685 }
1686
1687 int bdrv_snapshot_list(BlockDriverState *bs,
1688                        QEMUSnapshotInfo **psn_info)
1689 {
1690     BlockDriver *drv = bs->drv;
1691     if (!drv)
1692         return -ENOMEDIUM;
1693     if (!drv->bdrv_snapshot_list)
1694         return -ENOTSUP;
1695     return drv->bdrv_snapshot_list(bs, psn_info);
1696 }
1697
#define NB_SUFFIXES 4

/*
 * Format 'size' into buf (at most buf_size bytes) and return buf.
 *
 * Values up to 999 (including negative ones) are printed as plain integers.
 * Larger values are scaled by powers of 1024 and get a K, M, G or T suffix:
 * below ten units one decimal is shown ("1.5K"), otherwise the value is
 * rounded to the nearest whole unit ("10K").  Anything too large for the
 * other suffixes is expressed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* Pick the first unit in which the value stays below 1000, or settle
       for the largest suffix available. */
    while (idx < NB_SUFFIXES - 1 && size >= 1000 * unit) {
        unit *= 1024;
        idx++;
    }

    if (size < 10 * unit) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / unit, suffixes[idx]);
    } else {
        /* adding half a unit rounds to nearest instead of truncating */
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 (size + (unit >> 1)) / unit, suffixes[idx]);
    }
    return buf;
}
1727
1728 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1729 {
1730     char buf1[128], date_buf[128], clock_buf[128];
1731 #ifdef _WIN32
1732     struct tm *ptm;
1733 #else
1734     struct tm tm;
1735 #endif
1736     time_t ti;
1737     int64_t secs;
1738
1739     if (!sn) {
1740         snprintf(buf, buf_size,
1741                  "%-10s%-20s%7s%20s%15s",
1742                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1743     } else {
1744         ti = sn->date_sec;
1745 #ifdef _WIN32
1746         ptm = localtime(&ti);
1747         strftime(date_buf, sizeof(date_buf),
1748                  "%Y-%m-%d %H:%M:%S", ptm);
1749 #else
1750         localtime_r(&ti, &tm);
1751         strftime(date_buf, sizeof(date_buf),
1752                  "%Y-%m-%d %H:%M:%S", &tm);
1753 #endif
1754         secs = sn->vm_clock_nsec / 1000000000;
1755         snprintf(clock_buf, sizeof(clock_buf),
1756                  "%02d:%02d:%02d.%03d",
1757                  (int)(secs / 3600),
1758                  (int)((secs / 60) % 60),
1759                  (int)(secs % 60),
1760                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1761         snprintf(buf, buf_size,
1762                  "%-10s%-20s%7s%20s%15s",
1763                  sn->id_str, sn->name,
1764                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1765                  date_buf,
1766                  clock_buf);
1767     }
1768     return buf;
1769 }
1770
1771
1772 /**************************************************************/
1773 /* async I/Os */
1774
1775 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1776                                  QEMUIOVector *qiov, int nb_sectors,
1777                                  BlockDriverCompletionFunc *cb, void *opaque)
1778 {
1779     BlockDriver *drv = bs->drv;
1780     BlockDriverAIOCB *ret;
1781
1782     if (!drv)
1783         return NULL;
1784     if (bdrv_check_request(bs, sector_num, nb_sectors))
1785         return NULL;
1786
1787     ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
1788                               cb, opaque);
1789
1790     if (ret) {
1791         /* Update stats even though technically transfer has not happened. */
1792         bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1793         bs->rd_ops ++;
1794     }
1795
1796     return ret;
1797 }
1798
1799 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1800                                   QEMUIOVector *qiov, int nb_sectors,
1801                                   BlockDriverCompletionFunc *cb, void *opaque)
1802 {
1803     BlockDriver *drv = bs->drv;
1804     BlockDriverAIOCB *ret;
1805
1806     if (!drv)
1807         return NULL;
1808     if (bs->read_only)
1809         return NULL;
1810     if (bdrv_check_request(bs, sector_num, nb_sectors))
1811         return NULL;
1812
1813     if (bs->dirty_bitmap) {
1814         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1815     }
1816
1817     ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
1818                                cb, opaque);
1819
1820     if (ret) {
1821         /* Update stats even though technically transfer has not happened. */
1822         bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1823         bs->wr_ops ++;
1824         if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1825             bs->wr_highest_sector = sector_num + nb_sectors - 1;
1826         }
1827     }
1828
1829     return ret;
1830 }
1831
1832
1833 typedef struct MultiwriteCB {
1834     int error;
1835     int num_requests;
1836     int num_callbacks;
1837     struct {
1838         BlockDriverCompletionFunc *cb;
1839         void *opaque;
1840         QEMUIOVector *free_qiov;
1841         void *free_buf;
1842     } callbacks[];
1843 } MultiwriteCB;
1844
1845 static void multiwrite_user_cb(MultiwriteCB *mcb)
1846 {
1847     int i;
1848
1849     for (i = 0; i < mcb->num_callbacks; i++) {
1850         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1851         if (mcb->callbacks[i].free_qiov) {
1852             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
1853         }
1854         qemu_free(mcb->callbacks[i].free_qiov);
1855         qemu_vfree(mcb->callbacks[i].free_buf);
1856     }
1857 }
1858
1859 static void multiwrite_cb(void *opaque, int ret)
1860 {
1861     MultiwriteCB *mcb = opaque;
1862
1863     if (ret < 0 && !mcb->error) {
1864         mcb->error = ret;
1865         multiwrite_user_cb(mcb);
1866     }
1867
1868     mcb->num_requests--;
1869     if (mcb->num_requests == 0) {
1870         if (mcb->error == 0) {
1871             multiwrite_user_cb(mcb);
1872         }
1873         qemu_free(mcb);
1874     }
1875 }
1876
1877 static int multiwrite_req_compare(const void *a, const void *b)
1878 {
1879     const BlockRequest *req1 = a, *req2 = b;
1880
1881     /*
1882      * Note that we can't simply subtract req2->sector from req1->sector
1883      * here as that could overflow the return value.
1884      */
1885     if (req1->sector > req2->sector) {
1886         return 1;
1887     } else if (req1->sector < req2->sector) {
1888         return -1;
1889     } else {
1890         return 0;
1891     }
1892 }
1893
1894 /*
1895  * Takes a bunch of requests and tries to merge them. Returns the number of
1896  * requests that remain after merging.
1897  */
1898 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
1899     int num_reqs, MultiwriteCB *mcb)
1900 {
1901     int i, outidx;
1902
1903     // Sort requests by start sector
1904     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
1905
1906     // Check if adjacent requests touch the same clusters. If so, combine them,
1907     // filling up gaps with zero sectors.
1908     outidx = 0;
1909     for (i = 1; i < num_reqs; i++) {
1910         int merge = 0;
1911         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
1912
1913         // This handles the cases that are valid for all block drivers, namely
1914         // exactly sequential writes and overlapping writes.
1915         if (reqs[i].sector <= oldreq_last) {
1916             merge = 1;
1917         }
1918
1919         // The block driver may decide that it makes sense to combine requests
1920         // even if there is a gap of some sectors between them. In this case,
1921         // the gap is filled with zeros (therefore only applicable for yet
1922         // unused space in format like qcow2).
1923         if (!merge && bs->drv->bdrv_merge_requests) {
1924             merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
1925         }
1926
1927         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
1928             merge = 0;
1929         }
1930
1931         if (merge) {
1932             size_t size;
1933             QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov));
1934             qemu_iovec_init(qiov,
1935                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
1936
1937             // Add the first request to the merged one. If the requests are
1938             // overlapping, drop the last sectors of the first request.
1939             size = (reqs[i].sector - reqs[outidx].sector) << 9;
1940             qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
1941
1942             // We might need to add some zeros between the two requests
1943             if (reqs[i].sector > oldreq_last) {
1944                 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
1945                 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
1946                 memset(buf, 0, zero_bytes);
1947                 qemu_iovec_add(qiov, buf, zero_bytes);
1948                 mcb->callbacks[i].free_buf = buf;
1949             }
1950
1951             // Add the second request
1952             qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
1953
1954             reqs[outidx].nb_sectors = qiov->size >> 9;
1955             reqs[outidx].qiov = qiov;
1956
1957             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
1958         } else {
1959             outidx++;
1960             reqs[outidx].sector     = reqs[i].sector;
1961             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
1962             reqs[outidx].qiov       = reqs[i].qiov;
1963         }
1964     }
1965
1966     return outidx + 1;
1967 }
1968
1969 /*
1970  * Submit multiple AIO write requests at once.
1971  *
1972  * On success, the function returns 0 and all requests in the reqs array have
1973  * been submitted. In error case this function returns -1, and any of the
1974  * requests may or may not be submitted yet. In particular, this means that the
1975  * callback will be called for some of the requests, for others it won't. The
1976  * caller must check the error field of the BlockRequest to wait for the right
1977  * callbacks (if error != 0, no callback will be called).
1978  *
1979  * The implementation may modify the contents of the reqs array, e.g. to merge
1980  * requests. However, the fields opaque and error are left unmodified as they
1981  * are used to signal failure for a single request to the caller.
1982  */
1983 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
1984 {
1985     BlockDriverAIOCB *acb;
1986     MultiwriteCB *mcb;
1987     int i;
1988
1989     if (num_reqs == 0) {
1990         return 0;
1991     }
1992
1993     // Create MultiwriteCB structure
1994     mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
1995     mcb->num_requests = 0;
1996     mcb->num_callbacks = num_reqs;
1997
1998     for (i = 0; i < num_reqs; i++) {
1999         mcb->callbacks[i].cb = reqs[i].cb;
2000         mcb->callbacks[i].opaque = reqs[i].opaque;
2001     }
2002
2003     // Check for mergable requests
2004     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2005
2006     // Run the aio requests
2007     for (i = 0; i < num_reqs; i++) {
2008         acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2009             reqs[i].nb_sectors, multiwrite_cb, mcb);
2010
2011         if (acb == NULL) {
2012             // We can only fail the whole thing if no request has been
2013             // submitted yet. Otherwise we'll wait for the submitted AIOs to
2014             // complete and report the error in the callback.
2015             if (mcb->num_requests == 0) {
2016                 reqs[i].error = -EIO;
2017                 goto fail;
2018             } else {
2019                 mcb->num_requests++;
2020                 multiwrite_cb(mcb, -EIO);
2021                 break;
2022             }
2023         } else {
2024             mcb->num_requests++;
2025         }
2026     }
2027
2028     return 0;
2029
2030 fail:
2031     qemu_free(mcb);
2032     return -1;
2033 }
2034
2035 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2036         BlockDriverCompletionFunc *cb, void *opaque)
2037 {
2038     BlockDriver *drv = bs->drv;
2039
2040     if (bs->open_flags & BDRV_O_NO_FLUSH) {
2041         return bdrv_aio_noop_em(bs, cb, opaque);
2042     }
2043
2044     if (!drv)
2045         return NULL;
2046     return drv->bdrv_aio_flush(bs, cb, opaque);
2047 }
2048
2049 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2050 {
2051     acb->pool->cancel(acb);
2052 }
2053
2054
2055 /**************************************************************/
2056 /* async block device emulation */
2057
/*
 * Control block for the emulated AIO requests below: the operation itself is
 * performed synchronously and completion is delivered from a bottom half.
 */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;    /* generic part; container_of() maps back */
    QEMUBH *bh;                 /* bottom half running bdrv_aio_bh_cb() */
    int ret;                    /* result handed to the completion callback */
    /* vector translation state */
    QEMUIOVector *qiov;         /* caller's vector (NULL for flush/no-op) */
    uint8_t *bounce;            /* linear bounce buffer (NULL for flush/no-op) */
    int is_write;               /* zero: copy bounce back to qiov on completion */
} BlockDriverAIOCBSync;
2067
2068 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2069 {
2070     BlockDriverAIOCBSync *acb =
2071         container_of(blockacb, BlockDriverAIOCBSync, common);
2072     qemu_bh_delete(acb->bh);
2073     acb->bh = NULL;
2074     qemu_aio_release(acb);
2075 }
2076
/*
 * Pool of BlockDriverAIOCBSync control blocks used by the emulated AIO
 * helpers; requests from this pool are cancelled via bdrv_aio_cancel_em().
 */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};
2081
2082 static void bdrv_aio_bh_cb(void *opaque)
2083 {
2084     BlockDriverAIOCBSync *acb = opaque;
2085
2086     if (!acb->is_write)
2087         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2088     qemu_vfree(acb->bounce);
2089     acb->common.cb(acb->common.opaque, acb->ret);
2090     qemu_bh_delete(acb->bh);
2091     acb->bh = NULL;
2092     qemu_aio_release(acb);
2093 }
2094
2095 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2096                                             int64_t sector_num,
2097                                             QEMUIOVector *qiov,
2098                                             int nb_sectors,
2099                                             BlockDriverCompletionFunc *cb,
2100                                             void *opaque,
2101                                             int is_write)
2102
2103 {
2104     BlockDriverAIOCBSync *acb;
2105
2106     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2107     acb->is_write = is_write;
2108     acb->qiov = qiov;
2109     acb->bounce = qemu_blockalign(bs, qiov->size);
2110
2111     if (!acb->bh)
2112         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2113
2114     if (is_write) {
2115         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2116         acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2117     } else {
2118         acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2119     }
2120
2121     qemu_bh_schedule(acb->bh);
2122
2123     return &acb->common;
2124 }
2125
2126 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2127         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2128         BlockDriverCompletionFunc *cb, void *opaque)
2129 {
2130     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2131 }
2132
2133 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2134         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2135         BlockDriverCompletionFunc *cb, void *opaque)
2136 {
2137     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2138 }
2139
2140 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2141         BlockDriverCompletionFunc *cb, void *opaque)
2142 {
2143     BlockDriverAIOCBSync *acb;
2144
2145     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2146     acb->is_write = 1; /* don't bounce in the completion hadler */
2147     acb->qiov = NULL;
2148     acb->bounce = NULL;
2149     acb->ret = 0;
2150
2151     if (!acb->bh)
2152         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2153
2154     bdrv_flush(bs);
2155     qemu_bh_schedule(acb->bh);
2156     return &acb->common;
2157 }
2158
2159 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2160         BlockDriverCompletionFunc *cb, void *opaque)
2161 {
2162     BlockDriverAIOCBSync *acb;
2163
2164     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2165     acb->is_write = 1; /* don't bounce in the completion handler */
2166     acb->qiov = NULL;
2167     acb->bounce = NULL;
2168     acb->ret = 0;
2169
2170     if (!acb->bh) {
2171         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2172     }
2173
2174     qemu_bh_schedule(acb->bh);
2175     return &acb->common;
2176 }
2177
2178 /**************************************************************/
2179 /* sync block device emulation */
2180
/*
 * Completion callback of the synchronous emulation: opaque points to the int
 * that receives the result of the request.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2185
2186 #define NOT_DONE 0x7fffffff
2187
2188 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
2189                         uint8_t *buf, int nb_sectors)
2190 {
2191     int async_ret;
2192     BlockDriverAIOCB *acb;
2193     struct iovec iov;
2194     QEMUIOVector qiov;
2195
2196     async_context_push();
2197
2198     async_ret = NOT_DONE;
2199     iov.iov_base = (void *)buf;
2200     iov.iov_len = nb_sectors * 512;
2201     qemu_iovec_init_external(&qiov, &iov, 1);
2202     acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2203         bdrv_rw_em_cb, &async_ret);
2204     if (acb == NULL) {
2205         async_ret = -1;
2206         goto fail;
2207     }
2208
2209     while (async_ret == NOT_DONE) {
2210         qemu_aio_wait();
2211     }
2212
2213
2214 fail:
2215     async_context_pop();
2216     return async_ret;
2217 }
2218
2219 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2220                          const uint8_t *buf, int nb_sectors)
2221 {
2222     int async_ret;
2223     BlockDriverAIOCB *acb;
2224     struct iovec iov;
2225     QEMUIOVector qiov;
2226
2227     async_context_push();
2228
2229     async_ret = NOT_DONE;
2230     iov.iov_base = (void *)buf;
2231     iov.iov_len = nb_sectors * 512;
2232     qemu_iovec_init_external(&qiov, &iov, 1);
2233     acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2234         bdrv_rw_em_cb, &async_ret);
2235     if (acb == NULL) {
2236         async_ret = -1;
2237         goto fail;
2238     }
2239     while (async_ret == NOT_DONE) {
2240         qemu_aio_wait();
2241     }
2242
2243 fail:
2244     async_context_pop();
2245     return async_ret;
2246 }
2247
2248 void bdrv_init(void)
2249 {
2250     module_call_init(MODULE_INIT_BLOCK);
2251 }
2252
2253 void bdrv_init_with_whitelist(void)
2254 {
2255     use_bdrv_whitelist = 1;
2256     bdrv_init();
2257 }
2258
2259 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2260                    BlockDriverCompletionFunc *cb, void *opaque)
2261 {
2262     BlockDriverAIOCB *acb;
2263
2264     if (pool->free_aiocb) {
2265         acb = pool->free_aiocb;
2266         pool->free_aiocb = acb->next;
2267     } else {
2268         acb = qemu_mallocz(pool->aiocb_size);
2269         acb->pool = pool;
2270     }
2271     acb->bs = bs;
2272     acb->cb = cb;
2273     acb->opaque = opaque;
2274     return acb;
2275 }
2276
2277 void qemu_aio_release(void *p)
2278 {
2279     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2280     AIOPool *pool = acb->pool;
2281     acb->next = pool->free_aiocb;
2282     pool->free_aiocb = acb;
2283 }
2284
2285 /**************************************************************/
2286 /* removable device support */
2287
2288 /**
2289  * Return TRUE if the media is present
2290  */
2291 int bdrv_is_inserted(BlockDriverState *bs)
2292 {
2293     BlockDriver *drv = bs->drv;
2294     int ret;
2295     if (!drv)
2296         return 0;
2297     if (!drv->bdrv_is_inserted)
2298         return 1;
2299     ret = drv->bdrv_is_inserted(bs);
2300     return ret;
2301 }
2302
2303 /**
2304  * Return TRUE if the media changed since the last call to this
2305  * function. It is currently only used for floppy disks
2306  */
2307 int bdrv_media_changed(BlockDriverState *bs)
2308 {
2309     BlockDriver *drv = bs->drv;
2310     int ret;
2311
2312     if (!drv || !drv->bdrv_media_changed)
2313         ret = -ENOTSUP;
2314     else
2315         ret = drv->bdrv_media_changed(bs);
2316     if (ret == -ENOTSUP)
2317         ret = bs->media_changed;
2318     bs->media_changed = 0;
2319     return ret;
2320 }
2321
2322 /**
2323  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2324  */
2325 int bdrv_eject(BlockDriverState *bs, int eject_flag)
2326 {
2327     BlockDriver *drv = bs->drv;
2328     int ret;
2329
2330     if (bs->locked) {
2331         return -EBUSY;
2332     }
2333
2334     if (!drv || !drv->bdrv_eject) {
2335         ret = -ENOTSUP;
2336     } else {
2337         ret = drv->bdrv_eject(bs, eject_flag);
2338     }
2339     if (ret == -ENOTSUP) {
2340         if (eject_flag)
2341             bdrv_close(bs);
2342         ret = 0;
2343     }
2344
2345     return ret;
2346 }
2347
2348 int bdrv_is_locked(BlockDriverState *bs)
2349 {
2350     return bs->locked;
2351 }
2352
2353 /**
2354  * Lock or unlock the media (if it is locked, the user won't be able
2355  * to eject it manually).
2356  */
2357 void bdrv_set_locked(BlockDriverState *bs, int locked)
2358 {
2359     BlockDriver *drv = bs->drv;
2360
2361     bs->locked = locked;
2362     if (drv && drv->bdrv_set_locked) {
2363         drv->bdrv_set_locked(bs, locked);
2364     }
2365 }
2366
2367 /* needed for generic scsi interface */
2368
2369 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
2370 {
2371     BlockDriver *drv = bs->drv;
2372
2373     if (drv && drv->bdrv_ioctl)
2374         return drv->bdrv_ioctl(bs, req, buf);
2375     return -ENOTSUP;
2376 }
2377
2378 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
2379         unsigned long int req, void *buf,
2380         BlockDriverCompletionFunc *cb, void *opaque)
2381 {
2382     BlockDriver *drv = bs->drv;
2383
2384     if (drv && drv->bdrv_aio_ioctl)
2385         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
2386     return NULL;
2387 }
2388
2389
2390
2391 void *qemu_blockalign(BlockDriverState *bs, size_t size)
2392 {
2393     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
2394 }
2395
2396 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
2397 {
2398     int64_t bitmap_size;
2399
2400     bs->dirty_count = 0;
2401     if (enable) {
2402         if (!bs->dirty_bitmap) {
2403             bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
2404                     BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
2405             bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
2406
2407             bs->dirty_bitmap = qemu_mallocz(bitmap_size);
2408         }
2409     } else {
2410         if (bs->dirty_bitmap) {
2411             qemu_free(bs->dirty_bitmap);
2412             bs->dirty_bitmap = NULL;
2413         }
2414     }
2415 }
2416
2417 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
2418 {
2419     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
2420
2421     if (bs->dirty_bitmap &&
2422         (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
2423         return bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
2424             (1 << (chunk % (sizeof(unsigned long) * 8)));
2425     } else {
2426         return 0;
2427     }
2428 }
2429
2430 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
2431                       int nr_sectors)
2432 {
2433     set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
2434 }
2435
2436 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
2437 {
2438     return bs->dirty_count;
2439 }
This page took 0.152398 seconds and 4 git commands to generate.