]> Git Repo - qemu.git/blob - block.c
lsi: Purge message queue on reset
[qemu.git] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "monitor.h"
27 #include "block_int.h"
28 #include "module.h"
29 #include "qemu-objects.h"
30
31 #ifdef CONFIG_BSD
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/ioctl.h>
35 #include <sys/queue.h>
36 #ifndef __DragonFly__
37 #include <sys/disk.h>
38 #endif
39 #endif
40
41 #ifdef _WIN32
42 #include <windows.h>
43 #endif
44
45 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
46         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
47         BlockDriverCompletionFunc *cb, void *opaque);
48 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
49         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
50         BlockDriverCompletionFunc *cb, void *opaque);
51 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
52         BlockDriverCompletionFunc *cb, void *opaque);
53 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
54                         uint8_t *buf, int nb_sectors);
55 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
56                          const uint8_t *buf, int nb_sectors);
57 static BlockDriver *find_protocol(const char *filename);
58
/* Block device states that were created with a non-empty device name;
   bdrv_new() appends to this list and bdrv_delete() removes from it. */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* Registered block drivers; bdrv_register() inserts at the head. */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
67
/* Return non-zero when 'path' is absolute.  Everything up to and including
   the first ':' (if there is one) is skipped before the first remaining
   character is tested for a directory separator. */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    if (*start == '\\') {
        return 1;
    }
#endif
    return *start == '/';
}
87
/* Write into 'dest' (at most dest_size bytes, terminator included) the
   location of 'filename' interpreted relative to 'base_path'.  An absolute
   'filename' is copied unchanged.  Otherwise the part of 'base_path' up to
   its last directory separator, or up to its first ':' when that comes
   later, is kept and 'filename' is appended to it.  URL are supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *cut;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* one past the first ':' of base_path, or its start */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* one past the last directory separator of base_path, or its start */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever of the two prefixes is longer */
    cut = after_sep > after_colon ? after_sep : after_colon;
    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
131
132 void bdrv_register(BlockDriver *bdrv)
133 {
134     if (!bdrv->bdrv_aio_readv) {
135         /* add AIO emulation layer */
136         bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
137         bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
138     } else if (!bdrv->bdrv_read) {
139         /* add synchronous IO emulation layer */
140         bdrv->bdrv_read = bdrv_read_em;
141         bdrv->bdrv_write = bdrv_write_em;
142     }
143
144     if (!bdrv->bdrv_aio_flush)
145         bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
146
147     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
148 }
149
150 /* create a new block device (by default it is empty) */
151 BlockDriverState *bdrv_new(const char *device_name)
152 {
153     BlockDriverState *bs;
154
155     bs = qemu_mallocz(sizeof(BlockDriverState));
156     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
157     if (device_name[0] != '\0') {
158         QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
159     }
160     return bs;
161 }
162
163 BlockDriver *bdrv_find_format(const char *format_name)
164 {
165     BlockDriver *drv1;
166     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
167         if (!strcmp(drv1->format_name, format_name)) {
168             return drv1;
169         }
170     }
171     return NULL;
172 }
173
174 static int bdrv_is_whitelisted(BlockDriver *drv)
175 {
176     static const char *whitelist[] = {
177         CONFIG_BDRV_WHITELIST
178     };
179     const char **p;
180
181     if (!whitelist[0])
182         return 1;               /* no whitelist, anything goes */
183
184     for (p = whitelist; *p; p++) {
185         if (!strcmp(drv->format_name, *p)) {
186             return 1;
187         }
188     }
189     return 0;
190 }
191
192 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
193 {
194     BlockDriver *drv = bdrv_find_format(format_name);
195     return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
196 }
197
198 int bdrv_create(BlockDriver *drv, const char* filename,
199     QEMUOptionParameter *options)
200 {
201     if (!drv->bdrv_create)
202         return -ENOTSUP;
203
204     return drv->bdrv_create(filename, options);
205 }
206
207 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
208 {
209     BlockDriver *drv;
210
211     drv = find_protocol(filename);
212     if (drv == NULL) {
213         drv = bdrv_find_format("file");
214     }
215
216     return bdrv_create(drv, filename, options);
217 }
218
#ifdef _WIN32
/* Store the name of a new temporary file in 'filename'.  The name is
   requested from GetTempFileName() inside the directory reported by
   GetTempPath(); 'size' is not used by this variant. */
void get_tmp_filename(char *filename, int size)
{
    char tmp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, tmp_dir);
    GetTempFileName(tmp_dir, "qem", 0, filename);
}
#else
/* Store the name of a new temporary file in 'filename' (at most 'size'
   bytes).  The file is created by mkstemp() in $TMPDIR, or in /tmp when
   TMPDIR is not set, and its descriptor is closed again right away. */
void get_tmp_filename(char *filename, int size)
{
    const char *dir = getenv("TMPDIR");
    int fd;

    /* XXX: race condition possible */
    snprintf(filename, size, "%s/vl.XXXXXX", dir ? dir : "/tmp");
    fd = mkstemp(filename);
    close(fd);
}
#endif
241
#ifdef _WIN32
/* Non-zero when 'filename' starts with an ASCII letter followed by ':'. */
static int is_windows_drive_prefix(const char *filename)
{
    char letter = filename[0];
    int is_letter = (letter >= 'a' && letter <= 'z') ||
                    (letter >= 'A' && letter <= 'Z');

    return is_letter && filename[1] == ':';
}

/* Return 1 when 'filename' is exactly a drive prefix such as "d:", or when
   it starts with "\\.\" or "//./"; return 0 otherwise. */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
#endif
261
262 /*
263  * Detect host devices. By convention, /dev/cdrom[N] is always
264  * recognized as a host CDROM.
265  */
266 static BlockDriver *find_hdev_driver(const char *filename)
267 {
268     int score_max = 0, score;
269     BlockDriver *drv = NULL, *d;
270
271     QLIST_FOREACH(d, &bdrv_drivers, list) {
272         if (d->bdrv_probe_device) {
273             score = d->bdrv_probe_device(filename);
274             if (score > score_max) {
275                 score_max = score;
276                 drv = d;
277             }
278         }
279     }
280
281     return drv;
282 }
283
284 static BlockDriver *find_protocol(const char *filename)
285 {
286     BlockDriver *drv1;
287     char protocol[128];
288     int len;
289     const char *p;
290
291     /* TODO Drivers without bdrv_file_open must be specified explicitly */
292
293 #ifdef _WIN32
294     if (is_windows_drive(filename) ||
295         is_windows_drive_prefix(filename))
296         return bdrv_find_format("file");
297 #endif
298     p = strchr(filename, ':');
299     if (!p) {
300         drv1 = find_hdev_driver(filename);
301         if (!drv1) {
302             drv1 = bdrv_find_format("file");
303         }
304         return drv1;
305     }
306     len = p - filename;
307     if (len > sizeof(protocol) - 1)
308         len = sizeof(protocol) - 1;
309     memcpy(protocol, filename, len);
310     protocol[len] = '\0';
311     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
312         if (drv1->protocol_name &&
313             !strcmp(drv1->protocol_name, protocol)) {
314             return drv1;
315         }
316     }
317     return NULL;
318 }
319
320 static BlockDriver *find_image_format(const char *filename)
321 {
322     int ret, score, score_max;
323     BlockDriver *drv1, *drv;
324     uint8_t buf[2048];
325     BlockDriverState *bs;
326
327     drv = find_protocol(filename);
328     /* no need to test disk image formats for vvfat */
329     if (drv && strcmp(drv->format_name, "vvfat") == 0)
330         return drv;
331
332     ret = bdrv_file_open(&bs, filename, 0);
333     if (ret < 0)
334         return NULL;
335     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
336     bdrv_delete(bs);
337     if (ret < 0) {
338         return NULL;
339     }
340
341     score_max = 0;
342     drv = NULL;
343     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
344         if (drv1->bdrv_probe) {
345             score = drv1->bdrv_probe(buf, ret, filename);
346             if (score > score_max) {
347                 score_max = score;
348                 drv = drv1;
349             }
350         }
351     }
352     return drv;
353 }
354
355 /**
356  * Set the current 'total_sectors' value
357  */
358 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
359 {
360     BlockDriver *drv = bs->drv;
361
362     /* query actual device if possible, otherwise just trust the hint */
363     if (drv->bdrv_getlength) {
364         int64_t length = drv->bdrv_getlength(bs);
365         if (length < 0) {
366             return length;
367         }
368         hint = length >> BDRV_SECTOR_BITS;
369     }
370
371     bs->total_sectors = hint;
372     return 0;
373 }
374
375 /*
376  * Common part for opening disk images and files
377  */
378 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
379     int flags, BlockDriver *drv)
380 {
381     int ret, open_flags;
382
383     assert(drv != NULL);
384
385     bs->file = NULL;
386     bs->total_sectors = 0;
387     bs->is_temporary = 0;
388     bs->encrypted = 0;
389     bs->valid_key = 0;
390     bs->open_flags = flags;
391     /* buffer_alignment defaulted to 512, drivers can change this value */
392     bs->buffer_alignment = 512;
393
394     pstrcpy(bs->filename, sizeof(bs->filename), filename);
395
396     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
397         return -ENOTSUP;
398     }
399
400     bs->drv = drv;
401     bs->opaque = qemu_mallocz(drv->instance_size);
402
403     /*
404      * Yes, BDRV_O_NOCACHE aka O_DIRECT means we have to present a
405      * write cache to the guest.  We do need the fdatasync to flush
406      * out transactions for block allocations, and we maybe have a
407      * volatile write cache in our backing device to deal with.
408      */
409     if (flags & (BDRV_O_CACHE_WB|BDRV_O_NOCACHE))
410         bs->enable_write_cache = 1;
411
412     /*
413      * Clear flags that are internal to the block layer before opening the
414      * image.
415      */
416     open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
417
418     /*
419      * Snapshots should be writeable.
420      */
421     if (bs->is_temporary) {
422         open_flags |= BDRV_O_RDWR;
423     }
424
425     /* Open the image, either directly or using a protocol */
426     if (drv->bdrv_file_open) {
427         ret = drv->bdrv_file_open(bs, filename, open_flags);
428     } else {
429         ret = bdrv_file_open(&bs->file, filename, open_flags);
430         if (ret >= 0) {
431             ret = drv->bdrv_open(bs, open_flags);
432         }
433     }
434
435     if (ret < 0) {
436         goto free_and_fail;
437     }
438
439     bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
440
441     ret = refresh_total_sectors(bs, bs->total_sectors);
442     if (ret < 0) {
443         goto free_and_fail;
444     }
445
446 #ifndef _WIN32
447     if (bs->is_temporary) {
448         unlink(filename);
449     }
450 #endif
451     return 0;
452
453 free_and_fail:
454     if (bs->file) {
455         bdrv_delete(bs->file);
456         bs->file = NULL;
457     }
458     qemu_free(bs->opaque);
459     bs->opaque = NULL;
460     bs->drv = NULL;
461     return ret;
462 }
463
464 /*
465  * Opens a file using a protocol (file, host_device, nbd, ...)
466  */
467 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
468 {
469     BlockDriverState *bs;
470     BlockDriver *drv;
471     int ret;
472
473     drv = find_protocol(filename);
474     if (!drv) {
475         return -ENOENT;
476     }
477
478     bs = bdrv_new("");
479     ret = bdrv_open_common(bs, filename, flags, drv);
480     if (ret < 0) {
481         bdrv_delete(bs);
482         return ret;
483     }
484     bs->growable = 1;
485     *pbs = bs;
486     return 0;
487 }
488
489 /*
490  * Opens a disk image (raw, qcow2, vmdk, ...)
491  */
492 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
493               BlockDriver *drv)
494 {
495     int ret;
496
497     if (flags & BDRV_O_SNAPSHOT) {
498         BlockDriverState *bs1;
499         int64_t total_size;
500         int is_protocol = 0;
501         BlockDriver *bdrv_qcow2;
502         QEMUOptionParameter *options;
503         char tmp_filename[PATH_MAX];
504         char backing_filename[PATH_MAX];
505
506         /* if snapshot, we create a temporary backing file and open it
507            instead of opening 'filename' directly */
508
509         /* if there is a backing file, use it */
510         bs1 = bdrv_new("");
511         ret = bdrv_open(bs1, filename, 0, drv);
512         if (ret < 0) {
513             bdrv_delete(bs1);
514             return ret;
515         }
516         total_size = bdrv_getlength(bs1) >> BDRV_SECTOR_BITS;
517
518         if (bs1->drv && bs1->drv->protocol_name)
519             is_protocol = 1;
520
521         bdrv_delete(bs1);
522
523         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
524
525         /* Real path is meaningless for protocols */
526         if (is_protocol)
527             snprintf(backing_filename, sizeof(backing_filename),
528                      "%s", filename);
529         else if (!realpath(filename, backing_filename))
530             return -errno;
531
532         bdrv_qcow2 = bdrv_find_format("qcow2");
533         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
534
535         set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size * 512);
536         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
537         if (drv) {
538             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
539                 drv->format_name);
540         }
541
542         ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
543         free_option_parameters(options);
544         if (ret < 0) {
545             return ret;
546         }
547
548         filename = tmp_filename;
549         drv = bdrv_qcow2;
550         bs->is_temporary = 1;
551     }
552
553     /* Find the right image format driver */
554     if (!drv) {
555         drv = find_image_format(filename);
556     }
557
558     if (!drv) {
559         ret = -ENOENT;
560         goto unlink_and_fail;
561     }
562
563     /* Open the image */
564     ret = bdrv_open_common(bs, filename, flags, drv);
565     if (ret < 0) {
566         goto unlink_and_fail;
567     }
568
569     /* If there is a backing file, use it */
570     if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
571         char backing_filename[PATH_MAX];
572         int back_flags;
573         BlockDriver *back_drv = NULL;
574
575         bs->backing_hd = bdrv_new("");
576         path_combine(backing_filename, sizeof(backing_filename),
577                      filename, bs->backing_file);
578         if (bs->backing_format[0] != '\0')
579             back_drv = bdrv_find_format(bs->backing_format);
580
581         /* backing files always opened read-only */
582         back_flags =
583             flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
584
585         ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
586         if (ret < 0) {
587             bdrv_close(bs);
588             return ret;
589         }
590         if (bs->is_temporary) {
591             bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
592         } else {
593             /* base image inherits from "parent" */
594             bs->backing_hd->keep_read_only = bs->keep_read_only;
595         }
596     }
597
598     if (!bdrv_key_required(bs)) {
599         /* call the change callback */
600         bs->media_changed = 1;
601         if (bs->change_cb)
602             bs->change_cb(bs->change_opaque);
603     }
604
605     return 0;
606
607 unlink_and_fail:
608     if (bs->is_temporary) {
609         unlink(filename);
610     }
611     return ret;
612 }
613
614 void bdrv_close(BlockDriverState *bs)
615 {
616     if (bs->drv) {
617         if (bs->backing_hd) {
618             bdrv_delete(bs->backing_hd);
619             bs->backing_hd = NULL;
620         }
621         bs->drv->bdrv_close(bs);
622         qemu_free(bs->opaque);
623 #ifdef _WIN32
624         if (bs->is_temporary) {
625             unlink(bs->filename);
626         }
627 #endif
628         bs->opaque = NULL;
629         bs->drv = NULL;
630
631         if (bs->file != NULL) {
632             bdrv_close(bs->file);
633         }
634
635         /* call the change callback */
636         bs->media_changed = 1;
637         if (bs->change_cb)
638             bs->change_cb(bs->change_opaque);
639     }
640 }
641
642 void bdrv_delete(BlockDriverState *bs)
643 {
644     /* remove from list, if necessary */
645     if (bs->device_name[0] != '\0') {
646         QTAILQ_REMOVE(&bdrv_states, bs, list);
647     }
648
649     bdrv_close(bs);
650     if (bs->file != NULL) {
651         bdrv_delete(bs->file);
652     }
653
654     qemu_free(bs);
655 }
656
657 /*
658  * Run consistency checks on an image
659  *
660  * Returns the number of errors or -errno when an internal error occurs
661  */
662 int bdrv_check(BlockDriverState *bs)
663 {
664     if (bs->drv->bdrv_check == NULL) {
665         return -ENOTSUP;
666     }
667
668     return bs->drv->bdrv_check(bs);
669 }
670
671 /* commit COW file into the raw image */
672 int bdrv_commit(BlockDriverState *bs)
673 {
674     BlockDriver *drv = bs->drv;
675     int64_t i, total_sectors;
676     int n, j, ro, open_flags;
677     int ret = 0, rw_ret = 0;
678     unsigned char sector[512];
679     char filename[1024];
680     BlockDriverState *bs_rw, *bs_ro;
681
682     if (!drv)
683         return -ENOMEDIUM;
684     
685     if (!bs->backing_hd) {
686         return -ENOTSUP;
687     }
688
689     if (bs->backing_hd->keep_read_only) {
690         return -EACCES;
691     }
692     
693     ro = bs->backing_hd->read_only;
694     strncpy(filename, bs->backing_hd->filename, sizeof(filename));
695     open_flags =  bs->backing_hd->open_flags;
696
697     if (ro) {
698         /* re-open as RW */
699         bdrv_delete(bs->backing_hd);
700         bs->backing_hd = NULL;
701         bs_rw = bdrv_new("");
702         rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR, NULL);
703         if (rw_ret < 0) {
704             bdrv_delete(bs_rw);
705             /* try to re-open read-only */
706             bs_ro = bdrv_new("");
707             ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, NULL);
708             if (ret < 0) {
709                 bdrv_delete(bs_ro);
710                 /* drive not functional anymore */
711                 bs->drv = NULL;
712                 return ret;
713             }
714             bs->backing_hd = bs_ro;
715             return rw_ret;
716         }
717         bs->backing_hd = bs_rw;
718     }
719
720     total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
721     for (i = 0; i < total_sectors;) {
722         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
723             for(j = 0; j < n; j++) {
724                 if (bdrv_read(bs, i, sector, 1) != 0) {
725                     ret = -EIO;
726                     goto ro_cleanup;
727                 }
728
729                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
730                     ret = -EIO;
731                     goto ro_cleanup;
732                 }
733                 i++;
734             }
735         } else {
736             i += n;
737         }
738     }
739
740     if (drv->bdrv_make_empty) {
741         ret = drv->bdrv_make_empty(bs);
742         bdrv_flush(bs);
743     }
744
745     /*
746      * Make sure all data we wrote to the backing device is actually
747      * stable on disk.
748      */
749     if (bs->backing_hd)
750         bdrv_flush(bs->backing_hd);
751
752 ro_cleanup:
753
754     if (ro) {
755         /* re-open as RO */
756         bdrv_delete(bs->backing_hd);
757         bs->backing_hd = NULL;
758         bs_ro = bdrv_new("");
759         ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, NULL);
760         if (ret < 0) {
761             bdrv_delete(bs_ro);
762             /* drive not functional anymore */
763             bs->drv = NULL;
764             return ret;
765         }
766         bs->backing_hd = bs_ro;
767         bs->backing_hd->keep_read_only = 0;
768     }
769
770     return ret;
771 }
772
773 /*
774  * Return values:
775  * 0        - success
776  * -EINVAL  - backing format specified, but no file
777  * -ENOSPC  - can't update the backing file because no space is left in the
778  *            image file header
779  * -ENOTSUP - format driver doesn't support changing the backing file
780  */
781 int bdrv_change_backing_file(BlockDriverState *bs,
782     const char *backing_file, const char *backing_fmt)
783 {
784     BlockDriver *drv = bs->drv;
785
786     if (drv->bdrv_change_backing_file != NULL) {
787         return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
788     } else {
789         return -ENOTSUP;
790     }
791 }
792
793 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
794                                    size_t size)
795 {
796     int64_t len;
797
798     if (!bdrv_is_inserted(bs))
799         return -ENOMEDIUM;
800
801     if (bs->growable)
802         return 0;
803
804     len = bdrv_getlength(bs);
805
806     if (offset < 0)
807         return -EIO;
808
809     if ((offset > len) || (len - offset < size))
810         return -EIO;
811
812     return 0;
813 }
814
815 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
816                               int nb_sectors)
817 {
818     return bdrv_check_byte_request(bs, sector_num * 512, nb_sectors * 512);
819 }
820
821 /* return < 0 if error. See bdrv_write() for the return codes */
822 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
823               uint8_t *buf, int nb_sectors)
824 {
825     BlockDriver *drv = bs->drv;
826
827     if (!drv)
828         return -ENOMEDIUM;
829     if (bdrv_check_request(bs, sector_num, nb_sectors))
830         return -EIO;
831
832     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
833 }
834
835 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
836                              int nb_sectors, int dirty)
837 {
838     int64_t start, end;
839     unsigned long val, idx, bit;
840
841     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
842     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
843
844     for (; start <= end; start++) {
845         idx = start / (sizeof(unsigned long) * 8);
846         bit = start % (sizeof(unsigned long) * 8);
847         val = bs->dirty_bitmap[idx];
848         if (dirty) {
849             if (!(val & (1 << bit))) {
850                 bs->dirty_count++;
851                 val |= 1 << bit;
852             }
853         } else {
854             if (val & (1 << bit)) {
855                 bs->dirty_count--;
856                 val &= ~(1 << bit);
857             }
858         }
859         bs->dirty_bitmap[idx] = val;
860     }
861 }
862
863 /* Return < 0 if error. Important errors are:
864   -EIO         generic I/O error (may happen for all errors)
865   -ENOMEDIUM   No media inserted.
866   -EINVAL      Invalid sector number or nb_sectors
867   -EACCES      Trying to write a read-only device
868 */
869 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
870                const uint8_t *buf, int nb_sectors)
871 {
872     BlockDriver *drv = bs->drv;
873     if (!bs->drv)
874         return -ENOMEDIUM;
875     if (bs->read_only)
876         return -EACCES;
877     if (bdrv_check_request(bs, sector_num, nb_sectors))
878         return -EIO;
879
880     if (bs->dirty_bitmap) {
881         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
882     }
883
884     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
885         bs->wr_highest_sector = sector_num + nb_sectors - 1;
886     }
887
888     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
889 }
890
891 int bdrv_pread(BlockDriverState *bs, int64_t offset,
892                void *buf, int count1)
893 {
894     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
895     int len, nb_sectors, count;
896     int64_t sector_num;
897     int ret;
898
899     count = count1;
900     /* first read to align to sector start */
901     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
902     if (len > count)
903         len = count;
904     sector_num = offset >> BDRV_SECTOR_BITS;
905     if (len > 0) {
906         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
907             return ret;
908         memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
909         count -= len;
910         if (count == 0)
911             return count1;
912         sector_num++;
913         buf += len;
914     }
915
916     /* read the sectors "in place" */
917     nb_sectors = count >> BDRV_SECTOR_BITS;
918     if (nb_sectors > 0) {
919         if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
920             return ret;
921         sector_num += nb_sectors;
922         len = nb_sectors << BDRV_SECTOR_BITS;
923         buf += len;
924         count -= len;
925     }
926
927     /* add data from the last sector */
928     if (count > 0) {
929         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
930             return ret;
931         memcpy(buf, tmp_buf, count);
932     }
933     return count1;
934 }
935
936 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
937                 const void *buf, int count1)
938 {
939     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
940     int len, nb_sectors, count;
941     int64_t sector_num;
942     int ret;
943
944     count = count1;
945     /* first write to align to sector start */
946     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
947     if (len > count)
948         len = count;
949     sector_num = offset >> BDRV_SECTOR_BITS;
950     if (len > 0) {
951         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
952             return ret;
953         memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
954         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
955             return ret;
956         count -= len;
957         if (count == 0)
958             return count1;
959         sector_num++;
960         buf += len;
961     }
962
963     /* write the sectors "in place" */
964     nb_sectors = count >> BDRV_SECTOR_BITS;
965     if (nb_sectors > 0) {
966         if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
967             return ret;
968         sector_num += nb_sectors;
969         len = nb_sectors << BDRV_SECTOR_BITS;
970         buf += len;
971         count -= len;
972     }
973
974     /* add data from the last sector */
975     if (count > 0) {
976         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
977             return ret;
978         memcpy(tmp_buf, buf, count);
979         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
980             return ret;
981     }
982     return count1;
983 }
984
985 /**
986  * Truncate file to 'offset' bytes (needed only for file protocols)
987  */
988 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
989 {
990     BlockDriver *drv = bs->drv;
991     int ret;
992     if (!drv)
993         return -ENOMEDIUM;
994     if (!drv->bdrv_truncate)
995         return -ENOTSUP;
996     if (bs->read_only)
997         return -EACCES;
998     ret = drv->bdrv_truncate(bs, offset);
999     if (ret == 0) {
1000         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1001     }
1002     return ret;
1003 }
1004
1005 /**
1006  * Length of a file in bytes. Return < 0 if error or unknown.
1007  */
1008 int64_t bdrv_getlength(BlockDriverState *bs)
1009 {
1010     BlockDriver *drv = bs->drv;
1011     if (!drv)
1012         return -ENOMEDIUM;
1013
1014     /* Fixed size devices use the total_sectors value for speed instead of
1015        issuing a length query (like lseek) on each call.  Also, legacy block
1016        drivers don't provide a bdrv_getlength function and must use
1017        total_sectors. */
1018     if (!bs->growable || !drv->bdrv_getlength) {
1019         return bs->total_sectors * BDRV_SECTOR_SIZE;
1020     }
1021     return drv->bdrv_getlength(bs);
1022 }
1023
1024 /* return 0 as number of sectors if no device present or error */
1025 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1026 {
1027     int64_t length;
1028     length = bdrv_getlength(bs);
1029     if (length < 0)
1030         length = 0;
1031     else
1032         length = length >> BDRV_SECTOR_BITS;
1033     *nb_sectors_ptr = length;
1034 }
1035
/* One 16-byte entry of the MSDOS partition table.  guess_disk_lchs() reads
   four of these starting at byte 0x1be of sector 0; the 32-bit fields are
   little-endian on disk (read with le32_to_cpu). */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} __attribute__((packed));
1048
1049 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1050 static int guess_disk_lchs(BlockDriverState *bs,
1051                            int *pcylinders, int *pheads, int *psectors)
1052 {
1053     uint8_t buf[512];
1054     int ret, i, heads, sectors, cylinders;
1055     struct partition *p;
1056     uint32_t nr_sects;
1057     uint64_t nb_sectors;
1058
1059     bdrv_get_geometry(bs, &nb_sectors);
1060
1061     ret = bdrv_read(bs, 0, buf, 1);
1062     if (ret < 0)
1063         return -1;
1064     /* test msdos magic */
1065     if (buf[510] != 0x55 || buf[511] != 0xaa)
1066         return -1;
1067     for(i = 0; i < 4; i++) {
1068         p = ((struct partition *)(buf + 0x1be)) + i;
1069         nr_sects = le32_to_cpu(p->nr_sects);
1070         if (nr_sects && p->end_head) {
1071             /* We make the assumption that the partition terminates on
1072                a cylinder boundary */
1073             heads = p->end_head + 1;
1074             sectors = p->end_sector & 63;
1075             if (sectors == 0)
1076                 continue;
1077             cylinders = nb_sectors / (heads * sectors);
1078             if (cylinders < 1 || cylinders > 16383)
1079                 continue;
1080             *pheads = heads;
1081             *psectors = sectors;
1082             *pcylinders = cylinders;
1083 #if 0
1084             printf("guessed geometry: LCHS=%d %d %d\n",
1085                    cylinders, heads, sectors);
1086 #endif
1087             return 0;
1088         }
1089     }
1090     return -1;
1091 }
1092
1093 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1094 {
1095     int translation, lba_detected = 0;
1096     int cylinders, heads, secs;
1097     uint64_t nb_sectors;
1098
1099     /* if a geometry hint is available, use it */
1100     bdrv_get_geometry(bs, &nb_sectors);
1101     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1102     translation = bdrv_get_translation_hint(bs);
1103     if (cylinders != 0) {
1104         *pcyls = cylinders;
1105         *pheads = heads;
1106         *psecs = secs;
1107     } else {
1108         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1109             if (heads > 16) {
1110                 /* if heads > 16, it means that a BIOS LBA
1111                    translation was active, so the default
1112                    hardware geometry is OK */
1113                 lba_detected = 1;
1114                 goto default_geometry;
1115             } else {
1116                 *pcyls = cylinders;
1117                 *pheads = heads;
1118                 *psecs = secs;
1119                 /* disable any translation to be in sync with
1120                    the logical geometry */
1121                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1122                     bdrv_set_translation_hint(bs,
1123                                               BIOS_ATA_TRANSLATION_NONE);
1124                 }
1125             }
1126         } else {
1127         default_geometry:
1128             /* if no geometry, use a standard physical disk geometry */
1129             cylinders = nb_sectors / (16 * 63);
1130
1131             if (cylinders > 16383)
1132                 cylinders = 16383;
1133             else if (cylinders < 2)
1134                 cylinders = 2;
1135             *pcyls = cylinders;
1136             *pheads = 16;
1137             *psecs = 63;
1138             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1139                 if ((*pcyls * *pheads) <= 131072) {
1140                     bdrv_set_translation_hint(bs,
1141                                               BIOS_ATA_TRANSLATION_LARGE);
1142                 } else {
1143                     bdrv_set_translation_hint(bs,
1144                                               BIOS_ATA_TRANSLATION_LBA);
1145                 }
1146             }
1147         }
1148         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1149     }
1150 }
1151
/* Record the cylinders/heads/sectors geometry hint on the device state. */
void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}
1159
1160 void bdrv_set_type_hint(BlockDriverState *bs, int type)
1161 {
1162     bs->type = type;
1163     bs->removable = ((type == BDRV_TYPE_CDROM ||
1164                       type == BDRV_TYPE_FLOPPY));
1165 }
1166
/* Record the translation hint on the device state. */
void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}
1171
/* Copy the stored cylinders/heads/sectors geometry hint into the three
   output parameters. */
void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}
1179
/* Return the stored device type hint. */
int bdrv_get_type_hint(BlockDriverState *bs)
{
    return bs->type;
}
1184
/* Return the stored translation hint. */
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}
1189
/* Return the device's removable flag. */
int bdrv_is_removable(BlockDriverState *bs)
{
    return bs->removable;
}
1194
/* Return the device's read_only flag. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
1199
/* Return the device's sg flag. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
1204
/* Return the device's enable_write_cache setting. Despite the name this
   only reads the field; it does not change it. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
1209
/* XXX: no longer used */
/* Install the change callback and the opaque pointer that is handed to it
   when it is invoked. */
void bdrv_set_change_cb(BlockDriverState *bs,
                        void (*change_cb)(void *opaque), void *opaque)
{
    bs->change_cb = change_cb;
    bs->change_opaque = opaque;
}
1217
1218 int bdrv_is_encrypted(BlockDriverState *bs)
1219 {
1220     if (bs->backing_hd && bs->backing_hd->encrypted)
1221         return 1;
1222     return bs->encrypted;
1223 }
1224
1225 int bdrv_key_required(BlockDriverState *bs)
1226 {
1227     BlockDriverState *backing_hd = bs->backing_hd;
1228
1229     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1230         return 1;
1231     return (bs->encrypted && !bs->valid_key);
1232 }
1233
1234 int bdrv_set_key(BlockDriverState *bs, const char *key)
1235 {
1236     int ret;
1237     if (bs->backing_hd && bs->backing_hd->encrypted) {
1238         ret = bdrv_set_key(bs->backing_hd, key);
1239         if (ret < 0)
1240             return ret;
1241         if (!bs->encrypted)
1242             return 0;
1243     }
1244     if (!bs->encrypted) {
1245         return -EINVAL;
1246     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1247         return -ENOMEDIUM;
1248     }
1249     ret = bs->drv->bdrv_set_key(bs, key);
1250     if (ret < 0) {
1251         bs->valid_key = 0;
1252     } else if (!bs->valid_key) {
1253         bs->valid_key = 1;
1254         /* call the change callback now, we skipped it on open */
1255         bs->media_changed = 1;
1256         if (bs->change_cb)
1257             bs->change_cb(bs->change_opaque);
1258     }
1259     return ret;
1260 }
1261
1262 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1263 {
1264     if (!bs->drv) {
1265         buf[0] = '\0';
1266     } else {
1267         pstrcpy(buf, buf_size, bs->drv->format_name);
1268     }
1269 }
1270
1271 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1272                          void *opaque)
1273 {
1274     BlockDriver *drv;
1275
1276     QLIST_FOREACH(drv, &bdrv_drivers, list) {
1277         it(opaque, drv->format_name);
1278     }
1279 }
1280
1281 BlockDriverState *bdrv_find(const char *name)
1282 {
1283     BlockDriverState *bs;
1284
1285     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1286         if (!strcmp(name, bs->device_name)) {
1287             return bs;
1288         }
1289     }
1290     return NULL;
1291 }
1292
1293 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1294 {
1295     BlockDriverState *bs;
1296
1297     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1298         it(opaque, bs);
1299     }
1300 }
1301
/* Return the device name stored in the device state (a pointer into bs,
   not a copy). */
const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}
1306
1307 void bdrv_flush(BlockDriverState *bs)
1308 {
1309     if (bs->drv && bs->drv->bdrv_flush)
1310         bs->drv->bdrv_flush(bs);
1311 }
1312
1313 void bdrv_flush_all(void)
1314 {
1315     BlockDriverState *bs;
1316
1317     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1318         if (bs->drv && !bdrv_is_read_only(bs) &&
1319             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
1320             bdrv_flush(bs);
1321         }
1322     }
1323 }
1324
1325 int bdrv_has_zero_init(BlockDriverState *bs)
1326 {
1327     assert(bs->drv);
1328
1329     if (bs->drv->no_zero_init) {
1330         return 0;
1331     } else if (bs->file) {
1332         return bdrv_has_zero_init(bs->file);
1333     }
1334
1335     return 1;
1336 }
1337
1338 /*
1339  * Returns true iff the specified sector is present in the disk image. Drivers
1340  * not implementing the functionality are assumed to not support backing files,
1341  * hence all their sectors are reported as allocated.
1342  *
1343  * 'pnum' is set to the number of sectors (including and immediately following
1344  * the specified sector) that are known to be in the same
1345  * allocated/unallocated state.
1346  *
1347  * 'nb_sectors' is the max value 'pnum' should be set to.
1348  */
1349 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1350         int *pnum)
1351 {
1352     int64_t n;
1353     if (!bs->drv->bdrv_is_allocated) {
1354         if (sector_num >= bs->total_sectors) {
1355             *pnum = 0;
1356             return 0;
1357         }
1358         n = bs->total_sectors - sector_num;
1359         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1360         return 1;
1361     }
1362     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1363 }
1364
1365 void bdrv_mon_event(const BlockDriverState *bdrv,
1366                     BlockMonEventAction action, int is_read)
1367 {
1368     QObject *data;
1369     const char *action_str;
1370
1371     switch (action) {
1372     case BDRV_ACTION_REPORT:
1373         action_str = "report";
1374         break;
1375     case BDRV_ACTION_IGNORE:
1376         action_str = "ignore";
1377         break;
1378     case BDRV_ACTION_STOP:
1379         action_str = "stop";
1380         break;
1381     default:
1382         abort();
1383     }
1384
1385     data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1386                               bdrv->device_name,
1387                               action_str,
1388                               is_read ? "read" : "write");
1389     monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1390
1391     qobject_decref(data);
1392 }
1393
1394 static void bdrv_print_dict(QObject *obj, void *opaque)
1395 {
1396     QDict *bs_dict;
1397     Monitor *mon = opaque;
1398
1399     bs_dict = qobject_to_qdict(obj);
1400
1401     monitor_printf(mon, "%s: type=%s removable=%d",
1402                         qdict_get_str(bs_dict, "device"),
1403                         qdict_get_str(bs_dict, "type"),
1404                         qdict_get_bool(bs_dict, "removable"));
1405
1406     if (qdict_get_bool(bs_dict, "removable")) {
1407         monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1408     }
1409
1410     if (qdict_haskey(bs_dict, "inserted")) {
1411         QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1412
1413         monitor_printf(mon, " file=");
1414         monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1415         if (qdict_haskey(qdict, "backing_file")) {
1416             monitor_printf(mon, " backing_file=");
1417             monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1418         }
1419         monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1420                             qdict_get_bool(qdict, "ro"),
1421                             qdict_get_str(qdict, "drv"),
1422                             qdict_get_bool(qdict, "encrypted"));
1423     } else {
1424         monitor_printf(mon, " [not inserted]");
1425     }
1426
1427     monitor_printf(mon, "\n");
1428 }
1429
/* Print every entry of the QList 'data' on the monitor by running
   bdrv_print_dict() over it. */
void bdrv_info_print(Monitor *mon, const QObject *data)
{
    qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
}
1434
1435 /**
1436  * bdrv_info(): Block devices information
1437  *
1438  * Each block device information is stored in a QDict and the
1439  * returned QObject is a QList of all devices.
1440  *
1441  * The QDict contains the following:
1442  *
1443  * - "device": device name
1444  * - "type": device type
1445  * - "removable": true if the device is removable, false otherwise
1446  * - "locked": true if the device is locked, false otherwise
1447  * - "inserted": only present if the device is inserted, it is a QDict
1448  *    containing the following:
1449  *          - "file": device file name
1450  *          - "ro": true if read-only, false otherwise
1451  *          - "drv": driver format name
1452  *          - "backing_file": backing file name if one is used
1453  *          - "encrypted": true if encrypted, false otherwise
1454  *
1455  * Example:
1456  *
1457  * [ { "device": "ide0-hd0", "type": "hd", "removable": false, "locked": false,
1458  *     "inserted": { "file": "/tmp/foobar", "ro": false, "drv": "qcow2" } },
1459  *   { "device": "floppy0", "type": "floppy", "removable": true,
1460  *     "locked": false } ]
1461  */
1462 void bdrv_info(Monitor *mon, QObject **ret_data)
1463 {
1464     QList *bs_list;
1465     BlockDriverState *bs;
1466
1467     bs_list = qlist_new();
1468
1469     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1470         QObject *bs_obj;
1471         const char *type = "unknown";
1472
1473         switch(bs->type) {
1474         case BDRV_TYPE_HD:
1475             type = "hd";
1476             break;
1477         case BDRV_TYPE_CDROM:
1478             type = "cdrom";
1479             break;
1480         case BDRV_TYPE_FLOPPY:
1481             type = "floppy";
1482             break;
1483         }
1484
1485         bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': %s, "
1486                                     "'removable': %i, 'locked': %i }",
1487                                     bs->device_name, type, bs->removable,
1488                                     bs->locked);
1489
1490         if (bs->drv) {
1491             QObject *obj;
1492             QDict *bs_dict = qobject_to_qdict(bs_obj);
1493
1494             obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1495                                      "'encrypted': %i }",
1496                                      bs->filename, bs->read_only,
1497                                      bs->drv->format_name,
1498                                      bdrv_is_encrypted(bs));
1499             if (bs->backing_file[0] != '\0') {
1500                 QDict *qdict = qobject_to_qdict(obj);
1501                 qdict_put(qdict, "backing_file",
1502                           qstring_from_str(bs->backing_file));
1503             }
1504
1505             qdict_put_obj(bs_dict, "inserted", obj);
1506         }
1507         qlist_append_obj(bs_list, bs_obj);
1508     }
1509
1510     *ret_data = QOBJECT(bs_list);
1511 }
1512
1513 static void bdrv_stats_iter(QObject *data, void *opaque)
1514 {
1515     QDict *qdict;
1516     Monitor *mon = opaque;
1517
1518     qdict = qobject_to_qdict(data);
1519     monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1520
1521     qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1522     monitor_printf(mon, " rd_bytes=%" PRId64
1523                         " wr_bytes=%" PRId64
1524                         " rd_operations=%" PRId64
1525                         " wr_operations=%" PRId64
1526                         "\n",
1527                         qdict_get_int(qdict, "rd_bytes"),
1528                         qdict_get_int(qdict, "wr_bytes"),
1529                         qdict_get_int(qdict, "rd_operations"),
1530                         qdict_get_int(qdict, "wr_operations"));
1531 }
1532
/* Print every entry of the QList 'data' on the monitor by running
   bdrv_stats_iter() over it. */
void bdrv_stats_print(Monitor *mon, const QObject *data)
{
    qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
}
1537
1538 static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
1539 {
1540     QObject *res;
1541     QDict *dict;
1542
1543     res = qobject_from_jsonf("{ 'stats': {"
1544                              "'rd_bytes': %" PRId64 ","
1545                              "'wr_bytes': %" PRId64 ","
1546                              "'rd_operations': %" PRId64 ","
1547                              "'wr_operations': %" PRId64 ","
1548                              "'wr_highest_offset': %" PRId64
1549                              "} }",
1550                              bs->rd_bytes, bs->wr_bytes,
1551                              bs->rd_ops, bs->wr_ops,
1552                              bs->wr_highest_sector * 512);
1553     dict  = qobject_to_qdict(res);
1554
1555     if (*bs->device_name) {
1556         qdict_put(dict, "device", qstring_from_str(bs->device_name));
1557     }
1558
1559     if (bs->file) {
1560         QObject *parent = bdrv_info_stats_bs(bs->file);
1561         qdict_put_obj(dict, "parent", parent);
1562     }
1563
1564     return res;
1565 }
1566
1567 /**
1568  * bdrv_info_stats(): show block device statistics
1569  *
1570  * Each device statistic information is stored in a QDict and
1571  * the returned QObject is a QList of all devices.
1572  *
1573  * The QDict contains the following:
1574  *
1575  * - "device": device name
1576  * - "stats": A QDict with the statistics information, it contains:
1577  *     - "rd_bytes": bytes read
1578  *     - "wr_bytes": bytes written
1579  *     - "rd_operations": read operations
1580  *     - "wr_operations": write operations
1581  *     - "wr_highest_offset": Highest offset of a sector written since the
1582  *       BlockDriverState has been opened
1583  *     - "parent": Contains recursively the statistics of the underlying
1584  *       protocol (e.g. the host file for a qcow2 image). If there is no
1585  *       underlying protocol, this field is omitted.
1586  *
1587  * Example:
1588  *
1589  * [ { "device": "ide0-hd0",
1590  *               "stats": { "rd_bytes": 512,
1591  *                          "wr_bytes": 0,
1592  *                          "rd_operations": 1,
1593  *                          "wr_operations": 0,
1594  *                          "wr_highest_offset": 0,
1595  *                          "parent": {
1596  *                              "stats": { "rd_bytes": 1024,
1597  *                                         "wr_bytes": 0,
1598  *                                         "rd_operations": 2,
1599  *                                         "wr_operations": 0,
1600  *                                         "wr_highest_offset": 0,
1601  *                              }
1602  *                          } } },
1603  *   { "device": "ide1-cd0",
1604  *               "stats": { "rd_bytes": 0,
1605  *                          "wr_bytes": 0,
1606  *                          "rd_operations": 0,
1607  *                          "wr_operations": 0,
1608  *                          "wr_highest_offset": 0 } },
1609  */
1610 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
1611 {
1612     QObject *obj;
1613     QList *devices;
1614     BlockDriverState *bs;
1615
1616     devices = qlist_new();
1617
1618     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1619         obj = bdrv_info_stats_bs(bs);
1620         qlist_append_obj(devices, obj);
1621     }
1622
1623     *ret_data = QOBJECT(devices);
1624 }
1625
1626 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1627 {
1628     if (bs->backing_hd && bs->backing_hd->encrypted)
1629         return bs->backing_file;
1630     else if (bs->encrypted)
1631         return bs->filename;
1632     else
1633         return NULL;
1634 }
1635
1636 void bdrv_get_backing_filename(BlockDriverState *bs,
1637                                char *filename, int filename_size)
1638 {
1639     if (!bs->backing_file) {
1640         pstrcpy(filename, filename_size, "");
1641     } else {
1642         pstrcpy(filename, filename_size, bs->backing_file);
1643     }
1644 }
1645
1646 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1647                           const uint8_t *buf, int nb_sectors)
1648 {
1649     BlockDriver *drv = bs->drv;
1650     if (!drv)
1651         return -ENOMEDIUM;
1652     if (!drv->bdrv_write_compressed)
1653         return -ENOTSUP;
1654     if (bdrv_check_request(bs, sector_num, nb_sectors))
1655         return -EIO;
1656
1657     if (bs->dirty_bitmap) {
1658         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1659     }
1660
1661     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1662 }
1663
1664 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1665 {
1666     BlockDriver *drv = bs->drv;
1667     if (!drv)
1668         return -ENOMEDIUM;
1669     if (!drv->bdrv_get_info)
1670         return -ENOTSUP;
1671     memset(bdi, 0, sizeof(*bdi));
1672     return drv->bdrv_get_info(bs, bdi);
1673 }
1674
1675 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1676                       int64_t pos, int size)
1677 {
1678     BlockDriver *drv = bs->drv;
1679     if (!drv)
1680         return -ENOMEDIUM;
1681     if (!drv->bdrv_save_vmstate)
1682         return -ENOTSUP;
1683     return drv->bdrv_save_vmstate(bs, buf, pos, size);
1684 }
1685
1686 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
1687                       int64_t pos, int size)
1688 {
1689     BlockDriver *drv = bs->drv;
1690     if (!drv)
1691         return -ENOMEDIUM;
1692     if (!drv->bdrv_load_vmstate)
1693         return -ENOTSUP;
1694     return drv->bdrv_load_vmstate(bs, buf, pos, size);
1695 }
1696
1697 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
1698 {
1699     BlockDriver *drv = bs->drv;
1700
1701     if (!drv || !drv->bdrv_debug_event) {
1702         return;
1703     }
1704
1705     return drv->bdrv_debug_event(bs, event);
1706
1707 }
1708
1709 /**************************************************************/
1710 /* handling of snapshots */
1711
1712 int bdrv_snapshot_create(BlockDriverState *bs,
1713                          QEMUSnapshotInfo *sn_info)
1714 {
1715     BlockDriver *drv = bs->drv;
1716     if (!drv)
1717         return -ENOMEDIUM;
1718     if (!drv->bdrv_snapshot_create)
1719         return -ENOTSUP;
1720     return drv->bdrv_snapshot_create(bs, sn_info);
1721 }
1722
1723 int bdrv_snapshot_goto(BlockDriverState *bs,
1724                        const char *snapshot_id)
1725 {
1726     BlockDriver *drv = bs->drv;
1727     if (!drv)
1728         return -ENOMEDIUM;
1729     if (!drv->bdrv_snapshot_goto)
1730         return -ENOTSUP;
1731     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1732 }
1733
1734 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1735 {
1736     BlockDriver *drv = bs->drv;
1737     if (!drv)
1738         return -ENOMEDIUM;
1739     if (!drv->bdrv_snapshot_delete)
1740         return -ENOTSUP;
1741     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1742 }
1743
1744 int bdrv_snapshot_list(BlockDriverState *bs,
1745                        QEMUSnapshotInfo **psn_info)
1746 {
1747     BlockDriver *drv = bs->drv;
1748     if (!drv)
1749         return -ENOMEDIUM;
1750     if (!drv->bdrv_snapshot_list)
1751         return -ENOTSUP;
1752     return drv->bdrv_snapshot_list(bs, psn_info);
1753 }
1754
#define NB_SUFFIXES 4

/*
 * Format 'size' into buf (at most buf_size bytes) in a short human readable
 * form and return buf.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled
 * by powers of 1024 with a K/M/G/T suffix: one decimal while the scaled
 * value is below 10, a rounded integer otherwise. Anything too large for
 * the other ranges is printed as a rounded integer number of T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < 10 * unit) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit, suffixes[idx]);
            return buf;
        }
        if (size < 1000 * unit || idx == NB_SUFFIXES - 1) {
            /* Adding half a unit rounds to the nearest integer. */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (unit >> 1)) / unit, suffixes[idx]);
            return buf;
        }
    }
    return buf;
}
1784
1785 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1786 {
1787     char buf1[128], date_buf[128], clock_buf[128];
1788 #ifdef _WIN32
1789     struct tm *ptm;
1790 #else
1791     struct tm tm;
1792 #endif
1793     time_t ti;
1794     int64_t secs;
1795
1796     if (!sn) {
1797         snprintf(buf, buf_size,
1798                  "%-10s%-20s%7s%20s%15s",
1799                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1800     } else {
1801         ti = sn->date_sec;
1802 #ifdef _WIN32
1803         ptm = localtime(&ti);
1804         strftime(date_buf, sizeof(date_buf),
1805                  "%Y-%m-%d %H:%M:%S", ptm);
1806 #else
1807         localtime_r(&ti, &tm);
1808         strftime(date_buf, sizeof(date_buf),
1809                  "%Y-%m-%d %H:%M:%S", &tm);
1810 #endif
1811         secs = sn->vm_clock_nsec / 1000000000;
1812         snprintf(clock_buf, sizeof(clock_buf),
1813                  "%02d:%02d:%02d.%03d",
1814                  (int)(secs / 3600),
1815                  (int)((secs / 60) % 60),
1816                  (int)(secs % 60),
1817                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1818         snprintf(buf, buf_size,
1819                  "%-10s%-20s%7s%20s%15s",
1820                  sn->id_str, sn->name,
1821                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1822                  date_buf,
1823                  clock_buf);
1824     }
1825     return buf;
1826 }
1827
1828
1829 /**************************************************************/
1830 /* async I/Os */
1831
1832 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1833                                  QEMUIOVector *qiov, int nb_sectors,
1834                                  BlockDriverCompletionFunc *cb, void *opaque)
1835 {
1836     BlockDriver *drv = bs->drv;
1837     BlockDriverAIOCB *ret;
1838
1839     if (!drv)
1840         return NULL;
1841     if (bdrv_check_request(bs, sector_num, nb_sectors))
1842         return NULL;
1843
1844     ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
1845                               cb, opaque);
1846
1847     if (ret) {
1848         /* Update stats even though technically transfer has not happened. */
1849         bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1850         bs->rd_ops ++;
1851     }
1852
1853     return ret;
1854 }
1855
1856 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1857                                   QEMUIOVector *qiov, int nb_sectors,
1858                                   BlockDriverCompletionFunc *cb, void *opaque)
1859 {
1860     BlockDriver *drv = bs->drv;
1861     BlockDriverAIOCB *ret;
1862
1863     if (!drv)
1864         return NULL;
1865     if (bs->read_only)
1866         return NULL;
1867     if (bdrv_check_request(bs, sector_num, nb_sectors))
1868         return NULL;
1869
1870     if (bs->dirty_bitmap) {
1871         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1872     }
1873
1874     ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
1875                                cb, opaque);
1876
1877     if (ret) {
1878         /* Update stats even though technically transfer has not happened. */
1879         bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1880         bs->wr_ops ++;
1881         if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1882             bs->wr_highest_sector = sector_num + nb_sectors - 1;
1883         }
1884     }
1885
1886     return ret;
1887 }
1888
1889
/* Shared completion state for one multiwrite submission. */
typedef struct MultiwriteCB {
    int error;              /* first negative completion result, 0 if none */
    int num_requests;       /* decremented by multiwrite_cb() per completion */
    int num_callbacks;      /* number of valid entries in callbacks[] */
    struct {
        BlockDriverCompletionFunc *cb;  /* user completion callback */
        void *opaque;                   /* argument passed to cb */
        QEMUIOVector *free_qiov;        /* merged qiov to destroy and free, or NULL */
        void *free_buf;                 /* zero-fill buffer to free, or NULL */
    } callbacks[];
} MultiwriteCB;
1901
1902 static void multiwrite_user_cb(MultiwriteCB *mcb)
1903 {
1904     int i;
1905
1906     for (i = 0; i < mcb->num_callbacks; i++) {
1907         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1908         if (mcb->callbacks[i].free_qiov) {
1909             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
1910         }
1911         qemu_free(mcb->callbacks[i].free_qiov);
1912         qemu_vfree(mcb->callbacks[i].free_buf);
1913     }
1914 }
1915
1916 static void multiwrite_cb(void *opaque, int ret)
1917 {
1918     MultiwriteCB *mcb = opaque;
1919
1920     if (ret < 0 && !mcb->error) {
1921         mcb->error = ret;
1922         multiwrite_user_cb(mcb);
1923     }
1924
1925     mcb->num_requests--;
1926     if (mcb->num_requests == 0) {
1927         if (mcb->error == 0) {
1928             multiwrite_user_cb(mcb);
1929         }
1930         qemu_free(mcb);
1931     }
1932 }
1933
1934 static int multiwrite_req_compare(const void *a, const void *b)
1935 {
1936     return (((BlockRequest*) a)->sector - ((BlockRequest*) b)->sector);
1937 }
1938
1939 /*
1940  * Takes a bunch of requests and tries to merge them. Returns the number of
1941  * requests that remain after merging.
1942  */
1943 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
1944     int num_reqs, MultiwriteCB *mcb)
1945 {
1946     int i, outidx;
1947
1948     // Sort requests by start sector
1949     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
1950
1951     // Check if adjacent requests touch the same clusters. If so, combine them,
1952     // filling up gaps with zero sectors.
1953     outidx = 0;
1954     for (i = 1; i < num_reqs; i++) {
1955         int merge = 0;
1956         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
1957
1958         // This handles the cases that are valid for all block drivers, namely
1959         // exactly sequential writes and overlapping writes.
1960         if (reqs[i].sector <= oldreq_last) {
1961             merge = 1;
1962         }
1963
1964         // The block driver may decide that it makes sense to combine requests
1965         // even if there is a gap of some sectors between them. In this case,
1966         // the gap is filled with zeros (therefore only applicable for yet
1967         // unused space in format like qcow2).
1968         if (!merge && bs->drv->bdrv_merge_requests) {
1969             merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
1970         }
1971
1972         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
1973             merge = 0;
1974         }
1975
1976         if (merge) {
1977             size_t size;
1978             QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov));
1979             qemu_iovec_init(qiov,
1980                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
1981
1982             // Add the first request to the merged one. If the requests are
1983             // overlapping, drop the last sectors of the first request.
1984             size = (reqs[i].sector - reqs[outidx].sector) << 9;
1985             qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
1986
1987             // We might need to add some zeros between the two requests
1988             if (reqs[i].sector > oldreq_last) {
1989                 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
1990                 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
1991                 memset(buf, 0, zero_bytes);
1992                 qemu_iovec_add(qiov, buf, zero_bytes);
1993                 mcb->callbacks[i].free_buf = buf;
1994             }
1995
1996             // Add the second request
1997             qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
1998
1999             reqs[outidx].nb_sectors += reqs[i].nb_sectors;
2000             reqs[outidx].qiov = qiov;
2001
2002             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2003         } else {
2004             outidx++;
2005             reqs[outidx].sector     = reqs[i].sector;
2006             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2007             reqs[outidx].qiov       = reqs[i].qiov;
2008         }
2009     }
2010
2011     return outidx + 1;
2012 }
2013
2014 /*
2015  * Submit multiple AIO write requests at once.
2016  *
2017  * On success, the function returns 0 and all requests in the reqs array have
2018  * been submitted. In error case this function returns -1, and any of the
2019  * requests may or may not be submitted yet. In particular, this means that the
2020  * callback will be called for some of the requests, for others it won't. The
2021  * caller must check the error field of the BlockRequest to wait for the right
2022  * callbacks (if error != 0, no callback will be called).
2023  *
2024  * The implementation may modify the contents of the reqs array, e.g. to merge
2025  * requests. However, the fields opaque and error are left unmodified as they
2026  * are used to signal failure for a single request to the caller.
2027  */
2028 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2029 {
2030     BlockDriverAIOCB *acb;
2031     MultiwriteCB *mcb;
2032     int i;
2033
2034     if (num_reqs == 0) {
2035         return 0;
2036     }
2037
2038     // Create MultiwriteCB structure
2039     mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2040     mcb->num_requests = 0;
2041     mcb->num_callbacks = num_reqs;
2042
2043     for (i = 0; i < num_reqs; i++) {
2044         mcb->callbacks[i].cb = reqs[i].cb;
2045         mcb->callbacks[i].opaque = reqs[i].opaque;
2046     }
2047
2048     // Check for mergable requests
2049     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2050
2051     // Run the aio requests
2052     for (i = 0; i < num_reqs; i++) {
2053         acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2054             reqs[i].nb_sectors, multiwrite_cb, mcb);
2055
2056         if (acb == NULL) {
2057             // We can only fail the whole thing if no request has been
2058             // submitted yet. Otherwise we'll wait for the submitted AIOs to
2059             // complete and report the error in the callback.
2060             if (mcb->num_requests == 0) {
2061                 reqs[i].error = -EIO;
2062                 goto fail;
2063             } else {
2064                 mcb->num_requests++;
2065                 multiwrite_cb(mcb, -EIO);
2066                 break;
2067             }
2068         } else {
2069             mcb->num_requests++;
2070         }
2071     }
2072
2073     return 0;
2074
2075 fail:
2076     free(mcb);
2077     return -1;
2078 }
2079
2080 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2081         BlockDriverCompletionFunc *cb, void *opaque)
2082 {
2083     BlockDriver *drv = bs->drv;
2084
2085     if (!drv)
2086         return NULL;
2087     return drv->bdrv_aio_flush(bs, cb, opaque);
2088 }
2089
2090 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2091 {
2092     acb->pool->cancel(acb);
2093 }
2094
2095
2096 /**************************************************************/
2097 /* async block device emulation */
2098
/*
 * Request state used by the emulated AIO functions in this section. The
 * operation itself is carried out synchronously; the completion callback is
 * delivered later from a bottom half.
 */
typedef struct BlockDriverAIOCBSync {
    /* Must stay the first member: bdrv_aio_cancel_em() casts a
     * BlockDriverAIOCB pointer back to this type. */
    BlockDriverAIOCB common;
    /* Bottom half that runs bdrv_aio_bh_cb() for this request */
    QEMUBH *bh;
    /* Result passed to the completion callback */
    int ret;
    /* vector translation state */
    /* Caller's I/O vector (NULL for flush requests) */
    QEMUIOVector *qiov;
    /* Linear buffer the synchronous read/write operates on (NULL for flush) */
    uint8_t *bounce;
    /* Non-zero: nothing is copied back into qiov on completion */
    int is_write;
} BlockDriverAIOCBSync;
2108
2109 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2110 {
2111     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
2112     qemu_bh_delete(acb->bh);
2113     acb->bh = NULL;
2114     qemu_aio_release(acb);
2115 }
2116
/*
 * Pool from which the emulated AIO functions take their requests: entries are
 * sized for BlockDriverAIOCBSync and cancelled through bdrv_aio_cancel_em().
 */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};
2121
2122 static void bdrv_aio_bh_cb(void *opaque)
2123 {
2124     BlockDriverAIOCBSync *acb = opaque;
2125
2126     if (!acb->is_write)
2127         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2128     qemu_vfree(acb->bounce);
2129     acb->common.cb(acb->common.opaque, acb->ret);
2130     qemu_bh_delete(acb->bh);
2131     acb->bh = NULL;
2132     qemu_aio_release(acb);
2133 }
2134
2135 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2136                                             int64_t sector_num,
2137                                             QEMUIOVector *qiov,
2138                                             int nb_sectors,
2139                                             BlockDriverCompletionFunc *cb,
2140                                             void *opaque,
2141                                             int is_write)
2142
2143 {
2144     BlockDriverAIOCBSync *acb;
2145
2146     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2147     acb->is_write = is_write;
2148     acb->qiov = qiov;
2149     acb->bounce = qemu_blockalign(bs, qiov->size);
2150
2151     if (!acb->bh)
2152         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2153
2154     if (is_write) {
2155         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2156         acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2157     } else {
2158         acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2159     }
2160
2161     qemu_bh_schedule(acb->bh);
2162
2163     return &acb->common;
2164 }
2165
2166 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2167         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2168         BlockDriverCompletionFunc *cb, void *opaque)
2169 {
2170     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2171 }
2172
2173 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2174         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2175         BlockDriverCompletionFunc *cb, void *opaque)
2176 {
2177     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2178 }
2179
2180 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2181         BlockDriverCompletionFunc *cb, void *opaque)
2182 {
2183     BlockDriverAIOCBSync *acb;
2184
2185     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2186     acb->is_write = 1; /* don't bounce in the completion hadler */
2187     acb->qiov = NULL;
2188     acb->bounce = NULL;
2189     acb->ret = 0;
2190
2191     if (!acb->bh)
2192         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2193
2194     bdrv_flush(bs);
2195     qemu_bh_schedule(acb->bh);
2196     return &acb->common;
2197 }
2198
2199 /**************************************************************/
2200 /* sync block device emulation */
2201
/*
 * Completion callback for the synchronous emulation: opaque points to an int
 * that receives the result of the request.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2206
2207 #define NOT_DONE 0x7fffffff
2208
2209 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
2210                         uint8_t *buf, int nb_sectors)
2211 {
2212     int async_ret;
2213     BlockDriverAIOCB *acb;
2214     struct iovec iov;
2215     QEMUIOVector qiov;
2216
2217     async_context_push();
2218
2219     async_ret = NOT_DONE;
2220     iov.iov_base = (void *)buf;
2221     iov.iov_len = nb_sectors * 512;
2222     qemu_iovec_init_external(&qiov, &iov, 1);
2223     acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2224         bdrv_rw_em_cb, &async_ret);
2225     if (acb == NULL) {
2226         async_ret = -1;
2227         goto fail;
2228     }
2229
2230     while (async_ret == NOT_DONE) {
2231         qemu_aio_wait();
2232     }
2233
2234
2235 fail:
2236     async_context_pop();
2237     return async_ret;
2238 }
2239
2240 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2241                          const uint8_t *buf, int nb_sectors)
2242 {
2243     int async_ret;
2244     BlockDriverAIOCB *acb;
2245     struct iovec iov;
2246     QEMUIOVector qiov;
2247
2248     async_context_push();
2249
2250     async_ret = NOT_DONE;
2251     iov.iov_base = (void *)buf;
2252     iov.iov_len = nb_sectors * 512;
2253     qemu_iovec_init_external(&qiov, &iov, 1);
2254     acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2255         bdrv_rw_em_cb, &async_ret);
2256     if (acb == NULL) {
2257         async_ret = -1;
2258         goto fail;
2259     }
2260     while (async_ret == NOT_DONE) {
2261         qemu_aio_wait();
2262     }
2263
2264 fail:
2265     async_context_pop();
2266     return async_ret;
2267 }
2268
/* Initialise the block layer by running the MODULE_INIT_BLOCK init calls. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
2273
/*
 * Same as bdrv_init(), but sets use_bdrv_whitelist beforehand.
 */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
2279
2280 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2281                    BlockDriverCompletionFunc *cb, void *opaque)
2282 {
2283     BlockDriverAIOCB *acb;
2284
2285     if (pool->free_aiocb) {
2286         acb = pool->free_aiocb;
2287         pool->free_aiocb = acb->next;
2288     } else {
2289         acb = qemu_mallocz(pool->aiocb_size);
2290         acb->pool = pool;
2291     }
2292     acb->bs = bs;
2293     acb->cb = cb;
2294     acb->opaque = opaque;
2295     return acb;
2296 }
2297
2298 void qemu_aio_release(void *p)
2299 {
2300     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2301     AIOPool *pool = acb->pool;
2302     acb->next = pool->free_aiocb;
2303     pool->free_aiocb = acb;
2304 }
2305
2306 /**************************************************************/
2307 /* removable device support */
2308
2309 /**
2310  * Return TRUE if the media is present
2311  */
2312 int bdrv_is_inserted(BlockDriverState *bs)
2313 {
2314     BlockDriver *drv = bs->drv;
2315     int ret;
2316     if (!drv)
2317         return 0;
2318     if (!drv->bdrv_is_inserted)
2319         return 1;
2320     ret = drv->bdrv_is_inserted(bs);
2321     return ret;
2322 }
2323
2324 /**
2325  * Return TRUE if the media changed since the last call to this
2326  * function. It is currently only used for floppy disks
2327  */
2328 int bdrv_media_changed(BlockDriverState *bs)
2329 {
2330     BlockDriver *drv = bs->drv;
2331     int ret;
2332
2333     if (!drv || !drv->bdrv_media_changed)
2334         ret = -ENOTSUP;
2335     else
2336         ret = drv->bdrv_media_changed(bs);
2337     if (ret == -ENOTSUP)
2338         ret = bs->media_changed;
2339     bs->media_changed = 0;
2340     return ret;
2341 }
2342
2343 /**
2344  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2345  */
2346 int bdrv_eject(BlockDriverState *bs, int eject_flag)
2347 {
2348     BlockDriver *drv = bs->drv;
2349     int ret;
2350
2351     if (bs->locked) {
2352         return -EBUSY;
2353     }
2354
2355     if (!drv || !drv->bdrv_eject) {
2356         ret = -ENOTSUP;
2357     } else {
2358         ret = drv->bdrv_eject(bs, eject_flag);
2359     }
2360     if (ret == -ENOTSUP) {
2361         if (eject_flag)
2362             bdrv_close(bs);
2363         ret = 0;
2364     }
2365
2366     return ret;
2367 }
2368
/* Return the locked flag of bs, as last stored by bdrv_set_locked(). */
int bdrv_is_locked(BlockDriverState *bs)
{
    return bs->locked;
}
2373
2374 /**
2375  * Lock or unlock the media (if it is locked, the user won't be able
2376  * to eject it manually).
2377  */
2378 void bdrv_set_locked(BlockDriverState *bs, int locked)
2379 {
2380     BlockDriver *drv = bs->drv;
2381
2382     bs->locked = locked;
2383     if (drv && drv->bdrv_set_locked) {
2384         drv->bdrv_set_locked(bs, locked);
2385     }
2386 }
2387
2388 /* needed for generic scsi interface */
2389
2390 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
2391 {
2392     BlockDriver *drv = bs->drv;
2393
2394     if (drv && drv->bdrv_ioctl)
2395         return drv->bdrv_ioctl(bs, req, buf);
2396     return -ENOTSUP;
2397 }
2398
2399 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
2400         unsigned long int req, void *buf,
2401         BlockDriverCompletionFunc *cb, void *opaque)
2402 {
2403     BlockDriver *drv = bs->drv;
2404
2405     if (drv && drv->bdrv_aio_ioctl)
2406         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
2407     return NULL;
2408 }
2409
2410
2411
2412 void *qemu_blockalign(BlockDriverState *bs, size_t size)
2413 {
2414     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
2415 }
2416
2417 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
2418 {
2419     int64_t bitmap_size;
2420
2421     bs->dirty_count = 0;
2422     if (enable) {
2423         if (!bs->dirty_bitmap) {
2424             bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
2425                     BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
2426             bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
2427
2428             bs->dirty_bitmap = qemu_mallocz(bitmap_size);
2429         }
2430     } else {
2431         if (bs->dirty_bitmap) {
2432             qemu_free(bs->dirty_bitmap);
2433             bs->dirty_bitmap = NULL;
2434         }
2435     }
2436 }
2437
2438 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
2439 {
2440     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
2441
2442     if (bs->dirty_bitmap &&
2443         (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
2444         return bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
2445             (1 << (chunk % (sizeof(unsigned long) * 8)));
2446     } else {
2447         return 0;
2448     }
2449 }
2450
/*
 * Clear the dirty state for nr_sectors sectors starting at cur_sector by
 * calling set_dirty_bitmap() with a dirty value of 0.
 */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}
2456
/* Return the current value of the dirty counter kept in bs. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
This page took 0.153281 seconds and 4 git commands to generate.