block: Fix protocol detection for Windows devices
[qemu.git] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "monitor.h"
27 #include "block_int.h"
28 #include "module.h"
29 #include "qemu-objects.h"
30
31 #ifdef CONFIG_BSD
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/ioctl.h>
35 #include <sys/queue.h>
36 #ifndef __DragonFly__
37 #include <sys/disk.h>
38 #endif
39 #endif
40
41 #ifdef _WIN32
42 #include <windows.h>
43 #endif
44
/*
 * Forward declarations.
 *
 * The *_em functions are the emulation helpers that bdrv_register()
 * installs for drivers which do not provide the corresponding callback
 * themselves. find_protocol() is declared here because it is called
 * before its definition further down in this file.
 */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque);
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors);
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors);
static BlockDriver *find_protocol(const char *filename);
58
/*
 * Block device states that were created with a non-empty device name
 * (inserted by bdrv_new(), removed by bdrv_delete()).
 */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* All block drivers registered through bdrv_register() */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
67
/*
 * Return non-zero if 'path' is absolute, zero otherwise.
 *
 * Everything up to and including the first ':' (if any) is skipped before
 * the test, so "proto:/dir/file" counts as absolute. On Windows both '/'
 * and '\' are accepted as separators, and a path that starts with a
 * separator is absolute regardless of any ':' that follows.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
#endif

    /* Look at the first character following an optional "prefix:" */
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;

#ifdef _WIN32
    return (start[0] == '/' || start[0] == '\\');
#else
    return (start[0] == '/');
#endif
}
87
/*
 * Build in 'dest' (at most 'dest_size' bytes) the path designated by
 * 'filename'.
 *
 * An absolute 'filename' is copied unchanged. Otherwise it is appended to
 * the directory part of 'base_path', i.e. everything up to and including
 * the last path separator or, failing that, the first ':' (which is how
 * URL-like names are supported). Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *dir_end;
    int dir_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' of base_path, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just past the last path separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* the directory prefix ends at whichever position is further right */
    dir_end = (after_sep > after_colon) ? after_sep : after_colon;

    dir_len = dir_end - base_path;
    if (dir_len > dest_size - 1) {
        dir_len = dest_size - 1;
    }
    memcpy(dest, base_path, dir_len);
    dest[dir_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
131
132 void bdrv_register(BlockDriver *bdrv)
133 {
134     if (!bdrv->bdrv_aio_readv) {
135         /* add AIO emulation layer */
136         bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
137         bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
138     } else if (!bdrv->bdrv_read) {
139         /* add synchronous IO emulation layer */
140         bdrv->bdrv_read = bdrv_read_em;
141         bdrv->bdrv_write = bdrv_write_em;
142     }
143
144     if (!bdrv->bdrv_aio_flush)
145         bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
146
147     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
148 }
149
150 /* create a new block device (by default it is empty) */
151 BlockDriverState *bdrv_new(const char *device_name)
152 {
153     BlockDriverState *bs;
154
155     bs = qemu_mallocz(sizeof(BlockDriverState));
156     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
157     if (device_name[0] != '\0') {
158         QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
159     }
160     return bs;
161 }
162
163 BlockDriver *bdrv_find_format(const char *format_name)
164 {
165     BlockDriver *drv1;
166     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
167         if (!strcmp(drv1->format_name, format_name)) {
168             return drv1;
169         }
170     }
171     return NULL;
172 }
173
174 static int bdrv_is_whitelisted(BlockDriver *drv)
175 {
176     static const char *whitelist[] = {
177         CONFIG_BDRV_WHITELIST
178     };
179     const char **p;
180
181     if (!whitelist[0])
182         return 1;               /* no whitelist, anything goes */
183
184     for (p = whitelist; *p; p++) {
185         if (!strcmp(drv->format_name, *p)) {
186             return 1;
187         }
188     }
189     return 0;
190 }
191
192 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
193 {
194     BlockDriver *drv = bdrv_find_format(format_name);
195     return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
196 }
197
198 int bdrv_create(BlockDriver *drv, const char* filename,
199     QEMUOptionParameter *options)
200 {
201     if (!drv->bdrv_create)
202         return -ENOTSUP;
203
204     return drv->bdrv_create(filename, options);
205 }
206
207 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
208 {
209     BlockDriver *drv;
210
211     drv = find_protocol(filename);
212     if (drv == NULL) {
213         drv = bdrv_find_format("file");
214     }
215
216     return bdrv_create(drv, filename, options);
217 }
218
#ifdef _WIN32
/*
 * Create a uniquely named temporary file and store its name in
 * 'filename', which can hold 'size' bytes.
 *
 * GetTempFileName() writes up to MAX_PATH bytes regardless of the
 * caller's buffer size, so the name is generated in a local MAX_PATH
 * buffer first and then copied into 'filename' bounded by 'size'.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];
    char temp_file[MAX_PATH] = "";

    if (size <= 0) {
        return;
    }

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, temp_file);
    pstrcpy(filename, size, temp_file);
}
#else
/*
 * Create a uniquely named temporary file in $TMPDIR (or /tmp when TMPDIR
 * is unset) and store its name in 'filename', which can hold 'size'
 * bytes. The file is created by mkstemp() and closed again immediately;
 * only its name is handed back to the caller.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure; there is nothing to close then */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
241
242 #ifdef _WIN32
/*
 * Return non-zero if 'filename' starts with an ASCII letter followed by
 * ':' (for example "c:" or "D:\dir"), zero otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
249
/*
 * Return 1 if 'filename' names a Windows drive or device: either a bare
 * drive specification such as "d:", or a name that starts with "\\.\"
 * or "//./". Return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* exactly a drive letter and a colon, nothing else */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    /* device namespace prefix, with either kind of slash */
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }

    return 0;
}
260 #endif
261
262 /*
263  * Detect host devices. By convention, /dev/cdrom[N] is always
264  * recognized as a host CDROM.
265  */
266 static BlockDriver *find_hdev_driver(const char *filename)
267 {
268     int score_max = 0, score;
269     BlockDriver *drv = NULL, *d;
270
271     QLIST_FOREACH(d, &bdrv_drivers, list) {
272         if (d->bdrv_probe_device) {
273             score = d->bdrv_probe_device(filename);
274             if (score > score_max) {
275                 score_max = score;
276                 drv = d;
277             }
278         }
279     }
280
281     return drv;
282 }
283
284 static BlockDriver *find_protocol(const char *filename)
285 {
286     BlockDriver *drv1;
287     char protocol[128];
288     int len;
289     const char *p;
290     int is_drive;
291
292     /* TODO Drivers without bdrv_file_open must be specified explicitly */
293
294 #ifdef _WIN32
295     is_drive = is_windows_drive(filename) ||
296         is_windows_drive_prefix(filename);
297 #else
298     is_drive = 0;
299 #endif
300     p = strchr(filename, ':');
301     if (!p || is_drive) {
302         drv1 = find_hdev_driver(filename);
303         if (!drv1) {
304             drv1 = bdrv_find_format("file");
305         }
306         return drv1;
307     }
308     len = p - filename;
309     if (len > sizeof(protocol) - 1)
310         len = sizeof(protocol) - 1;
311     memcpy(protocol, filename, len);
312     protocol[len] = '\0';
313     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
314         if (drv1->protocol_name &&
315             !strcmp(drv1->protocol_name, protocol)) {
316             return drv1;
317         }
318     }
319     return NULL;
320 }
321
322 static BlockDriver *find_image_format(const char *filename)
323 {
324     int ret, score, score_max;
325     BlockDriver *drv1, *drv;
326     uint8_t buf[2048];
327     BlockDriverState *bs;
328
329     drv = find_protocol(filename);
330     /* no need to test disk image formats for vvfat */
331     if (drv && strcmp(drv->format_name, "vvfat") == 0)
332         return drv;
333
334     ret = bdrv_file_open(&bs, filename, 0);
335     if (ret < 0)
336         return NULL;
337     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
338     bdrv_delete(bs);
339     if (ret < 0) {
340         return NULL;
341     }
342
343     score_max = 0;
344     drv = NULL;
345     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
346         if (drv1->bdrv_probe) {
347             score = drv1->bdrv_probe(buf, ret, filename);
348             if (score > score_max) {
349                 score_max = score;
350                 drv = drv1;
351             }
352         }
353     }
354     return drv;
355 }
356
357 /**
358  * Set the current 'total_sectors' value
359  */
360 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
361 {
362     BlockDriver *drv = bs->drv;
363
364     /* query actual device if possible, otherwise just trust the hint */
365     if (drv->bdrv_getlength) {
366         int64_t length = drv->bdrv_getlength(bs);
367         if (length < 0) {
368             return length;
369         }
370         hint = length >> BDRV_SECTOR_BITS;
371     }
372
373     bs->total_sectors = hint;
374     return 0;
375 }
376
377 /*
378  * Common part for opening disk images and files
379  */
380 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
381     int flags, BlockDriver *drv)
382 {
383     int ret, open_flags;
384
385     assert(drv != NULL);
386
387     bs->file = NULL;
388     bs->total_sectors = 0;
389     bs->is_temporary = 0;
390     bs->encrypted = 0;
391     bs->valid_key = 0;
392     bs->open_flags = flags;
393     /* buffer_alignment defaulted to 512, drivers can change this value */
394     bs->buffer_alignment = 512;
395
396     pstrcpy(bs->filename, sizeof(bs->filename), filename);
397
398     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
399         return -ENOTSUP;
400     }
401
402     bs->drv = drv;
403     bs->opaque = qemu_mallocz(drv->instance_size);
404
405     /*
406      * Yes, BDRV_O_NOCACHE aka O_DIRECT means we have to present a
407      * write cache to the guest.  We do need the fdatasync to flush
408      * out transactions for block allocations, and we maybe have a
409      * volatile write cache in our backing device to deal with.
410      */
411     if (flags & (BDRV_O_CACHE_WB|BDRV_O_NOCACHE))
412         bs->enable_write_cache = 1;
413
414     /*
415      * Clear flags that are internal to the block layer before opening the
416      * image.
417      */
418     open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
419
420     /*
421      * Snapshots should be writeable.
422      */
423     if (bs->is_temporary) {
424         open_flags |= BDRV_O_RDWR;
425     }
426
427     /* Open the image, either directly or using a protocol */
428     if (drv->bdrv_file_open) {
429         ret = drv->bdrv_file_open(bs, filename, open_flags);
430     } else {
431         ret = bdrv_file_open(&bs->file, filename, open_flags);
432         if (ret >= 0) {
433             ret = drv->bdrv_open(bs, open_flags);
434         }
435     }
436
437     if (ret < 0) {
438         goto free_and_fail;
439     }
440
441     bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
442
443     ret = refresh_total_sectors(bs, bs->total_sectors);
444     if (ret < 0) {
445         goto free_and_fail;
446     }
447
448 #ifndef _WIN32
449     if (bs->is_temporary) {
450         unlink(filename);
451     }
452 #endif
453     return 0;
454
455 free_and_fail:
456     if (bs->file) {
457         bdrv_delete(bs->file);
458         bs->file = NULL;
459     }
460     qemu_free(bs->opaque);
461     bs->opaque = NULL;
462     bs->drv = NULL;
463     return ret;
464 }
465
466 /*
467  * Opens a file using a protocol (file, host_device, nbd, ...)
468  */
469 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
470 {
471     BlockDriverState *bs;
472     BlockDriver *drv;
473     int ret;
474
475     drv = find_protocol(filename);
476     if (!drv) {
477         return -ENOENT;
478     }
479
480     bs = bdrv_new("");
481     ret = bdrv_open_common(bs, filename, flags, drv);
482     if (ret < 0) {
483         bdrv_delete(bs);
484         return ret;
485     }
486     bs->growable = 1;
487     *pbs = bs;
488     return 0;
489 }
490
491 /*
492  * Opens a disk image (raw, qcow2, vmdk, ...)
493  */
494 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
495               BlockDriver *drv)
496 {
497     int ret;
498
499     if (flags & BDRV_O_SNAPSHOT) {
500         BlockDriverState *bs1;
501         int64_t total_size;
502         int is_protocol = 0;
503         BlockDriver *bdrv_qcow2;
504         QEMUOptionParameter *options;
505         char tmp_filename[PATH_MAX];
506         char backing_filename[PATH_MAX];
507
508         /* if snapshot, we create a temporary backing file and open it
509            instead of opening 'filename' directly */
510
511         /* if there is a backing file, use it */
512         bs1 = bdrv_new("");
513         ret = bdrv_open(bs1, filename, 0, drv);
514         if (ret < 0) {
515             bdrv_delete(bs1);
516             return ret;
517         }
518         total_size = bdrv_getlength(bs1) >> BDRV_SECTOR_BITS;
519
520         if (bs1->drv && bs1->drv->protocol_name)
521             is_protocol = 1;
522
523         bdrv_delete(bs1);
524
525         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
526
527         /* Real path is meaningless for protocols */
528         if (is_protocol)
529             snprintf(backing_filename, sizeof(backing_filename),
530                      "%s", filename);
531         else if (!realpath(filename, backing_filename))
532             return -errno;
533
534         bdrv_qcow2 = bdrv_find_format("qcow2");
535         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
536
537         set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size * 512);
538         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
539         if (drv) {
540             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
541                 drv->format_name);
542         }
543
544         ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
545         free_option_parameters(options);
546         if (ret < 0) {
547             return ret;
548         }
549
550         filename = tmp_filename;
551         drv = bdrv_qcow2;
552         bs->is_temporary = 1;
553     }
554
555     /* Find the right image format driver */
556     if (!drv) {
557         drv = find_image_format(filename);
558     }
559
560     if (!drv) {
561         ret = -ENOENT;
562         goto unlink_and_fail;
563     }
564
565     /* Open the image */
566     ret = bdrv_open_common(bs, filename, flags, drv);
567     if (ret < 0) {
568         goto unlink_and_fail;
569     }
570
571     /* If there is a backing file, use it */
572     if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
573         char backing_filename[PATH_MAX];
574         int back_flags;
575         BlockDriver *back_drv = NULL;
576
577         bs->backing_hd = bdrv_new("");
578         path_combine(backing_filename, sizeof(backing_filename),
579                      filename, bs->backing_file);
580         if (bs->backing_format[0] != '\0')
581             back_drv = bdrv_find_format(bs->backing_format);
582
583         /* backing files always opened read-only */
584         back_flags =
585             flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
586
587         ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
588         if (ret < 0) {
589             bdrv_close(bs);
590             return ret;
591         }
592         if (bs->is_temporary) {
593             bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
594         } else {
595             /* base image inherits from "parent" */
596             bs->backing_hd->keep_read_only = bs->keep_read_only;
597         }
598     }
599
600     if (!bdrv_key_required(bs)) {
601         /* call the change callback */
602         bs->media_changed = 1;
603         if (bs->change_cb)
604             bs->change_cb(bs->change_opaque);
605     }
606
607     return 0;
608
609 unlink_and_fail:
610     if (bs->is_temporary) {
611         unlink(filename);
612     }
613     return ret;
614 }
615
616 void bdrv_close(BlockDriverState *bs)
617 {
618     if (bs->drv) {
619         if (bs->backing_hd) {
620             bdrv_delete(bs->backing_hd);
621             bs->backing_hd = NULL;
622         }
623         bs->drv->bdrv_close(bs);
624         qemu_free(bs->opaque);
625 #ifdef _WIN32
626         if (bs->is_temporary) {
627             unlink(bs->filename);
628         }
629 #endif
630         bs->opaque = NULL;
631         bs->drv = NULL;
632
633         if (bs->file != NULL) {
634             bdrv_close(bs->file);
635         }
636
637         /* call the change callback */
638         bs->media_changed = 1;
639         if (bs->change_cb)
640             bs->change_cb(bs->change_opaque);
641     }
642 }
643
644 void bdrv_delete(BlockDriverState *bs)
645 {
646     /* remove from list, if necessary */
647     if (bs->device_name[0] != '\0') {
648         QTAILQ_REMOVE(&bdrv_states, bs, list);
649     }
650
651     bdrv_close(bs);
652     if (bs->file != NULL) {
653         bdrv_delete(bs->file);
654     }
655
656     qemu_free(bs);
657 }
658
659 /*
660  * Run consistency checks on an image
661  *
662  * Returns the number of errors or -errno when an internal error occurs
663  */
664 int bdrv_check(BlockDriverState *bs)
665 {
666     if (bs->drv->bdrv_check == NULL) {
667         return -ENOTSUP;
668     }
669
670     return bs->drv->bdrv_check(bs);
671 }
672
673 /* commit COW file into the raw image */
674 int bdrv_commit(BlockDriverState *bs)
675 {
676     BlockDriver *drv = bs->drv;
677     int64_t i, total_sectors;
678     int n, j, ro, open_flags;
679     int ret = 0, rw_ret = 0;
680     unsigned char sector[512];
681     char filename[1024];
682     BlockDriverState *bs_rw, *bs_ro;
683
684     if (!drv)
685         return -ENOMEDIUM;
686     
687     if (!bs->backing_hd) {
688         return -ENOTSUP;
689     }
690
691     if (bs->backing_hd->keep_read_only) {
692         return -EACCES;
693     }
694     
695     ro = bs->backing_hd->read_only;
696     strncpy(filename, bs->backing_hd->filename, sizeof(filename));
697     open_flags =  bs->backing_hd->open_flags;
698
699     if (ro) {
700         /* re-open as RW */
701         bdrv_delete(bs->backing_hd);
702         bs->backing_hd = NULL;
703         bs_rw = bdrv_new("");
704         rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR, NULL);
705         if (rw_ret < 0) {
706             bdrv_delete(bs_rw);
707             /* try to re-open read-only */
708             bs_ro = bdrv_new("");
709             ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, NULL);
710             if (ret < 0) {
711                 bdrv_delete(bs_ro);
712                 /* drive not functional anymore */
713                 bs->drv = NULL;
714                 return ret;
715             }
716             bs->backing_hd = bs_ro;
717             return rw_ret;
718         }
719         bs->backing_hd = bs_rw;
720     }
721
722     total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
723     for (i = 0; i < total_sectors;) {
724         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
725             for(j = 0; j < n; j++) {
726                 if (bdrv_read(bs, i, sector, 1) != 0) {
727                     ret = -EIO;
728                     goto ro_cleanup;
729                 }
730
731                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
732                     ret = -EIO;
733                     goto ro_cleanup;
734                 }
735                 i++;
736             }
737         } else {
738             i += n;
739         }
740     }
741
742     if (drv->bdrv_make_empty) {
743         ret = drv->bdrv_make_empty(bs);
744         bdrv_flush(bs);
745     }
746
747     /*
748      * Make sure all data we wrote to the backing device is actually
749      * stable on disk.
750      */
751     if (bs->backing_hd)
752         bdrv_flush(bs->backing_hd);
753
754 ro_cleanup:
755
756     if (ro) {
757         /* re-open as RO */
758         bdrv_delete(bs->backing_hd);
759         bs->backing_hd = NULL;
760         bs_ro = bdrv_new("");
761         ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, NULL);
762         if (ret < 0) {
763             bdrv_delete(bs_ro);
764             /* drive not functional anymore */
765             bs->drv = NULL;
766             return ret;
767         }
768         bs->backing_hd = bs_ro;
769         bs->backing_hd->keep_read_only = 0;
770     }
771
772     return ret;
773 }
774
775 /*
776  * Return values:
777  * 0        - success
778  * -EINVAL  - backing format specified, but no file
779  * -ENOSPC  - can't update the backing file because no space is left in the
780  *            image file header
781  * -ENOTSUP - format driver doesn't support changing the backing file
782  */
783 int bdrv_change_backing_file(BlockDriverState *bs,
784     const char *backing_file, const char *backing_fmt)
785 {
786     BlockDriver *drv = bs->drv;
787
788     if (drv->bdrv_change_backing_file != NULL) {
789         return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
790     } else {
791         return -ENOTSUP;
792     }
793 }
794
795 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
796                                    size_t size)
797 {
798     int64_t len;
799
800     if (!bdrv_is_inserted(bs))
801         return -ENOMEDIUM;
802
803     if (bs->growable)
804         return 0;
805
806     len = bdrv_getlength(bs);
807
808     if (offset < 0)
809         return -EIO;
810
811     if ((offset > len) || (len - offset < size))
812         return -EIO;
813
814     return 0;
815 }
816
817 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
818                               int nb_sectors)
819 {
820     return bdrv_check_byte_request(bs, sector_num * 512, nb_sectors * 512);
821 }
822
823 /* return < 0 if error. See bdrv_write() for the return codes */
824 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
825               uint8_t *buf, int nb_sectors)
826 {
827     BlockDriver *drv = bs->drv;
828
829     if (!drv)
830         return -ENOMEDIUM;
831     if (bdrv_check_request(bs, sector_num, nb_sectors))
832         return -EIO;
833
834     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
835 }
836
837 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
838                              int nb_sectors, int dirty)
839 {
840     int64_t start, end;
841     unsigned long val, idx, bit;
842
843     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
844     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
845
846     for (; start <= end; start++) {
847         idx = start / (sizeof(unsigned long) * 8);
848         bit = start % (sizeof(unsigned long) * 8);
849         val = bs->dirty_bitmap[idx];
850         if (dirty) {
851             if (!(val & (1 << bit))) {
852                 bs->dirty_count++;
853                 val |= 1 << bit;
854             }
855         } else {
856             if (val & (1 << bit)) {
857                 bs->dirty_count--;
858                 val &= ~(1 << bit);
859             }
860         }
861         bs->dirty_bitmap[idx] = val;
862     }
863 }
864
865 /* Return < 0 if error. Important errors are:
866   -EIO         generic I/O error (may happen for all errors)
867   -ENOMEDIUM   No media inserted.
868   -EINVAL      Invalid sector number or nb_sectors
869   -EACCES      Trying to write a read-only device
870 */
871 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
872                const uint8_t *buf, int nb_sectors)
873 {
874     BlockDriver *drv = bs->drv;
875     if (!bs->drv)
876         return -ENOMEDIUM;
877     if (bs->read_only)
878         return -EACCES;
879     if (bdrv_check_request(bs, sector_num, nb_sectors))
880         return -EIO;
881
882     if (bs->dirty_bitmap) {
883         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
884     }
885
886     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
887         bs->wr_highest_sector = sector_num + nb_sectors - 1;
888     }
889
890     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
891 }
892
893 int bdrv_pread(BlockDriverState *bs, int64_t offset,
894                void *buf, int count1)
895 {
896     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
897     int len, nb_sectors, count;
898     int64_t sector_num;
899     int ret;
900
901     count = count1;
902     /* first read to align to sector start */
903     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
904     if (len > count)
905         len = count;
906     sector_num = offset >> BDRV_SECTOR_BITS;
907     if (len > 0) {
908         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
909             return ret;
910         memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
911         count -= len;
912         if (count == 0)
913             return count1;
914         sector_num++;
915         buf += len;
916     }
917
918     /* read the sectors "in place" */
919     nb_sectors = count >> BDRV_SECTOR_BITS;
920     if (nb_sectors > 0) {
921         if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
922             return ret;
923         sector_num += nb_sectors;
924         len = nb_sectors << BDRV_SECTOR_BITS;
925         buf += len;
926         count -= len;
927     }
928
929     /* add data from the last sector */
930     if (count > 0) {
931         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
932             return ret;
933         memcpy(buf, tmp_buf, count);
934     }
935     return count1;
936 }
937
938 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
939                 const void *buf, int count1)
940 {
941     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
942     int len, nb_sectors, count;
943     int64_t sector_num;
944     int ret;
945
946     count = count1;
947     /* first write to align to sector start */
948     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
949     if (len > count)
950         len = count;
951     sector_num = offset >> BDRV_SECTOR_BITS;
952     if (len > 0) {
953         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
954             return ret;
955         memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
956         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
957             return ret;
958         count -= len;
959         if (count == 0)
960             return count1;
961         sector_num++;
962         buf += len;
963     }
964
965     /* write the sectors "in place" */
966     nb_sectors = count >> BDRV_SECTOR_BITS;
967     if (nb_sectors > 0) {
968         if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
969             return ret;
970         sector_num += nb_sectors;
971         len = nb_sectors << BDRV_SECTOR_BITS;
972         buf += len;
973         count -= len;
974     }
975
976     /* add data from the last sector */
977     if (count > 0) {
978         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
979             return ret;
980         memcpy(tmp_buf, buf, count);
981         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
982             return ret;
983     }
984     return count1;
985 }
986
987 /**
988  * Truncate file to 'offset' bytes (needed only for file protocols)
989  */
990 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
991 {
992     BlockDriver *drv = bs->drv;
993     int ret;
994     if (!drv)
995         return -ENOMEDIUM;
996     if (!drv->bdrv_truncate)
997         return -ENOTSUP;
998     if (bs->read_only)
999         return -EACCES;
1000     ret = drv->bdrv_truncate(bs, offset);
1001     if (ret == 0) {
1002         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1003     }
1004     return ret;
1005 }
1006
1007 /**
1008  * Length of a file in bytes. Return < 0 if error or unknown.
1009  */
1010 int64_t bdrv_getlength(BlockDriverState *bs)
1011 {
1012     BlockDriver *drv = bs->drv;
1013     if (!drv)
1014         return -ENOMEDIUM;
1015
1016     /* Fixed size devices use the total_sectors value for speed instead of
1017        issuing a length query (like lseek) on each call.  Also, legacy block
1018        drivers don't provide a bdrv_getlength function and must use
1019        total_sectors. */
1020     if (!bs->growable || !drv->bdrv_getlength) {
1021         return bs->total_sectors * BDRV_SECTOR_SIZE;
1022     }
1023     return drv->bdrv_getlength(bs);
1024 }
1025
1026 /* return 0 as number of sectors if no device present or error */
1027 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1028 {
1029     int64_t length;
1030     length = bdrv_getlength(bs);
1031     if (length < 0)
1032         length = 0;
1033     else
1034         length = length >> BDRV_SECTOR_BITS;
1035     *nb_sectors_ptr = length;
1036 }
1037
/*
 * One entry of the MSDOS partition table. The structure is packed, so
 * its 16 bytes map directly onto the on-disk bytes of an entry.
 */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} __attribute__((packed));
1050
1051 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1052 static int guess_disk_lchs(BlockDriverState *bs,
1053                            int *pcylinders, int *pheads, int *psectors)
1054 {
1055     uint8_t buf[512];
1056     int ret, i, heads, sectors, cylinders;
1057     struct partition *p;
1058     uint32_t nr_sects;
1059     uint64_t nb_sectors;
1060
1061     bdrv_get_geometry(bs, &nb_sectors);
1062
1063     ret = bdrv_read(bs, 0, buf, 1);
1064     if (ret < 0)
1065         return -1;
1066     /* test msdos magic */
1067     if (buf[510] != 0x55 || buf[511] != 0xaa)
1068         return -1;
1069     for(i = 0; i < 4; i++) {
1070         p = ((struct partition *)(buf + 0x1be)) + i;
1071         nr_sects = le32_to_cpu(p->nr_sects);
1072         if (nr_sects && p->end_head) {
1073             /* We make the assumption that the partition terminates on
1074                a cylinder boundary */
1075             heads = p->end_head + 1;
1076             sectors = p->end_sector & 63;
1077             if (sectors == 0)
1078                 continue;
1079             cylinders = nb_sectors / (heads * sectors);
1080             if (cylinders < 1 || cylinders > 16383)
1081                 continue;
1082             *pheads = heads;
1083             *psectors = sectors;
1084             *pcylinders = cylinders;
1085 #if 0
1086             printf("guessed geometry: LCHS=%d %d %d\n",
1087                    cylinders, heads, sectors);
1088 #endif
1089             return 0;
1090         }
1091     }
1092     return -1;
1093 }
1094
1095 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1096 {
1097     int translation, lba_detected = 0;
1098     int cylinders, heads, secs;
1099     uint64_t nb_sectors;
1100
1101     /* if a geometry hint is available, use it */
1102     bdrv_get_geometry(bs, &nb_sectors);
1103     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1104     translation = bdrv_get_translation_hint(bs);
1105     if (cylinders != 0) {
1106         *pcyls = cylinders;
1107         *pheads = heads;
1108         *psecs = secs;
1109     } else {
1110         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1111             if (heads > 16) {
1112                 /* if heads > 16, it means that a BIOS LBA
1113                    translation was active, so the default
1114                    hardware geometry is OK */
1115                 lba_detected = 1;
1116                 goto default_geometry;
1117             } else {
1118                 *pcyls = cylinders;
1119                 *pheads = heads;
1120                 *psecs = secs;
1121                 /* disable any translation to be in sync with
1122                    the logical geometry */
1123                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1124                     bdrv_set_translation_hint(bs,
1125                                               BIOS_ATA_TRANSLATION_NONE);
1126                 }
1127             }
1128         } else {
1129         default_geometry:
1130             /* if no geometry, use a standard physical disk geometry */
1131             cylinders = nb_sectors / (16 * 63);
1132
1133             if (cylinders > 16383)
1134                 cylinders = 16383;
1135             else if (cylinders < 2)
1136                 cylinders = 2;
1137             *pcyls = cylinders;
1138             *pheads = 16;
1139             *psecs = 63;
1140             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1141                 if ((*pcyls * *pheads) <= 131072) {
1142                     bdrv_set_translation_hint(bs,
1143                                               BIOS_ATA_TRANSLATION_LARGE);
1144                 } else {
1145                     bdrv_set_translation_hint(bs,
1146                                               BIOS_ATA_TRANSLATION_LBA);
1147                 }
1148             }
1149         }
1150         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1151     }
1152 }
1153
1154 void bdrv_set_geometry_hint(BlockDriverState *bs,
1155                             int cyls, int heads, int secs)
1156 {
1157     bs->cyls = cyls;
1158     bs->heads = heads;
1159     bs->secs = secs;
1160 }
1161
1162 void bdrv_set_type_hint(BlockDriverState *bs, int type)
1163 {
1164     bs->type = type;
1165     bs->removable = ((type == BDRV_TYPE_CDROM ||
1166                       type == BDRV_TYPE_FLOPPY));
1167 }
1168
1169 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1170 {
1171     bs->translation = translation;
1172 }
1173
1174 void bdrv_get_geometry_hint(BlockDriverState *bs,
1175                             int *pcyls, int *pheads, int *psecs)
1176 {
1177     *pcyls = bs->cyls;
1178     *pheads = bs->heads;
1179     *psecs = bs->secs;
1180 }
1181
1182 int bdrv_get_type_hint(BlockDriverState *bs)
1183 {
1184     return bs->type;
1185 }
1186
1187 int bdrv_get_translation_hint(BlockDriverState *bs)
1188 {
1189     return bs->translation;
1190 }
1191
1192 int bdrv_is_removable(BlockDriverState *bs)
1193 {
1194     return bs->removable;
1195 }
1196
1197 int bdrv_is_read_only(BlockDriverState *bs)
1198 {
1199     return bs->read_only;
1200 }
1201
1202 int bdrv_is_sg(BlockDriverState *bs)
1203 {
1204     return bs->sg;
1205 }
1206
1207 int bdrv_enable_write_cache(BlockDriverState *bs)
1208 {
1209     return bs->enable_write_cache;
1210 }
1211
1212 /* XXX: no longer used */
1213 void bdrv_set_change_cb(BlockDriverState *bs,
1214                         void (*change_cb)(void *opaque), void *opaque)
1215 {
1216     bs->change_cb = change_cb;
1217     bs->change_opaque = opaque;
1218 }
1219
1220 int bdrv_is_encrypted(BlockDriverState *bs)
1221 {
1222     if (bs->backing_hd && bs->backing_hd->encrypted)
1223         return 1;
1224     return bs->encrypted;
1225 }
1226
1227 int bdrv_key_required(BlockDriverState *bs)
1228 {
1229     BlockDriverState *backing_hd = bs->backing_hd;
1230
1231     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1232         return 1;
1233     return (bs->encrypted && !bs->valid_key);
1234 }
1235
1236 int bdrv_set_key(BlockDriverState *bs, const char *key)
1237 {
1238     int ret;
1239     if (bs->backing_hd && bs->backing_hd->encrypted) {
1240         ret = bdrv_set_key(bs->backing_hd, key);
1241         if (ret < 0)
1242             return ret;
1243         if (!bs->encrypted)
1244             return 0;
1245     }
1246     if (!bs->encrypted) {
1247         return -EINVAL;
1248     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1249         return -ENOMEDIUM;
1250     }
1251     ret = bs->drv->bdrv_set_key(bs, key);
1252     if (ret < 0) {
1253         bs->valid_key = 0;
1254     } else if (!bs->valid_key) {
1255         bs->valid_key = 1;
1256         /* call the change callback now, we skipped it on open */
1257         bs->media_changed = 1;
1258         if (bs->change_cb)
1259             bs->change_cb(bs->change_opaque);
1260     }
1261     return ret;
1262 }
1263
1264 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1265 {
1266     if (!bs->drv) {
1267         buf[0] = '\0';
1268     } else {
1269         pstrcpy(buf, buf_size, bs->drv->format_name);
1270     }
1271 }
1272
1273 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1274                          void *opaque)
1275 {
1276     BlockDriver *drv;
1277
1278     QLIST_FOREACH(drv, &bdrv_drivers, list) {
1279         it(opaque, drv->format_name);
1280     }
1281 }
1282
1283 BlockDriverState *bdrv_find(const char *name)
1284 {
1285     BlockDriverState *bs;
1286
1287     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1288         if (!strcmp(name, bs->device_name)) {
1289             return bs;
1290         }
1291     }
1292     return NULL;
1293 }
1294
1295 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1296 {
1297     BlockDriverState *bs;
1298
1299     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1300         it(opaque, bs);
1301     }
1302 }
1303
1304 const char *bdrv_get_device_name(BlockDriverState *bs)
1305 {
1306     return bs->device_name;
1307 }
1308
1309 void bdrv_flush(BlockDriverState *bs)
1310 {
1311     if (bs->drv && bs->drv->bdrv_flush)
1312         bs->drv->bdrv_flush(bs);
1313 }
1314
1315 void bdrv_flush_all(void)
1316 {
1317     BlockDriverState *bs;
1318
1319     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1320         if (bs->drv && !bdrv_is_read_only(bs) &&
1321             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
1322             bdrv_flush(bs);
1323         }
1324     }
1325 }
1326
1327 int bdrv_has_zero_init(BlockDriverState *bs)
1328 {
1329     assert(bs->drv);
1330
1331     if (bs->drv->no_zero_init) {
1332         return 0;
1333     } else if (bs->file) {
1334         return bdrv_has_zero_init(bs->file);
1335     }
1336
1337     return 1;
1338 }
1339
1340 /*
1341  * Returns true iff the specified sector is present in the disk image. Drivers
1342  * not implementing the functionality are assumed to not support backing files,
1343  * hence all their sectors are reported as allocated.
1344  *
1345  * 'pnum' is set to the number of sectors (including and immediately following
1346  * the specified sector) that are known to be in the same
1347  * allocated/unallocated state.
1348  *
1349  * 'nb_sectors' is the max value 'pnum' should be set to.
1350  */
1351 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1352         int *pnum)
1353 {
1354     int64_t n;
1355     if (!bs->drv->bdrv_is_allocated) {
1356         if (sector_num >= bs->total_sectors) {
1357             *pnum = 0;
1358             return 0;
1359         }
1360         n = bs->total_sectors - sector_num;
1361         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1362         return 1;
1363     }
1364     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1365 }
1366
1367 void bdrv_mon_event(const BlockDriverState *bdrv,
1368                     BlockMonEventAction action, int is_read)
1369 {
1370     QObject *data;
1371     const char *action_str;
1372
1373     switch (action) {
1374     case BDRV_ACTION_REPORT:
1375         action_str = "report";
1376         break;
1377     case BDRV_ACTION_IGNORE:
1378         action_str = "ignore";
1379         break;
1380     case BDRV_ACTION_STOP:
1381         action_str = "stop";
1382         break;
1383     default:
1384         abort();
1385     }
1386
1387     data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1388                               bdrv->device_name,
1389                               action_str,
1390                               is_read ? "read" : "write");
1391     monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1392
1393     qobject_decref(data);
1394 }
1395
1396 static void bdrv_print_dict(QObject *obj, void *opaque)
1397 {
1398     QDict *bs_dict;
1399     Monitor *mon = opaque;
1400
1401     bs_dict = qobject_to_qdict(obj);
1402
1403     monitor_printf(mon, "%s: type=%s removable=%d",
1404                         qdict_get_str(bs_dict, "device"),
1405                         qdict_get_str(bs_dict, "type"),
1406                         qdict_get_bool(bs_dict, "removable"));
1407
1408     if (qdict_get_bool(bs_dict, "removable")) {
1409         monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1410     }
1411
1412     if (qdict_haskey(bs_dict, "inserted")) {
1413         QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1414
1415         monitor_printf(mon, " file=");
1416         monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1417         if (qdict_haskey(qdict, "backing_file")) {
1418             monitor_printf(mon, " backing_file=");
1419             monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1420         }
1421         monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1422                             qdict_get_bool(qdict, "ro"),
1423                             qdict_get_str(qdict, "drv"),
1424                             qdict_get_bool(qdict, "encrypted"));
1425     } else {
1426         monitor_printf(mon, " [not inserted]");
1427     }
1428
1429     monitor_printf(mon, "\n");
1430 }
1431
1432 void bdrv_info_print(Monitor *mon, const QObject *data)
1433 {
1434     qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1435 }
1436
1437 /**
1438  * bdrv_info(): Block devices information
1439  *
1440  * Each block device information is stored in a QDict and the
1441  * returned QObject is a QList of all devices.
1442  *
1443  * The QDict contains the following:
1444  *
1445  * - "device": device name
1446  * - "type": device type
1447  * - "removable": true if the device is removable, false otherwise
1448  * - "locked": true if the device is locked, false otherwise
1449  * - "inserted": only present if the device is inserted, it is a QDict
1450  *    containing the following:
1451  *          - "file": device file name
1452  *          - "ro": true if read-only, false otherwise
1453  *          - "drv": driver format name
1454  *          - "backing_file": backing file name if one is used
1455  *          - "encrypted": true if encrypted, false otherwise
1456  *
1457  * Example:
1458  *
1459  * [ { "device": "ide0-hd0", "type": "hd", "removable": false, "locked": false,
1460  *     "inserted": { "file": "/tmp/foobar", "ro": false, "drv": "qcow2" } },
1461  *   { "device": "floppy0", "type": "floppy", "removable": true,
1462  *     "locked": false } ]
1463  */
1464 void bdrv_info(Monitor *mon, QObject **ret_data)
1465 {
1466     QList *bs_list;
1467     BlockDriverState *bs;
1468
1469     bs_list = qlist_new();
1470
1471     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1472         QObject *bs_obj;
1473         const char *type = "unknown";
1474
1475         switch(bs->type) {
1476         case BDRV_TYPE_HD:
1477             type = "hd";
1478             break;
1479         case BDRV_TYPE_CDROM:
1480             type = "cdrom";
1481             break;
1482         case BDRV_TYPE_FLOPPY:
1483             type = "floppy";
1484             break;
1485         }
1486
1487         bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': %s, "
1488                                     "'removable': %i, 'locked': %i }",
1489                                     bs->device_name, type, bs->removable,
1490                                     bs->locked);
1491
1492         if (bs->drv) {
1493             QObject *obj;
1494             QDict *bs_dict = qobject_to_qdict(bs_obj);
1495
1496             obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1497                                      "'encrypted': %i }",
1498                                      bs->filename, bs->read_only,
1499                                      bs->drv->format_name,
1500                                      bdrv_is_encrypted(bs));
1501             if (bs->backing_file[0] != '\0') {
1502                 QDict *qdict = qobject_to_qdict(obj);
1503                 qdict_put(qdict, "backing_file",
1504                           qstring_from_str(bs->backing_file));
1505             }
1506
1507             qdict_put_obj(bs_dict, "inserted", obj);
1508         }
1509         qlist_append_obj(bs_list, bs_obj);
1510     }
1511
1512     *ret_data = QOBJECT(bs_list);
1513 }
1514
1515 static void bdrv_stats_iter(QObject *data, void *opaque)
1516 {
1517     QDict *qdict;
1518     Monitor *mon = opaque;
1519
1520     qdict = qobject_to_qdict(data);
1521     monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1522
1523     qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1524     monitor_printf(mon, " rd_bytes=%" PRId64
1525                         " wr_bytes=%" PRId64
1526                         " rd_operations=%" PRId64
1527                         " wr_operations=%" PRId64
1528                         "\n",
1529                         qdict_get_int(qdict, "rd_bytes"),
1530                         qdict_get_int(qdict, "wr_bytes"),
1531                         qdict_get_int(qdict, "rd_operations"),
1532                         qdict_get_int(qdict, "wr_operations"));
1533 }
1534
1535 void bdrv_stats_print(Monitor *mon, const QObject *data)
1536 {
1537     qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1538 }
1539
1540 static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
1541 {
1542     QObject *res;
1543     QDict *dict;
1544
1545     res = qobject_from_jsonf("{ 'stats': {"
1546                              "'rd_bytes': %" PRId64 ","
1547                              "'wr_bytes': %" PRId64 ","
1548                              "'rd_operations': %" PRId64 ","
1549                              "'wr_operations': %" PRId64 ","
1550                              "'wr_highest_offset': %" PRId64
1551                              "} }",
1552                              bs->rd_bytes, bs->wr_bytes,
1553                              bs->rd_ops, bs->wr_ops,
1554                              bs->wr_highest_sector * 512);
1555     dict  = qobject_to_qdict(res);
1556
1557     if (*bs->device_name) {
1558         qdict_put(dict, "device", qstring_from_str(bs->device_name));
1559     }
1560
1561     if (bs->file) {
1562         QObject *parent = bdrv_info_stats_bs(bs->file);
1563         qdict_put_obj(dict, "parent", parent);
1564     }
1565
1566     return res;
1567 }
1568
1569 /**
1570  * bdrv_info_stats(): show block device statistics
1571  *
1572  * Each device statistic information is stored in a QDict and
1573  * the returned QObject is a QList of all devices.
1574  *
1575  * The QDict contains the following:
1576  *
1577  * - "device": device name
1578  * - "stats": A QDict with the statistics information, it contains:
1579  *     - "rd_bytes": bytes read
1580  *     - "wr_bytes": bytes written
1581  *     - "rd_operations": read operations
1582  *     - "wr_operations": write operations
1583  *     - "wr_highest_offset": Highest offset of a sector written since the
1584  *       BlockDriverState has been opened
1585  *     - "parent": Contains recursively the statistics of the underlying
1586  *       protocol (e.g. the host file for a qcow2 image). If there is no
1587  *       underlying protocol, this field is omitted.
1588  *
1589  * Example:
1590  *
1591  * [ { "device": "ide0-hd0",
1592  *               "stats": { "rd_bytes": 512,
1593  *                          "wr_bytes": 0,
1594  *                          "rd_operations": 1,
1595  *                          "wr_operations": 0,
1596  *                          "wr_highest_offset": 0,
1597  *                          "parent": {
1598  *                              "stats": { "rd_bytes": 1024,
1599  *                                         "wr_bytes": 0,
1600  *                                         "rd_operations": 2,
1601  *                                         "wr_operations": 0,
1602  *                                         "wr_highest_offset": 0,
1603  *                              }
1604  *                          } } },
1605  *   { "device": "ide1-cd0",
1606  *               "stats": { "rd_bytes": 0,
1607  *                          "wr_bytes": 0,
1608  *                          "rd_operations": 0,
1609  *                          "wr_operations": 0,
1610  *                          "wr_highest_offset": 0 } },
1611  */
1612 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
1613 {
1614     QObject *obj;
1615     QList *devices;
1616     BlockDriverState *bs;
1617
1618     devices = qlist_new();
1619
1620     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1621         obj = bdrv_info_stats_bs(bs);
1622         qlist_append_obj(devices, obj);
1623     }
1624
1625     *ret_data = QOBJECT(devices);
1626 }
1627
1628 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1629 {
1630     if (bs->backing_hd && bs->backing_hd->encrypted)
1631         return bs->backing_file;
1632     else if (bs->encrypted)
1633         return bs->filename;
1634     else
1635         return NULL;
1636 }
1637
1638 void bdrv_get_backing_filename(BlockDriverState *bs,
1639                                char *filename, int filename_size)
1640 {
1641     if (!bs->backing_file) {
1642         pstrcpy(filename, filename_size, "");
1643     } else {
1644         pstrcpy(filename, filename_size, bs->backing_file);
1645     }
1646 }
1647
1648 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1649                           const uint8_t *buf, int nb_sectors)
1650 {
1651     BlockDriver *drv = bs->drv;
1652     if (!drv)
1653         return -ENOMEDIUM;
1654     if (!drv->bdrv_write_compressed)
1655         return -ENOTSUP;
1656     if (bdrv_check_request(bs, sector_num, nb_sectors))
1657         return -EIO;
1658
1659     if (bs->dirty_bitmap) {
1660         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1661     }
1662
1663     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1664 }
1665
1666 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1667 {
1668     BlockDriver *drv = bs->drv;
1669     if (!drv)
1670         return -ENOMEDIUM;
1671     if (!drv->bdrv_get_info)
1672         return -ENOTSUP;
1673     memset(bdi, 0, sizeof(*bdi));
1674     return drv->bdrv_get_info(bs, bdi);
1675 }
1676
1677 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1678                       int64_t pos, int size)
1679 {
1680     BlockDriver *drv = bs->drv;
1681     if (!drv)
1682         return -ENOMEDIUM;
1683     if (!drv->bdrv_save_vmstate)
1684         return -ENOTSUP;
1685     return drv->bdrv_save_vmstate(bs, buf, pos, size);
1686 }
1687
1688 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
1689                       int64_t pos, int size)
1690 {
1691     BlockDriver *drv = bs->drv;
1692     if (!drv)
1693         return -ENOMEDIUM;
1694     if (!drv->bdrv_load_vmstate)
1695         return -ENOTSUP;
1696     return drv->bdrv_load_vmstate(bs, buf, pos, size);
1697 }
1698
1699 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
1700 {
1701     BlockDriver *drv = bs->drv;
1702
1703     if (!drv || !drv->bdrv_debug_event) {
1704         return;
1705     }
1706
1707     return drv->bdrv_debug_event(bs, event);
1708
1709 }
1710
1711 /**************************************************************/
1712 /* handling of snapshots */
1713
1714 int bdrv_snapshot_create(BlockDriverState *bs,
1715                          QEMUSnapshotInfo *sn_info)
1716 {
1717     BlockDriver *drv = bs->drv;
1718     if (!drv)
1719         return -ENOMEDIUM;
1720     if (!drv->bdrv_snapshot_create)
1721         return -ENOTSUP;
1722     return drv->bdrv_snapshot_create(bs, sn_info);
1723 }
1724
1725 int bdrv_snapshot_goto(BlockDriverState *bs,
1726                        const char *snapshot_id)
1727 {
1728     BlockDriver *drv = bs->drv;
1729     if (!drv)
1730         return -ENOMEDIUM;
1731     if (!drv->bdrv_snapshot_goto)
1732         return -ENOTSUP;
1733     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1734 }
1735
1736 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1737 {
1738     BlockDriver *drv = bs->drv;
1739     if (!drv)
1740         return -ENOMEDIUM;
1741     if (!drv->bdrv_snapshot_delete)
1742         return -ENOTSUP;
1743     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1744 }
1745
1746 int bdrv_snapshot_list(BlockDriverState *bs,
1747                        QEMUSnapshotInfo **psn_info)
1748 {
1749     BlockDriver *drv = bs->drv;
1750     if (!drv)
1751         return -ENOMEDIUM;
1752     if (!drv->bdrv_snapshot_list)
1753         return -ENOTSUP;
1754     return drv->bdrv_snapshot_list(bs, psn_info);
1755 }
1756
#define NB_SUFFIXES 4

/*
 * Format size into buf (at most buf_size bytes) in a compact form and
 * return buf.
 *
 * Values up to 999 (including negative ones) are printed as plain
 * integers.  Larger values are scaled by powers of 1024 and get a K, M, G
 * or T suffix: one decimal while below 10 units, a rounded integer
 * otherwise.  Values beyond the T range are still expressed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < (10 * unit)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            break;
        }
        /* the last suffix absorbs everything that is larger */
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit),
                     suffixes[idx]);
            break;
        }
    }
    return buf;
}
1786
1787 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1788 {
1789     char buf1[128], date_buf[128], clock_buf[128];
1790 #ifdef _WIN32
1791     struct tm *ptm;
1792 #else
1793     struct tm tm;
1794 #endif
1795     time_t ti;
1796     int64_t secs;
1797
1798     if (!sn) {
1799         snprintf(buf, buf_size,
1800                  "%-10s%-20s%7s%20s%15s",
1801                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1802     } else {
1803         ti = sn->date_sec;
1804 #ifdef _WIN32
1805         ptm = localtime(&ti);
1806         strftime(date_buf, sizeof(date_buf),
1807                  "%Y-%m-%d %H:%M:%S", ptm);
1808 #else
1809         localtime_r(&ti, &tm);
1810         strftime(date_buf, sizeof(date_buf),
1811                  "%Y-%m-%d %H:%M:%S", &tm);
1812 #endif
1813         secs = sn->vm_clock_nsec / 1000000000;
1814         snprintf(clock_buf, sizeof(clock_buf),
1815                  "%02d:%02d:%02d.%03d",
1816                  (int)(secs / 3600),
1817                  (int)((secs / 60) % 60),
1818                  (int)(secs % 60),
1819                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1820         snprintf(buf, buf_size,
1821                  "%-10s%-20s%7s%20s%15s",
1822                  sn->id_str, sn->name,
1823                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1824                  date_buf,
1825                  clock_buf);
1826     }
1827     return buf;
1828 }
1829
1830
1831 /**************************************************************/
1832 /* async I/Os */
1833
1834 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1835                                  QEMUIOVector *qiov, int nb_sectors,
1836                                  BlockDriverCompletionFunc *cb, void *opaque)
1837 {
1838     BlockDriver *drv = bs->drv;
1839     BlockDriverAIOCB *ret;
1840
1841     if (!drv)
1842         return NULL;
1843     if (bdrv_check_request(bs, sector_num, nb_sectors))
1844         return NULL;
1845
1846     ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
1847                               cb, opaque);
1848
1849     if (ret) {
1850         /* Update stats even though technically transfer has not happened. */
1851         bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1852         bs->rd_ops ++;
1853     }
1854
1855     return ret;
1856 }
1857
1858 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1859                                   QEMUIOVector *qiov, int nb_sectors,
1860                                   BlockDriverCompletionFunc *cb, void *opaque)
1861 {
1862     BlockDriver *drv = bs->drv;
1863     BlockDriverAIOCB *ret;
1864
1865     if (!drv)
1866         return NULL;
1867     if (bs->read_only)
1868         return NULL;
1869     if (bdrv_check_request(bs, sector_num, nb_sectors))
1870         return NULL;
1871
1872     if (bs->dirty_bitmap) {
1873         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1874     }
1875
1876     ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
1877                                cb, opaque);
1878
1879     if (ret) {
1880         /* Update stats even though technically transfer has not happened. */
1881         bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1882         bs->wr_ops ++;
1883         if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1884             bs->wr_highest_sector = sector_num + nb_sectors - 1;
1885         }
1886     }
1887
1888     return ret;
1889 }
1890
1891
1892 typedef struct MultiwriteCB {
1893     int error;
1894     int num_requests;
1895     int num_callbacks;
1896     struct {
1897         BlockDriverCompletionFunc *cb;
1898         void *opaque;
1899         QEMUIOVector *free_qiov;
1900         void *free_buf;
1901     } callbacks[];
1902 } MultiwriteCB;
1903
1904 static void multiwrite_user_cb(MultiwriteCB *mcb)
1905 {
1906     int i;
1907
1908     for (i = 0; i < mcb->num_callbacks; i++) {
1909         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1910         if (mcb->callbacks[i].free_qiov) {
1911             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
1912         }
1913         qemu_free(mcb->callbacks[i].free_qiov);
1914         qemu_vfree(mcb->callbacks[i].free_buf);
1915     }
1916 }
1917
1918 static void multiwrite_cb(void *opaque, int ret)
1919 {
1920     MultiwriteCB *mcb = opaque;
1921
1922     if (ret < 0 && !mcb->error) {
1923         mcb->error = ret;
1924         multiwrite_user_cb(mcb);
1925     }
1926
1927     mcb->num_requests--;
1928     if (mcb->num_requests == 0) {
1929         if (mcb->error == 0) {
1930             multiwrite_user_cb(mcb);
1931         }
1932         qemu_free(mcb);
1933     }
1934 }
1935
1936 static int multiwrite_req_compare(const void *a, const void *b)
1937 {
1938     return (((BlockRequest*) a)->sector - ((BlockRequest*) b)->sector);
1939 }
1940
1941 /*
1942  * Takes a bunch of requests and tries to merge them. Returns the number of
1943  * requests that remain after merging.
1944  */
1945 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
1946     int num_reqs, MultiwriteCB *mcb)
1947 {
1948     int i, outidx;
1949
1950     // Sort requests by start sector
1951     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
1952
1953     // Check if adjacent requests touch the same clusters. If so, combine them,
1954     // filling up gaps with zero sectors.
1955     outidx = 0;
1956     for (i = 1; i < num_reqs; i++) {
1957         int merge = 0;
1958         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
1959
1960         // This handles the cases that are valid for all block drivers, namely
1961         // exactly sequential writes and overlapping writes.
1962         if (reqs[i].sector <= oldreq_last) {
1963             merge = 1;
1964         }
1965
1966         // The block driver may decide that it makes sense to combine requests
1967         // even if there is a gap of some sectors between them. In this case,
1968         // the gap is filled with zeros (therefore only applicable for yet
1969         // unused space in format like qcow2).
1970         if (!merge && bs->drv->bdrv_merge_requests) {
1971             merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
1972         }
1973
1974         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
1975             merge = 0;
1976         }
1977
1978         if (merge) {
1979             size_t size;
1980             QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov));
1981             qemu_iovec_init(qiov,
1982                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
1983
1984             // Add the first request to the merged one. If the requests are
1985             // overlapping, drop the last sectors of the first request.
1986             size = (reqs[i].sector - reqs[outidx].sector) << 9;
1987             qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
1988
1989             // We might need to add some zeros between the two requests
1990             if (reqs[i].sector > oldreq_last) {
1991                 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
1992                 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
1993                 memset(buf, 0, zero_bytes);
1994                 qemu_iovec_add(qiov, buf, zero_bytes);
1995                 mcb->callbacks[i].free_buf = buf;
1996             }
1997
1998             // Add the second request
1999             qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2000
2001             reqs[outidx].nb_sectors += reqs[i].nb_sectors;
2002             reqs[outidx].qiov = qiov;
2003
2004             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2005         } else {
2006             outidx++;
2007             reqs[outidx].sector     = reqs[i].sector;
2008             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2009             reqs[outidx].qiov       = reqs[i].qiov;
2010         }
2011     }
2012
2013     return outidx + 1;
2014 }
2015
2016 /*
2017  * Submit multiple AIO write requests at once.
2018  *
2019  * On success, the function returns 0 and all requests in the reqs array have
2020  * been submitted. In error case this function returns -1, and any of the
2021  * requests may or may not be submitted yet. In particular, this means that the
2022  * callback will be called for some of the requests, for others it won't. The
2023  * caller must check the error field of the BlockRequest to wait for the right
2024  * callbacks (if error != 0, no callback will be called).
2025  *
2026  * The implementation may modify the contents of the reqs array, e.g. to merge
2027  * requests. However, the fields opaque and error are left unmodified as they
2028  * are used to signal failure for a single request to the caller.
2029  */
2030 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2031 {
2032     BlockDriverAIOCB *acb;
2033     MultiwriteCB *mcb;
2034     int i;
2035
2036     if (num_reqs == 0) {
2037         return 0;
2038     }
2039
2040     // Create MultiwriteCB structure
2041     mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2042     mcb->num_requests = 0;
2043     mcb->num_callbacks = num_reqs;
2044
2045     for (i = 0; i < num_reqs; i++) {
2046         mcb->callbacks[i].cb = reqs[i].cb;
2047         mcb->callbacks[i].opaque = reqs[i].opaque;
2048     }
2049
2050     // Check for mergable requests
2051     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2052
2053     // Run the aio requests
2054     for (i = 0; i < num_reqs; i++) {
2055         acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2056             reqs[i].nb_sectors, multiwrite_cb, mcb);
2057
2058         if (acb == NULL) {
2059             // We can only fail the whole thing if no request has been
2060             // submitted yet. Otherwise we'll wait for the submitted AIOs to
2061             // complete and report the error in the callback.
2062             if (mcb->num_requests == 0) {
2063                 reqs[i].error = -EIO;
2064                 goto fail;
2065             } else {
2066                 mcb->num_requests++;
2067                 multiwrite_cb(mcb, -EIO);
2068                 break;
2069             }
2070         } else {
2071             mcb->num_requests++;
2072         }
2073     }
2074
2075     return 0;
2076
2077 fail:
2078     free(mcb);
2079     return -1;
2080 }
2081
2082 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2083         BlockDriverCompletionFunc *cb, void *opaque)
2084 {
2085     BlockDriver *drv = bs->drv;
2086
2087     if (!drv)
2088         return NULL;
2089     return drv->bdrv_aio_flush(bs, cb, opaque);
2090 }
2091
2092 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2093 {
2094     acb->pool->cancel(acb);
2095 }
2096
2097
2098 /**************************************************************/
2099 /* async block device emulation */
2100
2101 typedef struct BlockDriverAIOCBSync {
2102     BlockDriverAIOCB common;
2103     QEMUBH *bh;
2104     int ret;
2105     /* vector translation state */
2106     QEMUIOVector *qiov;
2107     uint8_t *bounce;
2108     int is_write;
2109 } BlockDriverAIOCBSync;
2110
2111 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2112 {
2113     BlockDriverAIOCBSync *acb =
2114         container_of(blockacb, BlockDriverAIOCBSync, common);
2115     qemu_bh_delete(acb->bh);
2116     acb->bh = NULL;
2117     qemu_aio_release(acb);
2118 }
2119
2120 static AIOPool bdrv_em_aio_pool = {
2121     .aiocb_size         = sizeof(BlockDriverAIOCBSync),
2122     .cancel             = bdrv_aio_cancel_em,
2123 };
2124
2125 static void bdrv_aio_bh_cb(void *opaque)
2126 {
2127     BlockDriverAIOCBSync *acb = opaque;
2128
2129     if (!acb->is_write)
2130         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2131     qemu_vfree(acb->bounce);
2132     acb->common.cb(acb->common.opaque, acb->ret);
2133     qemu_bh_delete(acb->bh);
2134     acb->bh = NULL;
2135     qemu_aio_release(acb);
2136 }
2137
2138 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2139                                             int64_t sector_num,
2140                                             QEMUIOVector *qiov,
2141                                             int nb_sectors,
2142                                             BlockDriverCompletionFunc *cb,
2143                                             void *opaque,
2144                                             int is_write)
2145
2146 {
2147     BlockDriverAIOCBSync *acb;
2148
2149     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2150     acb->is_write = is_write;
2151     acb->qiov = qiov;
2152     acb->bounce = qemu_blockalign(bs, qiov->size);
2153
2154     if (!acb->bh)
2155         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2156
2157     if (is_write) {
2158         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2159         acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2160     } else {
2161         acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2162     }
2163
2164     qemu_bh_schedule(acb->bh);
2165
2166     return &acb->common;
2167 }
2168
2169 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2170         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2171         BlockDriverCompletionFunc *cb, void *opaque)
2172 {
2173     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2174 }
2175
2176 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2177         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2178         BlockDriverCompletionFunc *cb, void *opaque)
2179 {
2180     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2181 }
2182
2183 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2184         BlockDriverCompletionFunc *cb, void *opaque)
2185 {
2186     BlockDriverAIOCBSync *acb;
2187
2188     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2189     acb->is_write = 1; /* don't bounce in the completion hadler */
2190     acb->qiov = NULL;
2191     acb->bounce = NULL;
2192     acb->ret = 0;
2193
2194     if (!acb->bh)
2195         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2196
2197     bdrv_flush(bs);
2198     qemu_bh_schedule(acb->bh);
2199     return &acb->common;
2200 }
2201
2202 /**************************************************************/
2203 /* sync block device emulation */
2204
/*
 * Completion callback of the synchronous emulation: opaque points to the
 * caller's int result slot, which receives ret.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2209
2210 #define NOT_DONE 0x7fffffff
2211
2212 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
2213                         uint8_t *buf, int nb_sectors)
2214 {
2215     int async_ret;
2216     BlockDriverAIOCB *acb;
2217     struct iovec iov;
2218     QEMUIOVector qiov;
2219
2220     async_context_push();
2221
2222     async_ret = NOT_DONE;
2223     iov.iov_base = (void *)buf;
2224     iov.iov_len = nb_sectors * 512;
2225     qemu_iovec_init_external(&qiov, &iov, 1);
2226     acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2227         bdrv_rw_em_cb, &async_ret);
2228     if (acb == NULL) {
2229         async_ret = -1;
2230         goto fail;
2231     }
2232
2233     while (async_ret == NOT_DONE) {
2234         qemu_aio_wait();
2235     }
2236
2237
2238 fail:
2239     async_context_pop();
2240     return async_ret;
2241 }
2242
2243 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2244                          const uint8_t *buf, int nb_sectors)
2245 {
2246     int async_ret;
2247     BlockDriverAIOCB *acb;
2248     struct iovec iov;
2249     QEMUIOVector qiov;
2250
2251     async_context_push();
2252
2253     async_ret = NOT_DONE;
2254     iov.iov_base = (void *)buf;
2255     iov.iov_len = nb_sectors * 512;
2256     qemu_iovec_init_external(&qiov, &iov, 1);
2257     acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2258         bdrv_rw_em_cb, &async_ret);
2259     if (acb == NULL) {
2260         async_ret = -1;
2261         goto fail;
2262     }
2263     while (async_ret == NOT_DONE) {
2264         qemu_aio_wait();
2265     }
2266
2267 fail:
2268     async_context_pop();
2269     return async_ret;
2270 }
2271
2272 void bdrv_init(void)
2273 {
2274     module_call_init(MODULE_INIT_BLOCK);
2275 }
2276
2277 void bdrv_init_with_whitelist(void)
2278 {
2279     use_bdrv_whitelist = 1;
2280     bdrv_init();
2281 }
2282
2283 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2284                    BlockDriverCompletionFunc *cb, void *opaque)
2285 {
2286     BlockDriverAIOCB *acb;
2287
2288     if (pool->free_aiocb) {
2289         acb = pool->free_aiocb;
2290         pool->free_aiocb = acb->next;
2291     } else {
2292         acb = qemu_mallocz(pool->aiocb_size);
2293         acb->pool = pool;
2294     }
2295     acb->bs = bs;
2296     acb->cb = cb;
2297     acb->opaque = opaque;
2298     return acb;
2299 }
2300
2301 void qemu_aio_release(void *p)
2302 {
2303     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2304     AIOPool *pool = acb->pool;
2305     acb->next = pool->free_aiocb;
2306     pool->free_aiocb = acb;
2307 }
2308
2309 /**************************************************************/
2310 /* removable device support */
2311
2312 /**
2313  * Return TRUE if the media is present
2314  */
2315 int bdrv_is_inserted(BlockDriverState *bs)
2316 {
2317     BlockDriver *drv = bs->drv;
2318     int ret;
2319     if (!drv)
2320         return 0;
2321     if (!drv->bdrv_is_inserted)
2322         return 1;
2323     ret = drv->bdrv_is_inserted(bs);
2324     return ret;
2325 }
2326
2327 /**
2328  * Return TRUE if the media changed since the last call to this
2329  * function. It is currently only used for floppy disks
2330  */
2331 int bdrv_media_changed(BlockDriverState *bs)
2332 {
2333     BlockDriver *drv = bs->drv;
2334     int ret;
2335
2336     if (!drv || !drv->bdrv_media_changed)
2337         ret = -ENOTSUP;
2338     else
2339         ret = drv->bdrv_media_changed(bs);
2340     if (ret == -ENOTSUP)
2341         ret = bs->media_changed;
2342     bs->media_changed = 0;
2343     return ret;
2344 }
2345
2346 /**
2347  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2348  */
2349 int bdrv_eject(BlockDriverState *bs, int eject_flag)
2350 {
2351     BlockDriver *drv = bs->drv;
2352     int ret;
2353
2354     if (bs->locked) {
2355         return -EBUSY;
2356     }
2357
2358     if (!drv || !drv->bdrv_eject) {
2359         ret = -ENOTSUP;
2360     } else {
2361         ret = drv->bdrv_eject(bs, eject_flag);
2362     }
2363     if (ret == -ENOTSUP) {
2364         if (eject_flag)
2365             bdrv_close(bs);
2366         ret = 0;
2367     }
2368
2369     return ret;
2370 }
2371
2372 int bdrv_is_locked(BlockDriverState *bs)
2373 {
2374     return bs->locked;
2375 }
2376
2377 /**
2378  * Lock or unlock the media (if it is locked, the user won't be able
2379  * to eject it manually).
2380  */
2381 void bdrv_set_locked(BlockDriverState *bs, int locked)
2382 {
2383     BlockDriver *drv = bs->drv;
2384
2385     bs->locked = locked;
2386     if (drv && drv->bdrv_set_locked) {
2387         drv->bdrv_set_locked(bs, locked);
2388     }
2389 }
2390
2391 /* needed for generic scsi interface */
2392
2393 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
2394 {
2395     BlockDriver *drv = bs->drv;
2396
2397     if (drv && drv->bdrv_ioctl)
2398         return drv->bdrv_ioctl(bs, req, buf);
2399     return -ENOTSUP;
2400 }
2401
2402 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
2403         unsigned long int req, void *buf,
2404         BlockDriverCompletionFunc *cb, void *opaque)
2405 {
2406     BlockDriver *drv = bs->drv;
2407
2408     if (drv && drv->bdrv_aio_ioctl)
2409         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
2410     return NULL;
2411 }
2412
2413
2414
2415 void *qemu_blockalign(BlockDriverState *bs, size_t size)
2416 {
2417     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
2418 }
2419
2420 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
2421 {
2422     int64_t bitmap_size;
2423
2424     bs->dirty_count = 0;
2425     if (enable) {
2426         if (!bs->dirty_bitmap) {
2427             bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
2428                     BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
2429             bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
2430
2431             bs->dirty_bitmap = qemu_mallocz(bitmap_size);
2432         }
2433     } else {
2434         if (bs->dirty_bitmap) {
2435             qemu_free(bs->dirty_bitmap);
2436             bs->dirty_bitmap = NULL;
2437         }
2438     }
2439 }
2440
2441 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
2442 {
2443     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
2444
2445     if (bs->dirty_bitmap &&
2446         (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
2447         return bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
2448             (1 << (chunk % (sizeof(unsigned long) * 8)));
2449     } else {
2450         return 0;
2451     }
2452 }
2453
2454 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
2455                       int nr_sectors)
2456 {
2457     set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
2458 }
2459
2460 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
2461 {
2462     return bs->dirty_count;
2463 }
This page took 0.158758 seconds and 4 git commands to generate.