]> Git Repo - qemu.git/blob - block.c
block migration: Cleanup dirty tracking code
[qemu.git] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "monitor.h"
27 #include "block_int.h"
28 #include "module.h"
29
30 #ifdef CONFIG_BSD
31 #include <sys/types.h>
32 #include <sys/stat.h>
33 #include <sys/ioctl.h>
34 #include <sys/queue.h>
35 #ifndef __DragonFly__
36 #include <sys/disk.h>
37 #endif
38 #endif
39
40 #ifdef _WIN32
41 #include <windows.h>
42 #endif
43
44 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
45         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
46         BlockDriverCompletionFunc *cb, void *opaque);
47 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
48         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
49         BlockDriverCompletionFunc *cb, void *opaque);
50 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
51         BlockDriverCompletionFunc *cb, void *opaque);
52 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
53                         uint8_t *buf, int nb_sectors);
54 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
55                          const uint8_t *buf, int nb_sectors);
56
57 BlockDriverState *bdrv_first;
58
59 static BlockDriver *first_drv;
60
61 /* If non-zero, use only whitelisted block drivers */
62 static int use_bdrv_whitelist;
63
/*
 * Tell whether 'path' is absolute.
 *
 * Everything up to and including the first ':' (if any) is skipped before
 * the check, so "proto:/dir" counts as absolute while "proto:dir" does not.
 * On Windows a leading '/' or '\' is accepted immediately (this also covers
 * names such as "\\.\d:"), and '\' is accepted as a separator after the
 * optional prefix.
 *
 * Returns 1 if the path is absolute, 0 otherwise.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
83
/*
 * Build in 'dest' (capacity 'dest_size' bytes) the path of 'filename'.
 *
 * An absolute 'filename' is copied as is.  Otherwise the directory part of
 * 'base_path' -- everything up to and including the later of its first ':'
 * and its last path separator -- is placed in front of 'filename'.  The
 * result is truncated to fit 'dest'.  Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' of the base, if there is one */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* position just past the last path separator of the base */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix is longer, clamped to the buffer size */
    prefix_len = (after_sep > after_proto ? after_sep : after_proto) - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
127
128 void bdrv_register(BlockDriver *bdrv)
129 {
130     if (!bdrv->bdrv_aio_readv) {
131         /* add AIO emulation layer */
132         bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
133         bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
134     } else if (!bdrv->bdrv_read) {
135         /* add synchronous IO emulation layer */
136         bdrv->bdrv_read = bdrv_read_em;
137         bdrv->bdrv_write = bdrv_write_em;
138     }
139
140     if (!bdrv->bdrv_aio_flush)
141         bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
142
143     bdrv->next = first_drv;
144     first_drv = bdrv;
145 }
146
147 /* create a new block device (by default it is empty) */
148 BlockDriverState *bdrv_new(const char *device_name)
149 {
150     BlockDriverState **pbs, *bs;
151
152     bs = qemu_mallocz(sizeof(BlockDriverState));
153     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
154     if (device_name[0] != '\0') {
155         /* insert at the end */
156         pbs = &bdrv_first;
157         while (*pbs != NULL)
158             pbs = &(*pbs)->next;
159         *pbs = bs;
160     }
161     return bs;
162 }
163
164 BlockDriver *bdrv_find_format(const char *format_name)
165 {
166     BlockDriver *drv1;
167     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
168         if (!strcmp(drv1->format_name, format_name))
169             return drv1;
170     }
171     return NULL;
172 }
173
174 static int bdrv_is_whitelisted(BlockDriver *drv)
175 {
176     static const char *whitelist[] = {
177         CONFIG_BDRV_WHITELIST
178     };
179     const char **p;
180
181     if (!whitelist[0])
182         return 1;               /* no whitelist, anything goes */
183
184     for (p = whitelist; *p; p++) {
185         if (!strcmp(drv->format_name, *p)) {
186             return 1;
187         }
188     }
189     return 0;
190 }
191
192 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
193 {
194     BlockDriver *drv = bdrv_find_format(format_name);
195     return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
196 }
197
198 int bdrv_create(BlockDriver *drv, const char* filename,
199     QEMUOptionParameter *options)
200 {
201     if (!drv->bdrv_create)
202         return -ENOTSUP;
203
204     return drv->bdrv_create(filename, options);
205 }
206
#ifdef _WIN32
/*
 * Store the name of a fresh temporary file in 'filename'.
 *
 * NOTE(review): GetTempFileName() does not take a buffer length, so 'size'
 * cannot bound the write; callers presumably pass at least MAX_PATH bytes --
 * confirm.  On failure 'filename' is set to the empty string so that a
 * later open/create fails instead of using uninitialised data.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    if (size <= 0) {
        return;
    }
    if (!GetTempPath(MAX_PATH, temp_dir) ||
        !GetTempFileName(temp_dir, "qem", 0, filename)) {
        filename[0] = '\0';
    }
}
#else
/*
 * Create an empty temporary file under $TMPDIR (or /tmp when unset) and
 * store its name in 'filename' (capacity 'size' bytes).
 *
 * On failure 'filename' is set to the empty string, so callers never go on
 * to use the predictable, unexpanded "vl.XXXXXX" template.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    if (size <= 0) {
        return;
    }
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd < 0) {
        /* template could not be turned into a file; do not hand it out */
        filename[0] = '\0';
        return;
    }
    close(fd);
}
#endif
229
230 #ifdef _WIN32
/*
 * Return 1 if 'filename' starts with an ASCII letter followed by ':'
 * (e.g. "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_letter = (letter >= 'a' && letter <= 'z') ||
                    (letter >= 'A' && letter <= 'Z');

    if (!is_letter) {
        return 0;
    }
    return filename[1] == ':';
}
237
/*
 * Return 1 if 'filename' is exactly a drive prefix such as "d:", or if it
 * starts with "\\.\" or "//./"; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    int bare_drive = is_windows_drive_prefix(filename) &&
                     filename[2] == '\0';

    if (bare_drive) {
        return 1;
    }
    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL)) ? 1 : 0;
}
248 #endif
249
250 static BlockDriver *find_protocol(const char *filename)
251 {
252     BlockDriver *drv1;
253     char protocol[128];
254     int len;
255     const char *p;
256
257 #ifdef _WIN32
258     if (is_windows_drive(filename) ||
259         is_windows_drive_prefix(filename))
260         return bdrv_find_format("raw");
261 #endif
262     p = strchr(filename, ':');
263     if (!p)
264         return bdrv_find_format("raw");
265     len = p - filename;
266     if (len > sizeof(protocol) - 1)
267         len = sizeof(protocol) - 1;
268     memcpy(protocol, filename, len);
269     protocol[len] = '\0';
270     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
271         if (drv1->protocol_name &&
272             !strcmp(drv1->protocol_name, protocol))
273             return drv1;
274     }
275     return NULL;
276 }
277
278 /*
279  * Detect host devices. By convention, /dev/cdrom[N] is always
280  * recognized as a host CDROM.
281  */
282 static BlockDriver *find_hdev_driver(const char *filename)
283 {
284     int score_max = 0, score;
285     BlockDriver *drv = NULL, *d;
286
287     for (d = first_drv; d; d = d->next) {
288         if (d->bdrv_probe_device) {
289             score = d->bdrv_probe_device(filename);
290             if (score > score_max) {
291                 score_max = score;
292                 drv = d;
293             }
294         }
295     }
296
297     return drv;
298 }
299
300 static BlockDriver *find_image_format(const char *filename)
301 {
302     int ret, score, score_max;
303     BlockDriver *drv1, *drv;
304     uint8_t buf[2048];
305     BlockDriverState *bs;
306
307     drv = find_protocol(filename);
308     /* no need to test disk image formats for vvfat */
309     if (drv && strcmp(drv->format_name, "vvfat") == 0)
310         return drv;
311
312     ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
313     if (ret < 0)
314         return NULL;
315     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
316     bdrv_delete(bs);
317     if (ret < 0) {
318         return NULL;
319     }
320
321     score_max = 0;
322     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
323         if (drv1->bdrv_probe) {
324             score = drv1->bdrv_probe(buf, ret, filename);
325             if (score > score_max) {
326                 score_max = score;
327                 drv = drv1;
328             }
329         }
330     }
331     return drv;
332 }
333
334 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
335 {
336     BlockDriverState *bs;
337     int ret;
338
339     bs = bdrv_new("");
340     ret = bdrv_open2(bs, filename, flags | BDRV_O_FILE, NULL);
341     if (ret < 0) {
342         bdrv_delete(bs);
343         return ret;
344     }
345     bs->growable = 1;
346     *pbs = bs;
347     return 0;
348 }
349
350 int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
351 {
352     return bdrv_open2(bs, filename, flags, NULL);
353 }
354
355 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
356                BlockDriver *drv)
357 {
358     int ret, open_flags, try_rw;
359     char tmp_filename[PATH_MAX];
360     char backing_filename[PATH_MAX];
361
362     bs->is_temporary = 0;
363     bs->encrypted = 0;
364     bs->valid_key = 0;
365     /* buffer_alignment defaulted to 512, drivers can change this value */
366     bs->buffer_alignment = 512;
367
368     if (flags & BDRV_O_SNAPSHOT) {
369         BlockDriverState *bs1;
370         int64_t total_size;
371         int is_protocol = 0;
372         BlockDriver *bdrv_qcow2;
373         QEMUOptionParameter *options;
374
375         /* if snapshot, we create a temporary backing file and open it
376            instead of opening 'filename' directly */
377
378         /* if there is a backing file, use it */
379         bs1 = bdrv_new("");
380         ret = bdrv_open2(bs1, filename, 0, drv);
381         if (ret < 0) {
382             bdrv_delete(bs1);
383             return ret;
384         }
385         total_size = bdrv_getlength(bs1) >> BDRV_SECTOR_BITS;
386
387         if (bs1->drv && bs1->drv->protocol_name)
388             is_protocol = 1;
389
390         bdrv_delete(bs1);
391
392         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
393
394         /* Real path is meaningless for protocols */
395         if (is_protocol)
396             snprintf(backing_filename, sizeof(backing_filename),
397                      "%s", filename);
398         else
399             realpath(filename, backing_filename);
400
401         bdrv_qcow2 = bdrv_find_format("qcow2");
402         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
403
404         set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size * 512);
405         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
406         if (drv) {
407             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
408                 drv->format_name);
409         }
410
411         ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
412         if (ret < 0) {
413             return ret;
414         }
415
416         filename = tmp_filename;
417         drv = bdrv_qcow2;
418         bs->is_temporary = 1;
419     }
420
421     pstrcpy(bs->filename, sizeof(bs->filename), filename);
422     if (flags & BDRV_O_FILE) {
423         drv = find_protocol(filename);
424     } else if (!drv) {
425         drv = find_hdev_driver(filename);
426         if (!drv) {
427             drv = find_image_format(filename);
428         }
429     }
430     if (!drv) {
431         ret = -ENOENT;
432         goto unlink_and_fail;
433     }
434     bs->drv = drv;
435     bs->opaque = qemu_mallocz(drv->instance_size);
436
437     /*
438      * Yes, BDRV_O_NOCACHE aka O_DIRECT means we have to present a
439      * write cache to the guest.  We do need the fdatasync to flush
440      * out transactions for block allocations, and we maybe have a
441      * volatile write cache in our backing device to deal with.
442      */
443     if (flags & (BDRV_O_CACHE_WB|BDRV_O_NOCACHE))
444         bs->enable_write_cache = 1;
445
446     /* Note: for compatibility, we open disk image files as RDWR, and
447        RDONLY as fallback */
448     try_rw = !bs->read_only || bs->is_temporary;
449     if (!(flags & BDRV_O_FILE))
450         open_flags = (try_rw ? BDRV_O_RDWR : 0) |
451             (flags & (BDRV_O_CACHE_MASK|BDRV_O_NATIVE_AIO));
452     else
453         open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
454     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv))
455         ret = -ENOTSUP;
456     else
457         ret = drv->bdrv_open(bs, filename, open_flags);
458     if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
459         ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
460         bs->read_only = 1;
461     }
462     if (ret < 0) {
463         qemu_free(bs->opaque);
464         bs->opaque = NULL;
465         bs->drv = NULL;
466     unlink_and_fail:
467         if (bs->is_temporary)
468             unlink(filename);
469         return ret;
470     }
471     if (drv->bdrv_getlength) {
472         bs->total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
473     }
474 #ifndef _WIN32
475     if (bs->is_temporary) {
476         unlink(filename);
477     }
478 #endif
479     if (bs->backing_file[0] != '\0') {
480         /* if there is a backing file, use it */
481         BlockDriver *back_drv = NULL;
482         bs->backing_hd = bdrv_new("");
483         /* pass on read_only property to the backing_hd */
484         bs->backing_hd->read_only = bs->read_only;
485         path_combine(backing_filename, sizeof(backing_filename),
486                      filename, bs->backing_file);
487         if (bs->backing_format[0] != '\0')
488             back_drv = bdrv_find_format(bs->backing_format);
489         ret = bdrv_open2(bs->backing_hd, backing_filename, open_flags,
490                          back_drv);
491         if (ret < 0) {
492             bdrv_close(bs);
493             return ret;
494         }
495     }
496
497     if (!bdrv_key_required(bs)) {
498         /* call the change callback */
499         bs->media_changed = 1;
500         if (bs->change_cb)
501             bs->change_cb(bs->change_opaque);
502     }
503     return 0;
504 }
505
506 void bdrv_close(BlockDriverState *bs)
507 {
508     if (bs->drv) {
509         if (bs->backing_hd)
510             bdrv_delete(bs->backing_hd);
511         bs->drv->bdrv_close(bs);
512         qemu_free(bs->opaque);
513 #ifdef _WIN32
514         if (bs->is_temporary) {
515             unlink(bs->filename);
516         }
517 #endif
518         bs->opaque = NULL;
519         bs->drv = NULL;
520
521         /* call the change callback */
522         bs->media_changed = 1;
523         if (bs->change_cb)
524             bs->change_cb(bs->change_opaque);
525     }
526 }
527
528 void bdrv_delete(BlockDriverState *bs)
529 {
530     BlockDriverState **pbs;
531
532     pbs = &bdrv_first;
533     while (*pbs != bs && *pbs != NULL)
534         pbs = &(*pbs)->next;
535     if (*pbs == bs)
536         *pbs = bs->next;
537
538     bdrv_close(bs);
539     qemu_free(bs);
540 }
541
542 /*
543  * Run consistency checks on an image
544  *
545  * Returns the number of errors or -errno when an internal error occurs
546  */
547 int bdrv_check(BlockDriverState *bs)
548 {
549     if (bs->drv->bdrv_check == NULL) {
550         return -ENOTSUP;
551     }
552
553     return bs->drv->bdrv_check(bs);
554 }
555
556 /* commit COW file into the raw image */
557 int bdrv_commit(BlockDriverState *bs)
558 {
559     BlockDriver *drv = bs->drv;
560     int64_t i, total_sectors;
561     int n, j;
562     unsigned char sector[512];
563
564     if (!drv)
565         return -ENOMEDIUM;
566
567     if (bs->read_only) {
568         return -EACCES;
569     }
570
571     if (!bs->backing_hd) {
572         return -ENOTSUP;
573     }
574
575     total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
576     for (i = 0; i < total_sectors;) {
577         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
578             for(j = 0; j < n; j++) {
579                 if (bdrv_read(bs, i, sector, 1) != 0) {
580                     return -EIO;
581                 }
582
583                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
584                     return -EIO;
585                 }
586                 i++;
587             }
588         } else {
589             i += n;
590         }
591     }
592
593     if (drv->bdrv_make_empty)
594         return drv->bdrv_make_empty(bs);
595
596     return 0;
597 }
598
599 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
600                                    size_t size)
601 {
602     int64_t len;
603
604     if (!bdrv_is_inserted(bs))
605         return -ENOMEDIUM;
606
607     if (bs->growable)
608         return 0;
609
610     len = bdrv_getlength(bs);
611
612     if (offset < 0)
613         return -EIO;
614
615     if ((offset > len) || (len - offset < size))
616         return -EIO;
617
618     return 0;
619 }
620
621 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
622                               int nb_sectors)
623 {
624     return bdrv_check_byte_request(bs, sector_num * 512, nb_sectors * 512);
625 }
626
627 /* return < 0 if error. See bdrv_write() for the return codes */
628 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
629               uint8_t *buf, int nb_sectors)
630 {
631     BlockDriver *drv = bs->drv;
632
633     if (!drv)
634         return -ENOMEDIUM;
635     if (bdrv_check_request(bs, sector_num, nb_sectors))
636         return -EIO;
637
638     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
639 }
640
641 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
642                              int nb_sectors, int dirty)
643 {
644     int64_t start, end;
645     unsigned long val, idx, bit;
646
647     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
648     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
649
650     for (; start <= end; start++) {
651         idx = start / (sizeof(unsigned long) * 8);
652         bit = start % (sizeof(unsigned long) * 8);
653         val = bs->dirty_bitmap[idx];
654         if (dirty) {
655             val |= 1 << bit;
656         } else {
657             val &= ~(1 << bit);
658         }
659         bs->dirty_bitmap[idx] = val;
660     }
661 }
662
663 /* Return < 0 if error. Important errors are:
664   -EIO         generic I/O error (may happen for all errors)
665   -ENOMEDIUM   No media inserted.
666   -EINVAL      Invalid sector number or nb_sectors
667   -EACCES      Trying to write a read-only device
668 */
669 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
670                const uint8_t *buf, int nb_sectors)
671 {
672     BlockDriver *drv = bs->drv;
673     if (!bs->drv)
674         return -ENOMEDIUM;
675     if (bs->read_only)
676         return -EACCES;
677     if (bdrv_check_request(bs, sector_num, nb_sectors))
678         return -EIO;
679
680     if (bs->dirty_bitmap) {
681         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
682     }
683
684     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
685 }
686
687 int bdrv_pread(BlockDriverState *bs, int64_t offset,
688                void *buf, int count1)
689 {
690     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
691     int len, nb_sectors, count;
692     int64_t sector_num;
693
694     count = count1;
695     /* first read to align to sector start */
696     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
697     if (len > count)
698         len = count;
699     sector_num = offset >> BDRV_SECTOR_BITS;
700     if (len > 0) {
701         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
702             return -EIO;
703         memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
704         count -= len;
705         if (count == 0)
706             return count1;
707         sector_num++;
708         buf += len;
709     }
710
711     /* read the sectors "in place" */
712     nb_sectors = count >> BDRV_SECTOR_BITS;
713     if (nb_sectors > 0) {
714         if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
715             return -EIO;
716         sector_num += nb_sectors;
717         len = nb_sectors << BDRV_SECTOR_BITS;
718         buf += len;
719         count -= len;
720     }
721
722     /* add data from the last sector */
723     if (count > 0) {
724         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
725             return -EIO;
726         memcpy(buf, tmp_buf, count);
727     }
728     return count1;
729 }
730
731 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
732                 const void *buf, int count1)
733 {
734     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
735     int len, nb_sectors, count;
736     int64_t sector_num;
737
738     count = count1;
739     /* first write to align to sector start */
740     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
741     if (len > count)
742         len = count;
743     sector_num = offset >> BDRV_SECTOR_BITS;
744     if (len > 0) {
745         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
746             return -EIO;
747         memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
748         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
749             return -EIO;
750         count -= len;
751         if (count == 0)
752             return count1;
753         sector_num++;
754         buf += len;
755     }
756
757     /* write the sectors "in place" */
758     nb_sectors = count >> BDRV_SECTOR_BITS;
759     if (nb_sectors > 0) {
760         if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
761             return -EIO;
762         sector_num += nb_sectors;
763         len = nb_sectors << BDRV_SECTOR_BITS;
764         buf += len;
765         count -= len;
766     }
767
768     /* add data from the last sector */
769     if (count > 0) {
770         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
771             return -EIO;
772         memcpy(tmp_buf, buf, count);
773         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
774             return -EIO;
775     }
776     return count1;
777 }
778
779 /**
780  * Truncate file to 'offset' bytes (needed only for file protocols)
781  */
782 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
783 {
784     BlockDriver *drv = bs->drv;
785     if (!drv)
786         return -ENOMEDIUM;
787     if (!drv->bdrv_truncate)
788         return -ENOTSUP;
789     if (bs->read_only)
790         return -EACCES;
791     return drv->bdrv_truncate(bs, offset);
792 }
793
794 /**
795  * Length of a file in bytes. Return < 0 if error or unknown.
796  */
797 int64_t bdrv_getlength(BlockDriverState *bs)
798 {
799     BlockDriver *drv = bs->drv;
800     if (!drv)
801         return -ENOMEDIUM;
802     if (!drv->bdrv_getlength) {
803         /* legacy mode */
804         return bs->total_sectors * BDRV_SECTOR_SIZE;
805     }
806     return drv->bdrv_getlength(bs);
807 }
808
809 /* return 0 as number of sectors if no device present or error */
810 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
811 {
812     int64_t length;
813     length = bdrv_getlength(bs);
814     if (length < 0)
815         length = 0;
816     else
817         length = length >> BDRV_SECTOR_BITS;
818     *nb_sectors_ptr = length;
819 }
820
/*
 * One 16-byte entry of the MSDOS partition table, as overlaid on the boot
 * sector by guess_disk_lchs().  Packed so that the layout carries no
 * padding; the 32-bit fields are read with le32_to_cpu().
 */
struct partition {
    uint8_t  boot_ind;      /* 0x80 - active */
    uint8_t  head;          /* starting head */
    uint8_t  sector;        /* starting sector */
    uint8_t  cyl;           /* starting cylinder */
    uint8_t  sys_ind;       /* What partition type */
    uint8_t  end_head;      /* end head */
    uint8_t  end_sector;    /* end sector */
    uint8_t  end_cyl;       /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} __attribute__((packed));
833
834 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
835 static int guess_disk_lchs(BlockDriverState *bs,
836                            int *pcylinders, int *pheads, int *psectors)
837 {
838     uint8_t buf[512];
839     int ret, i, heads, sectors, cylinders;
840     struct partition *p;
841     uint32_t nr_sects;
842     uint64_t nb_sectors;
843
844     bdrv_get_geometry(bs, &nb_sectors);
845
846     ret = bdrv_read(bs, 0, buf, 1);
847     if (ret < 0)
848         return -1;
849     /* test msdos magic */
850     if (buf[510] != 0x55 || buf[511] != 0xaa)
851         return -1;
852     for(i = 0; i < 4; i++) {
853         p = ((struct partition *)(buf + 0x1be)) + i;
854         nr_sects = le32_to_cpu(p->nr_sects);
855         if (nr_sects && p->end_head) {
856             /* We make the assumption that the partition terminates on
857                a cylinder boundary */
858             heads = p->end_head + 1;
859             sectors = p->end_sector & 63;
860             if (sectors == 0)
861                 continue;
862             cylinders = nb_sectors / (heads * sectors);
863             if (cylinders < 1 || cylinders > 16383)
864                 continue;
865             *pheads = heads;
866             *psectors = sectors;
867             *pcylinders = cylinders;
868 #if 0
869             printf("guessed geometry: LCHS=%d %d %d\n",
870                    cylinders, heads, sectors);
871 #endif
872             return 0;
873         }
874     }
875     return -1;
876 }
877
878 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
879 {
880     int translation, lba_detected = 0;
881     int cylinders, heads, secs;
882     uint64_t nb_sectors;
883
884     /* if a geometry hint is available, use it */
885     bdrv_get_geometry(bs, &nb_sectors);
886     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
887     translation = bdrv_get_translation_hint(bs);
888     if (cylinders != 0) {
889         *pcyls = cylinders;
890         *pheads = heads;
891         *psecs = secs;
892     } else {
893         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
894             if (heads > 16) {
895                 /* if heads > 16, it means that a BIOS LBA
896                    translation was active, so the default
897                    hardware geometry is OK */
898                 lba_detected = 1;
899                 goto default_geometry;
900             } else {
901                 *pcyls = cylinders;
902                 *pheads = heads;
903                 *psecs = secs;
904                 /* disable any translation to be in sync with
905                    the logical geometry */
906                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
907                     bdrv_set_translation_hint(bs,
908                                               BIOS_ATA_TRANSLATION_NONE);
909                 }
910             }
911         } else {
912         default_geometry:
913             /* if no geometry, use a standard physical disk geometry */
914             cylinders = nb_sectors / (16 * 63);
915
916             if (cylinders > 16383)
917                 cylinders = 16383;
918             else if (cylinders < 2)
919                 cylinders = 2;
920             *pcyls = cylinders;
921             *pheads = 16;
922             *psecs = 63;
923             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
924                 if ((*pcyls * *pheads) <= 131072) {
925                     bdrv_set_translation_hint(bs,
926                                               BIOS_ATA_TRANSLATION_LARGE);
927                 } else {
928                     bdrv_set_translation_hint(bs,
929                                               BIOS_ATA_TRANSLATION_LBA);
930                 }
931             }
932         }
933         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
934     }
935 }
936
937 void bdrv_set_geometry_hint(BlockDriverState *bs,
938                             int cyls, int heads, int secs)
939 {
940     bs->cyls = cyls;
941     bs->heads = heads;
942     bs->secs = secs;
943 }
944
945 void bdrv_set_type_hint(BlockDriverState *bs, int type)
946 {
947     bs->type = type;
948     bs->removable = ((type == BDRV_TYPE_CDROM ||
949                       type == BDRV_TYPE_FLOPPY));
950 }
951
952 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
953 {
954     bs->translation = translation;
955 }
956
957 void bdrv_get_geometry_hint(BlockDriverState *bs,
958                             int *pcyls, int *pheads, int *psecs)
959 {
960     *pcyls = bs->cyls;
961     *pheads = bs->heads;
962     *psecs = bs->secs;
963 }
964
965 int bdrv_get_type_hint(BlockDriverState *bs)
966 {
967     return bs->type;
968 }
969
970 int bdrv_get_translation_hint(BlockDriverState *bs)
971 {
972     return bs->translation;
973 }
974
975 int bdrv_is_removable(BlockDriverState *bs)
976 {
977     return bs->removable;
978 }
979
980 int bdrv_is_read_only(BlockDriverState *bs)
981 {
982     return bs->read_only;
983 }
984
985 int bdrv_set_read_only(BlockDriverState *bs, int read_only)
986 {
987     int ret = bs->read_only;
988     bs->read_only = read_only;
989     return ret;
990 }
991
992 int bdrv_is_sg(BlockDriverState *bs)
993 {
994     return bs->sg;
995 }
996
997 int bdrv_enable_write_cache(BlockDriverState *bs)
998 {
999     return bs->enable_write_cache;
1000 }
1001
1002 /* XXX: no longer used */
1003 void bdrv_set_change_cb(BlockDriverState *bs,
1004                         void (*change_cb)(void *opaque), void *opaque)
1005 {
1006     bs->change_cb = change_cb;
1007     bs->change_opaque = opaque;
1008 }
1009
1010 int bdrv_is_encrypted(BlockDriverState *bs)
1011 {
1012     if (bs->backing_hd && bs->backing_hd->encrypted)
1013         return 1;
1014     return bs->encrypted;
1015 }
1016
1017 int bdrv_key_required(BlockDriverState *bs)
1018 {
1019     BlockDriverState *backing_hd = bs->backing_hd;
1020
1021     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1022         return 1;
1023     return (bs->encrypted && !bs->valid_key);
1024 }
1025
1026 int bdrv_set_key(BlockDriverState *bs, const char *key)
1027 {
1028     int ret;
1029     if (bs->backing_hd && bs->backing_hd->encrypted) {
1030         ret = bdrv_set_key(bs->backing_hd, key);
1031         if (ret < 0)
1032             return ret;
1033         if (!bs->encrypted)
1034             return 0;
1035     }
1036     if (!bs->encrypted || !bs->drv || !bs->drv->bdrv_set_key)
1037         return -1;
1038     ret = bs->drv->bdrv_set_key(bs, key);
1039     if (ret < 0) {
1040         bs->valid_key = 0;
1041     } else if (!bs->valid_key) {
1042         bs->valid_key = 1;
1043         /* call the change callback now, we skipped it on open */
1044         bs->media_changed = 1;
1045         if (bs->change_cb)
1046             bs->change_cb(bs->change_opaque);
1047     }
1048     return ret;
1049 }
1050
1051 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1052 {
1053     if (!bs->drv) {
1054         buf[0] = '\0';
1055     } else {
1056         pstrcpy(buf, buf_size, bs->drv->format_name);
1057     }
1058 }
1059
1060 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1061                          void *opaque)
1062 {
1063     BlockDriver *drv;
1064
1065     for (drv = first_drv; drv != NULL; drv = drv->next) {
1066         it(opaque, drv->format_name);
1067     }
1068 }
1069
1070 BlockDriverState *bdrv_find(const char *name)
1071 {
1072     BlockDriverState *bs;
1073
1074     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1075         if (!strcmp(name, bs->device_name))
1076             return bs;
1077     }
1078     return NULL;
1079 }
1080
1081 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1082 {
1083     BlockDriverState *bs;
1084
1085     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1086         it(opaque, bs);
1087     }
1088 }
1089
1090 const char *bdrv_get_device_name(BlockDriverState *bs)
1091 {
1092     return bs->device_name;
1093 }
1094
1095 void bdrv_flush(BlockDriverState *bs)
1096 {
1097     if (!bs->drv)
1098         return;
1099     if (bs->drv->bdrv_flush)
1100         bs->drv->bdrv_flush(bs);
1101     if (bs->backing_hd)
1102         bdrv_flush(bs->backing_hd);
1103 }
1104
1105 void bdrv_flush_all(void)
1106 {
1107     BlockDriverState *bs;
1108
1109     for (bs = bdrv_first; bs != NULL; bs = bs->next)
1110         if (bs->drv && !bdrv_is_read_only(bs) && 
1111             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)))
1112             bdrv_flush(bs);
1113 }
1114
1115 /*
1116  * Returns true iff the specified sector is present in the disk image. Drivers
1117  * not implementing the functionality are assumed to not support backing files,
1118  * hence all their sectors are reported as allocated.
1119  *
1120  * 'pnum' is set to the number of sectors (including and immediately following
1121  * the specified sector) that are known to be in the same
1122  * allocated/unallocated state.
1123  *
1124  * 'nb_sectors' is the max value 'pnum' should be set to.
1125  */
1126 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1127         int *pnum)
1128 {
1129     int64_t n;
1130     if (!bs->drv->bdrv_is_allocated) {
1131         if (sector_num >= bs->total_sectors) {
1132             *pnum = 0;
1133             return 0;
1134         }
1135         n = bs->total_sectors - sector_num;
1136         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1137         return 1;
1138     }
1139     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1140 }
1141
1142 void bdrv_info(Monitor *mon)
1143 {
1144     BlockDriverState *bs;
1145
1146     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1147         monitor_printf(mon, "%s:", bs->device_name);
1148         monitor_printf(mon, " type=");
1149         switch(bs->type) {
1150         case BDRV_TYPE_HD:
1151             monitor_printf(mon, "hd");
1152             break;
1153         case BDRV_TYPE_CDROM:
1154             monitor_printf(mon, "cdrom");
1155             break;
1156         case BDRV_TYPE_FLOPPY:
1157             monitor_printf(mon, "floppy");
1158             break;
1159         }
1160         monitor_printf(mon, " removable=%d", bs->removable);
1161         if (bs->removable) {
1162             monitor_printf(mon, " locked=%d", bs->locked);
1163         }
1164         if (bs->drv) {
1165             monitor_printf(mon, " file=");
1166             monitor_print_filename(mon, bs->filename);
1167             if (bs->backing_file[0] != '\0') {
1168                 monitor_printf(mon, " backing_file=");
1169                 monitor_print_filename(mon, bs->backing_file);
1170             }
1171             monitor_printf(mon, " ro=%d", bs->read_only);
1172             monitor_printf(mon, " drv=%s", bs->drv->format_name);
1173             monitor_printf(mon, " encrypted=%d", bdrv_is_encrypted(bs));
1174         } else {
1175             monitor_printf(mon, " [not inserted]");
1176         }
1177         monitor_printf(mon, "\n");
1178     }
1179 }
1180
1181 /* The "info blockstats" command. */
1182 void bdrv_info_stats(Monitor *mon)
1183 {
1184     BlockDriverState *bs;
1185
1186     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1187         monitor_printf(mon, "%s:"
1188                        " rd_bytes=%" PRIu64
1189                        " wr_bytes=%" PRIu64
1190                        " rd_operations=%" PRIu64
1191                        " wr_operations=%" PRIu64
1192                        "\n",
1193                        bs->device_name,
1194                        bs->rd_bytes, bs->wr_bytes,
1195                        bs->rd_ops, bs->wr_ops);
1196     }
1197 }
1198
1199 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1200 {
1201     if (bs->backing_hd && bs->backing_hd->encrypted)
1202         return bs->backing_file;
1203     else if (bs->encrypted)
1204         return bs->filename;
1205     else
1206         return NULL;
1207 }
1208
1209 void bdrv_get_backing_filename(BlockDriverState *bs,
1210                                char *filename, int filename_size)
1211 {
1212     if (!bs->backing_hd) {
1213         pstrcpy(filename, filename_size, "");
1214     } else {
1215         pstrcpy(filename, filename_size, bs->backing_file);
1216     }
1217 }
1218
1219 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1220                           const uint8_t *buf, int nb_sectors)
1221 {
1222     BlockDriver *drv = bs->drv;
1223     if (!drv)
1224         return -ENOMEDIUM;
1225     if (!drv->bdrv_write_compressed)
1226         return -ENOTSUP;
1227     if (bdrv_check_request(bs, sector_num, nb_sectors))
1228         return -EIO;
1229
1230     if (bs->dirty_bitmap) {
1231         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1232     }
1233
1234     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1235 }
1236
1237 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1238 {
1239     BlockDriver *drv = bs->drv;
1240     if (!drv)
1241         return -ENOMEDIUM;
1242     if (!drv->bdrv_get_info)
1243         return -ENOTSUP;
1244     memset(bdi, 0, sizeof(*bdi));
1245     return drv->bdrv_get_info(bs, bdi);
1246 }
1247
1248 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1249                       int64_t pos, int size)
1250 {
1251     BlockDriver *drv = bs->drv;
1252     if (!drv)
1253         return -ENOMEDIUM;
1254     if (!drv->bdrv_save_vmstate)
1255         return -ENOTSUP;
1256     return drv->bdrv_save_vmstate(bs, buf, pos, size);
1257 }
1258
1259 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
1260                       int64_t pos, int size)
1261 {
1262     BlockDriver *drv = bs->drv;
1263     if (!drv)
1264         return -ENOMEDIUM;
1265     if (!drv->bdrv_load_vmstate)
1266         return -ENOTSUP;
1267     return drv->bdrv_load_vmstate(bs, buf, pos, size);
1268 }
1269
1270 /**************************************************************/
1271 /* handling of snapshots */
1272
1273 int bdrv_snapshot_create(BlockDriverState *bs,
1274                          QEMUSnapshotInfo *sn_info)
1275 {
1276     BlockDriver *drv = bs->drv;
1277     if (!drv)
1278         return -ENOMEDIUM;
1279     if (!drv->bdrv_snapshot_create)
1280         return -ENOTSUP;
1281     return drv->bdrv_snapshot_create(bs, sn_info);
1282 }
1283
1284 int bdrv_snapshot_goto(BlockDriverState *bs,
1285                        const char *snapshot_id)
1286 {
1287     BlockDriver *drv = bs->drv;
1288     if (!drv)
1289         return -ENOMEDIUM;
1290     if (!drv->bdrv_snapshot_goto)
1291         return -ENOTSUP;
1292     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1293 }
1294
1295 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1296 {
1297     BlockDriver *drv = bs->drv;
1298     if (!drv)
1299         return -ENOMEDIUM;
1300     if (!drv->bdrv_snapshot_delete)
1301         return -ENOTSUP;
1302     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1303 }
1304
1305 int bdrv_snapshot_list(BlockDriverState *bs,
1306                        QEMUSnapshotInfo **psn_info)
1307 {
1308     BlockDriver *drv = bs->drv;
1309     if (!drv)
1310         return -ENOMEDIUM;
1311     if (!drv->bdrv_snapshot_list)
1312         return -ENOTSUP;
1313     return drv->bdrv_snapshot_list(bs, psn_info);
1314 }
1315
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (at most 'buf_size' bytes) in a short human
 * readable form and return 'buf'.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal place when
 * the scaled value is below 10, a rounded integer otherwise. Values too
 * large for the T suffix are still printed in T units.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* Pick the smallest unit giving fewer than 1000 units, capped at 'T'. */
    while (idx < NB_SUFFIXES - 1 && size >= 1000 * unit) {
        unit *= 1024;
        idx++;
    }

    if (size < 10 * unit) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / unit,
                 suffixes[idx]);
    } else {
        /* adding half a unit rounds to the nearest integer */
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 ((size + (unit >> 1)) / unit),
                 suffixes[idx]);
    }
    return buf;
}
1345
1346 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1347 {
1348     char buf1[128], date_buf[128], clock_buf[128];
1349 #ifdef _WIN32
1350     struct tm *ptm;
1351 #else
1352     struct tm tm;
1353 #endif
1354     time_t ti;
1355     int64_t secs;
1356
1357     if (!sn) {
1358         snprintf(buf, buf_size,
1359                  "%-10s%-20s%7s%20s%15s",
1360                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1361     } else {
1362         ti = sn->date_sec;
1363 #ifdef _WIN32
1364         ptm = localtime(&ti);
1365         strftime(date_buf, sizeof(date_buf),
1366                  "%Y-%m-%d %H:%M:%S", ptm);
1367 #else
1368         localtime_r(&ti, &tm);
1369         strftime(date_buf, sizeof(date_buf),
1370                  "%Y-%m-%d %H:%M:%S", &tm);
1371 #endif
1372         secs = sn->vm_clock_nsec / 1000000000;
1373         snprintf(clock_buf, sizeof(clock_buf),
1374                  "%02d:%02d:%02d.%03d",
1375                  (int)(secs / 3600),
1376                  (int)((secs / 60) % 60),
1377                  (int)(secs % 60),
1378                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1379         snprintf(buf, buf_size,
1380                  "%-10s%-20s%7s%20s%15s",
1381                  sn->id_str, sn->name,
1382                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1383                  date_buf,
1384                  clock_buf);
1385     }
1386     return buf;
1387 }
1388
1389
1390 /**************************************************************/
1391 /* async I/Os */
1392
1393 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1394                                  QEMUIOVector *qiov, int nb_sectors,
1395                                  BlockDriverCompletionFunc *cb, void *opaque)
1396 {
1397     BlockDriver *drv = bs->drv;
1398     BlockDriverAIOCB *ret;
1399
1400     if (!drv)
1401         return NULL;
1402     if (bdrv_check_request(bs, sector_num, nb_sectors))
1403         return NULL;
1404
1405     ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
1406                               cb, opaque);
1407
1408     if (ret) {
1409         /* Update stats even though technically transfer has not happened. */
1410         bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1411         bs->rd_ops ++;
1412     }
1413
1414     return ret;
1415 }
1416
1417 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1418                                   QEMUIOVector *qiov, int nb_sectors,
1419                                   BlockDriverCompletionFunc *cb, void *opaque)
1420 {
1421     BlockDriver *drv = bs->drv;
1422     BlockDriverAIOCB *ret;
1423
1424     if (!drv)
1425         return NULL;
1426     if (bs->read_only)
1427         return NULL;
1428     if (bdrv_check_request(bs, sector_num, nb_sectors))
1429         return NULL;
1430
1431     if (bs->dirty_bitmap) {
1432         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1433     }
1434
1435     ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
1436                                cb, opaque);
1437
1438     if (ret) {
1439         /* Update stats even though technically transfer has not happened. */
1440         bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1441         bs->wr_ops ++;
1442     }
1443
1444     return ret;
1445 }
1446
1447
1448 typedef struct MultiwriteCB {
1449     int error;
1450     int num_requests;
1451     int num_callbacks;
1452     struct {
1453         BlockDriverCompletionFunc *cb;
1454         void *opaque;
1455         QEMUIOVector *free_qiov;
1456         void *free_buf;
1457     } callbacks[];
1458 } MultiwriteCB;
1459
1460 static void multiwrite_user_cb(MultiwriteCB *mcb)
1461 {
1462     int i;
1463
1464     for (i = 0; i < mcb->num_callbacks; i++) {
1465         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1466         qemu_free(mcb->callbacks[i].free_qiov);
1467         qemu_free(mcb->callbacks[i].free_buf);
1468     }
1469 }
1470
1471 static void multiwrite_cb(void *opaque, int ret)
1472 {
1473     MultiwriteCB *mcb = opaque;
1474
1475     if (ret < 0) {
1476         mcb->error = ret;
1477         multiwrite_user_cb(mcb);
1478     }
1479
1480     mcb->num_requests--;
1481     if (mcb->num_requests == 0) {
1482         if (mcb->error == 0) {
1483             multiwrite_user_cb(mcb);
1484         }
1485         qemu_free(mcb);
1486     }
1487 }
1488
1489 static int multiwrite_req_compare(const void *a, const void *b)
1490 {
1491     return (((BlockRequest*) a)->sector - ((BlockRequest*) b)->sector);
1492 }
1493
1494 /*
1495  * Takes a bunch of requests and tries to merge them. Returns the number of
1496  * requests that remain after merging.
1497  */
1498 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
1499     int num_reqs, MultiwriteCB *mcb)
1500 {
1501     int i, outidx;
1502
1503     // Sort requests by start sector
1504     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
1505
1506     // Check if adjacent requests touch the same clusters. If so, combine them,
1507     // filling up gaps with zero sectors.
1508     outidx = 0;
1509     for (i = 1; i < num_reqs; i++) {
1510         int merge = 0;
1511         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
1512
1513         // This handles the cases that are valid for all block drivers, namely
1514         // exactly sequential writes and overlapping writes.
1515         if (reqs[i].sector <= oldreq_last) {
1516             merge = 1;
1517         }
1518
1519         // The block driver may decide that it makes sense to combine requests
1520         // even if there is a gap of some sectors between them. In this case,
1521         // the gap is filled with zeros (therefore only applicable for yet
1522         // unused space in format like qcow2).
1523         if (!merge && bs->drv->bdrv_merge_requests) {
1524             merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
1525         }
1526
1527         if (merge) {
1528             size_t size;
1529             QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov));
1530             qemu_iovec_init(qiov,
1531                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
1532
1533             // Add the first request to the merged one. If the requests are
1534             // overlapping, drop the last sectors of the first request.
1535             size = (reqs[i].sector - reqs[outidx].sector) << 9;
1536             qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
1537
1538             // We might need to add some zeros between the two requests
1539             if (reqs[i].sector > oldreq_last) {
1540                 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
1541                 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
1542                 memset(buf, 0, zero_bytes);
1543                 qemu_iovec_add(qiov, buf, zero_bytes);
1544                 mcb->callbacks[i].free_buf = buf;
1545             }
1546
1547             // Add the second request
1548             qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
1549
1550             reqs[outidx].nb_sectors += reqs[i].nb_sectors;
1551             reqs[outidx].qiov = qiov;
1552
1553             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
1554         } else {
1555             outidx++;
1556             reqs[outidx].sector     = reqs[i].sector;
1557             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
1558             reqs[outidx].qiov       = reqs[i].qiov;
1559         }
1560     }
1561
1562     return outidx + 1;
1563 }
1564
1565 /*
1566  * Submit multiple AIO write requests at once.
1567  *
1568  * On success, the function returns 0 and all requests in the reqs array have
1569  * been submitted. In error case this function returns -1, and any of the
1570  * requests may or may not be submitted yet. In particular, this means that the
1571  * callback will be called for some of the requests, for others it won't. The
1572  * caller must check the error field of the BlockRequest to wait for the right
1573  * callbacks (if error != 0, no callback will be called).
1574  *
1575  * The implementation may modify the contents of the reqs array, e.g. to merge
1576  * requests. However, the fields opaque and error are left unmodified as they
1577  * are used to signal failure for a single request to the caller.
1578  */
1579 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
1580 {
1581     BlockDriverAIOCB *acb;
1582     MultiwriteCB *mcb;
1583     int i;
1584
1585     if (num_reqs == 0) {
1586         return 0;
1587     }
1588
1589     // Create MultiwriteCB structure
1590     mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
1591     mcb->num_requests = 0;
1592     mcb->num_callbacks = num_reqs;
1593
1594     for (i = 0; i < num_reqs; i++) {
1595         mcb->callbacks[i].cb = reqs[i].cb;
1596         mcb->callbacks[i].opaque = reqs[i].opaque;
1597     }
1598
1599     // Check for mergable requests
1600     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
1601
1602     // Run the aio requests
1603     for (i = 0; i < num_reqs; i++) {
1604         acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
1605             reqs[i].nb_sectors, multiwrite_cb, mcb);
1606
1607         if (acb == NULL) {
1608             // We can only fail the whole thing if no request has been
1609             // submitted yet. Otherwise we'll wait for the submitted AIOs to
1610             // complete and report the error in the callback.
1611             if (mcb->num_requests == 0) {
1612                 reqs[i].error = EIO;
1613                 goto fail;
1614             } else {
1615                 mcb->error = EIO;
1616                 break;
1617             }
1618         } else {
1619             mcb->num_requests++;
1620         }
1621     }
1622
1623     return 0;
1624
1625 fail:
1626     free(mcb);
1627     return -1;
1628 }
1629
1630 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
1631         BlockDriverCompletionFunc *cb, void *opaque)
1632 {
1633     BlockDriver *drv = bs->drv;
1634
1635     if (!drv)
1636         return NULL;
1637
1638     /*
1639      * Note that unlike bdrv_flush the driver is reponsible for flushing a
1640      * backing image if it exists.
1641      */
1642     return drv->bdrv_aio_flush(bs, cb, opaque);
1643 }
1644
1645 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1646 {
1647     acb->pool->cancel(acb);
1648 }
1649
1650
1651 /**************************************************************/
1652 /* async block device emulation */
1653
1654 typedef struct BlockDriverAIOCBSync {
1655     BlockDriverAIOCB common;
1656     QEMUBH *bh;
1657     int ret;
1658     /* vector translation state */
1659     QEMUIOVector *qiov;
1660     uint8_t *bounce;
1661     int is_write;
1662 } BlockDriverAIOCBSync;
1663
1664 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
1665 {
1666     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
1667     qemu_bh_delete(acb->bh);
1668     acb->bh = NULL;
1669     qemu_aio_release(acb);
1670 }
1671
1672 static AIOPool bdrv_em_aio_pool = {
1673     .aiocb_size         = sizeof(BlockDriverAIOCBSync),
1674     .cancel             = bdrv_aio_cancel_em,
1675 };
1676
1677 static void bdrv_aio_bh_cb(void *opaque)
1678 {
1679     BlockDriverAIOCBSync *acb = opaque;
1680
1681     if (!acb->is_write)
1682         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
1683     qemu_vfree(acb->bounce);
1684     acb->common.cb(acb->common.opaque, acb->ret);
1685     qemu_bh_delete(acb->bh);
1686     acb->bh = NULL;
1687     qemu_aio_release(acb);
1688 }
1689
1690 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
1691                                             int64_t sector_num,
1692                                             QEMUIOVector *qiov,
1693                                             int nb_sectors,
1694                                             BlockDriverCompletionFunc *cb,
1695                                             void *opaque,
1696                                             int is_write)
1697
1698 {
1699     BlockDriverAIOCBSync *acb;
1700
1701     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
1702     acb->is_write = is_write;
1703     acb->qiov = qiov;
1704     acb->bounce = qemu_blockalign(bs, qiov->size);
1705
1706     if (!acb->bh)
1707         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1708
1709     if (is_write) {
1710         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1711         acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
1712     } else {
1713         acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
1714     }
1715
1716     qemu_bh_schedule(acb->bh);
1717
1718     return &acb->common;
1719 }
1720
1721 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
1722         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1723         BlockDriverCompletionFunc *cb, void *opaque)
1724 {
1725     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
1726 }
1727
1728 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
1729         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1730         BlockDriverCompletionFunc *cb, void *opaque)
1731 {
1732     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
1733 }
1734
1735 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
1736         BlockDriverCompletionFunc *cb, void *opaque)
1737 {
1738     BlockDriverAIOCBSync *acb;
1739
1740     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
1741     acb->is_write = 1; /* don't bounce in the completion hadler */
1742     acb->qiov = NULL;
1743     acb->bounce = NULL;
1744     acb->ret = 0;
1745
1746     if (!acb->bh)
1747         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1748
1749     bdrv_flush(bs);
1750     qemu_bh_schedule(acb->bh);
1751     return &acb->common;
1752 }
1753
1754 /**************************************************************/
1755 /* sync block device emulation */
1756
/*
 * Completion callback used by the synchronous emulation: stores 'ret' in
 * the int that 'opaque' points to.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
1761
1762 #define NOT_DONE 0x7fffffff
1763
1764 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
1765                         uint8_t *buf, int nb_sectors)
1766 {
1767     int async_ret;
1768     BlockDriverAIOCB *acb;
1769     struct iovec iov;
1770     QEMUIOVector qiov;
1771
1772     async_context_push();
1773
1774     async_ret = NOT_DONE;
1775     iov.iov_base = (void *)buf;
1776     iov.iov_len = nb_sectors * 512;
1777     qemu_iovec_init_external(&qiov, &iov, 1);
1778     acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
1779         bdrv_rw_em_cb, &async_ret);
1780     if (acb == NULL) {
1781         async_ret = -1;
1782         goto fail;
1783     }
1784
1785     while (async_ret == NOT_DONE) {
1786         qemu_aio_wait();
1787     }
1788
1789
1790 fail:
1791     async_context_pop();
1792     return async_ret;
1793 }
1794
1795 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
1796                          const uint8_t *buf, int nb_sectors)
1797 {
1798     int async_ret;
1799     BlockDriverAIOCB *acb;
1800     struct iovec iov;
1801     QEMUIOVector qiov;
1802
1803     async_context_push();
1804
1805     async_ret = NOT_DONE;
1806     iov.iov_base = (void *)buf;
1807     iov.iov_len = nb_sectors * 512;
1808     qemu_iovec_init_external(&qiov, &iov, 1);
1809     acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
1810         bdrv_rw_em_cb, &async_ret);
1811     if (acb == NULL) {
1812         async_ret = -1;
1813         goto fail;
1814     }
1815     while (async_ret == NOT_DONE) {
1816         qemu_aio_wait();
1817     }
1818
1819 fail:
1820     async_context_pop();
1821     return async_ret;
1822 }
1823
1824 void bdrv_init(void)
1825 {
1826     module_call_init(MODULE_INIT_BLOCK);
1827 }
1828
1829 void bdrv_init_with_whitelist(void)
1830 {
1831     use_bdrv_whitelist = 1;
1832     bdrv_init();
1833 }
1834
1835 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
1836                    BlockDriverCompletionFunc *cb, void *opaque)
1837 {
1838     BlockDriverAIOCB *acb;
1839
1840     if (pool->free_aiocb) {
1841         acb = pool->free_aiocb;
1842         pool->free_aiocb = acb->next;
1843     } else {
1844         acb = qemu_mallocz(pool->aiocb_size);
1845         acb->pool = pool;
1846     }
1847     acb->bs = bs;
1848     acb->cb = cb;
1849     acb->opaque = opaque;
1850     return acb;
1851 }
1852
1853 void qemu_aio_release(void *p)
1854 {
1855     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
1856     AIOPool *pool = acb->pool;
1857     acb->next = pool->free_aiocb;
1858     pool->free_aiocb = acb;
1859 }
1860
1861 /**************************************************************/
1862 /* removable device support */
1863
1864 /**
1865  * Return TRUE if the media is present
1866  */
1867 int bdrv_is_inserted(BlockDriverState *bs)
1868 {
1869     BlockDriver *drv = bs->drv;
1870     int ret;
1871     if (!drv)
1872         return 0;
1873     if (!drv->bdrv_is_inserted)
1874         return 1;
1875     ret = drv->bdrv_is_inserted(bs);
1876     return ret;
1877 }
1878
1879 /**
1880  * Return TRUE if the media changed since the last call to this
1881  * function. It is currently only used for floppy disks
1882  */
1883 int bdrv_media_changed(BlockDriverState *bs)
1884 {
1885     BlockDriver *drv = bs->drv;
1886     int ret;
1887
1888     if (!drv || !drv->bdrv_media_changed)
1889         ret = -ENOTSUP;
1890     else
1891         ret = drv->bdrv_media_changed(bs);
1892     if (ret == -ENOTSUP)
1893         ret = bs->media_changed;
1894     bs->media_changed = 0;
1895     return ret;
1896 }
1897
1898 /**
1899  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
1900  */
1901 int bdrv_eject(BlockDriverState *bs, int eject_flag)
1902 {
1903     BlockDriver *drv = bs->drv;
1904     int ret;
1905
1906     if (bs->locked) {
1907         return -EBUSY;
1908     }
1909
1910     if (!drv || !drv->bdrv_eject) {
1911         ret = -ENOTSUP;
1912     } else {
1913         ret = drv->bdrv_eject(bs, eject_flag);
1914     }
1915     if (ret == -ENOTSUP) {
1916         if (eject_flag)
1917             bdrv_close(bs);
1918         ret = 0;
1919     }
1920
1921     return ret;
1922 }
1923
1924 int bdrv_is_locked(BlockDriverState *bs)
1925 {
1926     return bs->locked;
1927 }
1928
1929 /**
1930  * Lock or unlock the media (if it is locked, the user won't be able
1931  * to eject it manually).
1932  */
1933 void bdrv_set_locked(BlockDriverState *bs, int locked)
1934 {
1935     BlockDriver *drv = bs->drv;
1936
1937     bs->locked = locked;
1938     if (drv && drv->bdrv_set_locked) {
1939         drv->bdrv_set_locked(bs, locked);
1940     }
1941 }
1942
1943 /* needed for generic scsi interface */
1944
1945 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
1946 {
1947     BlockDriver *drv = bs->drv;
1948
1949     if (drv && drv->bdrv_ioctl)
1950         return drv->bdrv_ioctl(bs, req, buf);
1951     return -ENOTSUP;
1952 }
1953
1954 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
1955         unsigned long int req, void *buf,
1956         BlockDriverCompletionFunc *cb, void *opaque)
1957 {
1958     BlockDriver *drv = bs->drv;
1959
1960     if (drv && drv->bdrv_aio_ioctl)
1961         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
1962     return NULL;
1963 }
1964
1965
1966
1967 void *qemu_blockalign(BlockDriverState *bs, size_t size)
1968 {
1969     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
1970 }
1971
1972 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
1973 {
1974     int64_t bitmap_size;
1975
1976     if (enable) {
1977         if (!bs->dirty_bitmap) {
1978             bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
1979                     BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
1980             bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
1981
1982             bs->dirty_bitmap = qemu_mallocz(bitmap_size);
1983         }
1984     } else {
1985         if (bs->dirty_bitmap) {
1986             qemu_free(bs->dirty_bitmap);
1987             bs->dirty_bitmap = NULL;
1988         }
1989     }
1990 }
1991
1992 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
1993 {
1994     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
1995
1996     if (bs->dirty_bitmap &&
1997         (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
1998         return bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
1999             (1 << (chunk % (sizeof(unsigned long) * 8)));
2000     } else {
2001         return 0;
2002     }
2003 }
2004
2005 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
2006                       int nr_sectors)
2007 {
2008     set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
2009 }
This page took 0.133081 seconds and 4 git commands to generate.