]> Git Repo - qemu.git/blob - block.c
net: clean up includes in net.c
[qemu.git] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "monitor.h"
27 #include "block_int.h"
28 #include "module.h"
29
30 #ifdef CONFIG_BSD
31 #include <sys/types.h>
32 #include <sys/stat.h>
33 #include <sys/ioctl.h>
34 #include <sys/queue.h>
35 #ifndef __DragonFly__
36 #include <sys/disk.h>
37 #endif
38 #endif
39
40 #ifdef _WIN32
41 #include <windows.h>
42 #endif
43
44 #define SECTOR_BITS 9
45 #define SECTOR_SIZE (1 << SECTOR_BITS)
46 #define SECTORS_PER_DIRTY_CHUNK 8
47
48 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
49         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
50         BlockDriverCompletionFunc *cb, void *opaque);
51 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
52         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
53         BlockDriverCompletionFunc *cb, void *opaque);
54 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
55         BlockDriverCompletionFunc *cb, void *opaque);
56 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
57                         uint8_t *buf, int nb_sectors);
58 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
59                          const uint8_t *buf, int nb_sectors);
60
61 BlockDriverState *bdrv_first;
62
63 static BlockDriver *first_drv;
64
65 /* If non-zero, use only whitelisted block drivers */
66 static int use_bdrv_whitelist;
67
/*
 * Return 1 when 'path' is absolute, 0 otherwise.
 *
 * Everything up to and including the first ':' is skipped before the
 * test, so "proto:/x" is absolute while "proto:x" is not.  On Windows a
 * path starting with '/' or a backslash is accepted immediately, and a
 * backslash is also accepted as the separator after the prefix.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *rest;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    rest = colon ? colon + 1 : path;
#ifdef _WIN32
    if (*rest == '\\') {
        return 1;
    }
#endif
    return *rest == '/';
}
87
/*
 * Store in 'dest' (capacity 'dest_size' bytes) the location of 'filename'
 * interpreted relative to 'base_path'.
 *
 * An absolute 'filename' is copied unchanged.  Otherwise the leading part
 * of 'base_path' -- up to its last directory separator, or up to the ':'
 * of a "proto:" prefix when that lies further right -- is copied and
 * 'filename' is appended to it.  The result is truncated to fit 'dest'.
 * Nothing is written when 'dest_size' is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end, *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* one past the first ':' or the start of the string when there is none */
    prefix_end = strchr(base_path, ':');
    prefix_end = prefix_end ? prefix_end + 1 : base_path;

    /* one past the last separator, or the start of the string */
    dir_end = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash = strrchr(base_path, '\\');
        if (!dir_end || bslash > dir_end) {
            dir_end = bslash;
        }
    }
#endif
    dir_end = dir_end ? dir_end + 1 : base_path;

    /* keep whichever boundary reaches further into base_path */
    if (dir_end > prefix_end) {
        prefix_end = dir_end;
    }

    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
131
132 void bdrv_register(BlockDriver *bdrv)
133 {
134     if (!bdrv->bdrv_aio_readv) {
135         /* add AIO emulation layer */
136         bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
137         bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
138     } else if (!bdrv->bdrv_read) {
139         /* add synchronous IO emulation layer */
140         bdrv->bdrv_read = bdrv_read_em;
141         bdrv->bdrv_write = bdrv_write_em;
142     }
143
144     if (!bdrv->bdrv_aio_flush)
145         bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
146
147     bdrv->next = first_drv;
148     first_drv = bdrv;
149 }
150
151 /* create a new block device (by default it is empty) */
152 BlockDriverState *bdrv_new(const char *device_name)
153 {
154     BlockDriverState **pbs, *bs;
155
156     bs = qemu_mallocz(sizeof(BlockDriverState));
157     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
158     if (device_name[0] != '\0') {
159         /* insert at the end */
160         pbs = &bdrv_first;
161         while (*pbs != NULL)
162             pbs = &(*pbs)->next;
163         *pbs = bs;
164     }
165     return bs;
166 }
167
168 BlockDriver *bdrv_find_format(const char *format_name)
169 {
170     BlockDriver *drv1;
171     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
172         if (!strcmp(drv1->format_name, format_name))
173             return drv1;
174     }
175     return NULL;
176 }
177
178 static int bdrv_is_whitelisted(BlockDriver *drv)
179 {
180     static const char *whitelist[] = {
181         CONFIG_BDRV_WHITELIST
182     };
183     const char **p;
184
185     if (!whitelist[0])
186         return 1;               /* no whitelist, anything goes */
187
188     for (p = whitelist; *p; p++) {
189         if (!strcmp(drv->format_name, *p)) {
190             return 1;
191         }
192     }
193     return 0;
194 }
195
196 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
197 {
198     BlockDriver *drv = bdrv_find_format(format_name);
199     return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
200 }
201
202 int bdrv_create(BlockDriver *drv, const char* filename,
203     QEMUOptionParameter *options)
204 {
205     if (!drv->bdrv_create)
206         return -ENOTSUP;
207
208     return drv->bdrv_create(filename, options);
209 }
210
#ifdef _WIN32
/*
 * Store the name of a freshly created temporary file in 'filename'
 * (capacity 'size' bytes).
 *
 * GetTempFileName() always writes into a MAX_PATH sized buffer, so the
 * name is produced in a local buffer first and then copied with the
 * caller's size limit instead of being written straight into 'filename'.
 * On failure 'filename' is left as an empty string.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];
    char temp_file[MAX_PATH];
    DWORD dir_len;

    if (size <= 0) {
        return;
    }
    filename[0] = '\0';

    dir_len = GetTempPath(MAX_PATH, temp_dir);
    if (dir_len == 0 || dir_len > MAX_PATH) {
        return;
    }
    if (GetTempFileName(temp_dir, "qem", 0, temp_file) == 0) {
        return;
    }
    pstrcpy(filename, size, temp_file);
}
#else
/*
 * Store the name of a freshly created temporary file in 'filename'
 * (capacity 'size' bytes).  The file is created under $TMPDIR, or /tmp
 * when TMPDIR is unset, and is closed again before returning.
 *
 * On failure (including a buffer too small for the template) 'filename'
 * is left as an empty string so that callers fail to open it instead of
 * using a name that mkstemp() never reserved.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    if (size <= 0) {
        return;
    }

    /* XXX: race condition possible: the file is reopened by name later */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd < 0) {
        filename[0] = '\0';
        return;
    }
    close(fd);
}
#endif
233
234 #ifdef _WIN32
/*
 * Return 1 when 'filename' starts with an ASCII letter followed by ':'
 * (for example "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    char letter = filename[0];
    int is_letter = (letter >= 'a' && letter <= 'z') ||
                    (letter >= 'A' && letter <= 'Z');

    return is_letter && filename[1] == ':';
}
241
/*
 * Return 1 when 'filename' is exactly a drive specification ("x:") or
 * begins with the prefix "\\.\" or "//./"; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    int bare_drive = is_windows_drive_prefix(filename) &&
                     filename[2] == '\0';

    if (bare_drive) {
        return 1;
    }
    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
252 #endif
253
254 static BlockDriver *find_protocol(const char *filename)
255 {
256     BlockDriver *drv1;
257     char protocol[128];
258     int len;
259     const char *p;
260
261 #ifdef _WIN32
262     if (is_windows_drive(filename) ||
263         is_windows_drive_prefix(filename))
264         return bdrv_find_format("raw");
265 #endif
266     p = strchr(filename, ':');
267     if (!p)
268         return bdrv_find_format("raw");
269     len = p - filename;
270     if (len > sizeof(protocol) - 1)
271         len = sizeof(protocol) - 1;
272     memcpy(protocol, filename, len);
273     protocol[len] = '\0';
274     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
275         if (drv1->protocol_name &&
276             !strcmp(drv1->protocol_name, protocol))
277             return drv1;
278     }
279     return NULL;
280 }
281
282 /*
283  * Detect host devices. By convention, /dev/cdrom[N] is always
284  * recognized as a host CDROM.
285  */
286 static BlockDriver *find_hdev_driver(const char *filename)
287 {
288     int score_max = 0, score;
289     BlockDriver *drv = NULL, *d;
290
291     for (d = first_drv; d; d = d->next) {
292         if (d->bdrv_probe_device) {
293             score = d->bdrv_probe_device(filename);
294             if (score > score_max) {
295                 score_max = score;
296                 drv = d;
297             }
298         }
299     }
300
301     return drv;
302 }
303
304 static BlockDriver *find_image_format(const char *filename)
305 {
306     int ret, score, score_max;
307     BlockDriver *drv1, *drv;
308     uint8_t buf[2048];
309     BlockDriverState *bs;
310
311     drv = find_protocol(filename);
312     /* no need to test disk image formats for vvfat */
313     if (drv && strcmp(drv->format_name, "vvfat") == 0)
314         return drv;
315
316     ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
317     if (ret < 0)
318         return NULL;
319     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
320     bdrv_delete(bs);
321     if (ret < 0) {
322         return NULL;
323     }
324
325     score_max = 0;
326     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
327         if (drv1->bdrv_probe) {
328             score = drv1->bdrv_probe(buf, ret, filename);
329             if (score > score_max) {
330                 score_max = score;
331                 drv = drv1;
332             }
333         }
334     }
335     return drv;
336 }
337
338 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
339 {
340     BlockDriverState *bs;
341     int ret;
342
343     bs = bdrv_new("");
344     ret = bdrv_open2(bs, filename, flags | BDRV_O_FILE, NULL);
345     if (ret < 0) {
346         bdrv_delete(bs);
347         return ret;
348     }
349     bs->growable = 1;
350     *pbs = bs;
351     return 0;
352 }
353
354 int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
355 {
356     return bdrv_open2(bs, filename, flags, NULL);
357 }
358
359 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
360                BlockDriver *drv)
361 {
362     int ret, open_flags, try_rw;
363     char tmp_filename[PATH_MAX];
364     char backing_filename[PATH_MAX];
365
366     bs->is_temporary = 0;
367     bs->encrypted = 0;
368     bs->valid_key = 0;
369     /* buffer_alignment defaulted to 512, drivers can change this value */
370     bs->buffer_alignment = 512;
371
372     if (flags & BDRV_O_SNAPSHOT) {
373         BlockDriverState *bs1;
374         int64_t total_size;
375         int is_protocol = 0;
376         BlockDriver *bdrv_qcow2;
377         QEMUOptionParameter *options;
378
379         /* if snapshot, we create a temporary backing file and open it
380            instead of opening 'filename' directly */
381
382         /* if there is a backing file, use it */
383         bs1 = bdrv_new("");
384         ret = bdrv_open2(bs1, filename, 0, drv);
385         if (ret < 0) {
386             bdrv_delete(bs1);
387             return ret;
388         }
389         total_size = bdrv_getlength(bs1) >> SECTOR_BITS;
390
391         if (bs1->drv && bs1->drv->protocol_name)
392             is_protocol = 1;
393
394         bdrv_delete(bs1);
395
396         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
397
398         /* Real path is meaningless for protocols */
399         if (is_protocol)
400             snprintf(backing_filename, sizeof(backing_filename),
401                      "%s", filename);
402         else
403             realpath(filename, backing_filename);
404
405         bdrv_qcow2 = bdrv_find_format("qcow2");
406         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
407
408         set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size * 512);
409         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
410         if (drv) {
411             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
412                 drv->format_name);
413         }
414
415         ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
416         if (ret < 0) {
417             return ret;
418         }
419
420         filename = tmp_filename;
421         drv = bdrv_qcow2;
422         bs->is_temporary = 1;
423     }
424
425     pstrcpy(bs->filename, sizeof(bs->filename), filename);
426     if (flags & BDRV_O_FILE) {
427         drv = find_protocol(filename);
428     } else if (!drv) {
429         drv = find_hdev_driver(filename);
430         if (!drv) {
431             drv = find_image_format(filename);
432         }
433     }
434     if (!drv) {
435         ret = -ENOENT;
436         goto unlink_and_fail;
437     }
438     bs->drv = drv;
439     bs->opaque = qemu_mallocz(drv->instance_size);
440
441     /*
442      * Yes, BDRV_O_NOCACHE aka O_DIRECT means we have to present a
443      * write cache to the guest.  We do need the fdatasync to flush
444      * out transactions for block allocations, and we maybe have a
445      * volatile write cache in our backing device to deal with.
446      */
447     if (flags & (BDRV_O_CACHE_WB|BDRV_O_NOCACHE))
448         bs->enable_write_cache = 1;
449
450     /* Note: for compatibility, we open disk image files as RDWR, and
451        RDONLY as fallback */
452     try_rw = !bs->read_only || bs->is_temporary;
453     if (!(flags & BDRV_O_FILE))
454         open_flags = (try_rw ? BDRV_O_RDWR : 0) |
455             (flags & (BDRV_O_CACHE_MASK|BDRV_O_NATIVE_AIO));
456     else
457         open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
458     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv))
459         ret = -ENOTSUP;
460     else
461         ret = drv->bdrv_open(bs, filename, open_flags);
462     if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
463         ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
464         bs->read_only = 1;
465     }
466     if (ret < 0) {
467         qemu_free(bs->opaque);
468         bs->opaque = NULL;
469         bs->drv = NULL;
470     unlink_and_fail:
471         if (bs->is_temporary)
472             unlink(filename);
473         return ret;
474     }
475     if (drv->bdrv_getlength) {
476         bs->total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
477     }
478 #ifndef _WIN32
479     if (bs->is_temporary) {
480         unlink(filename);
481     }
482 #endif
483     if (bs->backing_file[0] != '\0') {
484         /* if there is a backing file, use it */
485         BlockDriver *back_drv = NULL;
486         bs->backing_hd = bdrv_new("");
487         /* pass on read_only property to the backing_hd */
488         bs->backing_hd->read_only = bs->read_only;
489         path_combine(backing_filename, sizeof(backing_filename),
490                      filename, bs->backing_file);
491         if (bs->backing_format[0] != '\0')
492             back_drv = bdrv_find_format(bs->backing_format);
493         ret = bdrv_open2(bs->backing_hd, backing_filename, open_flags,
494                          back_drv);
495         if (ret < 0) {
496             bdrv_close(bs);
497             return ret;
498         }
499     }
500
501     if (!bdrv_key_required(bs)) {
502         /* call the change callback */
503         bs->media_changed = 1;
504         if (bs->change_cb)
505             bs->change_cb(bs->change_opaque);
506     }
507     return 0;
508 }
509
510 void bdrv_close(BlockDriverState *bs)
511 {
512     if (bs->drv) {
513         if (bs->backing_hd)
514             bdrv_delete(bs->backing_hd);
515         bs->drv->bdrv_close(bs);
516         qemu_free(bs->opaque);
517 #ifdef _WIN32
518         if (bs->is_temporary) {
519             unlink(bs->filename);
520         }
521 #endif
522         bs->opaque = NULL;
523         bs->drv = NULL;
524
525         /* call the change callback */
526         bs->media_changed = 1;
527         if (bs->change_cb)
528             bs->change_cb(bs->change_opaque);
529     }
530 }
531
532 void bdrv_delete(BlockDriverState *bs)
533 {
534     BlockDriverState **pbs;
535
536     pbs = &bdrv_first;
537     while (*pbs != bs && *pbs != NULL)
538         pbs = &(*pbs)->next;
539     if (*pbs == bs)
540         *pbs = bs->next;
541
542     bdrv_close(bs);
543     qemu_free(bs);
544 }
545
546 /*
547  * Run consistency checks on an image
548  *
549  * Returns the number of errors or -errno when an internal error occurs
550  */
551 int bdrv_check(BlockDriverState *bs)
552 {
553     if (bs->drv->bdrv_check == NULL) {
554         return -ENOTSUP;
555     }
556
557     return bs->drv->bdrv_check(bs);
558 }
559
560 /* commit COW file into the raw image */
561 int bdrv_commit(BlockDriverState *bs)
562 {
563     BlockDriver *drv = bs->drv;
564     int64_t i, total_sectors;
565     int n, j;
566     unsigned char sector[512];
567
568     if (!drv)
569         return -ENOMEDIUM;
570
571     if (bs->read_only) {
572         return -EACCES;
573     }
574
575     if (!bs->backing_hd) {
576         return -ENOTSUP;
577     }
578
579     total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
580     for (i = 0; i < total_sectors;) {
581         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
582             for(j = 0; j < n; j++) {
583                 if (bdrv_read(bs, i, sector, 1) != 0) {
584                     return -EIO;
585                 }
586
587                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
588                     return -EIO;
589                 }
590                 i++;
591             }
592         } else {
593             i += n;
594         }
595     }
596
597     if (drv->bdrv_make_empty)
598         return drv->bdrv_make_empty(bs);
599
600     return 0;
601 }
602
603 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
604                                    size_t size)
605 {
606     int64_t len;
607
608     if (!bdrv_is_inserted(bs))
609         return -ENOMEDIUM;
610
611     if (bs->growable)
612         return 0;
613
614     len = bdrv_getlength(bs);
615
616     if (offset < 0)
617         return -EIO;
618
619     if ((offset > len) || (len - offset < size))
620         return -EIO;
621
622     return 0;
623 }
624
625 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
626                               int nb_sectors)
627 {
628     return bdrv_check_byte_request(bs, sector_num * 512, nb_sectors * 512);
629 }
630
631 /* return < 0 if error. See bdrv_write() for the return codes */
632 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
633               uint8_t *buf, int nb_sectors)
634 {
635     BlockDriver *drv = bs->drv;
636
637     if (!drv)
638         return -ENOMEDIUM;
639     if (bdrv_check_request(bs, sector_num, nb_sectors))
640         return -EIO;
641
642     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
643 }
644
645 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
646                              int nb_sectors, int dirty)
647 {
648     int64_t start, end;
649     start = sector_num / SECTORS_PER_DIRTY_CHUNK;
650     end = (sector_num + nb_sectors) / SECTORS_PER_DIRTY_CHUNK;
651     
652     for(; start <= end; start++) {
653         bs->dirty_bitmap[start] = dirty;
654     }
655 }
656
657 /* Return < 0 if error. Important errors are:
658   -EIO         generic I/O error (may happen for all errors)
659   -ENOMEDIUM   No media inserted.
660   -EINVAL      Invalid sector number or nb_sectors
661   -EACCES      Trying to write a read-only device
662 */
663 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
664                const uint8_t *buf, int nb_sectors)
665 {
666     BlockDriver *drv = bs->drv;
667     if (!bs->drv)
668         return -ENOMEDIUM;
669     if (bs->read_only)
670         return -EACCES;
671     if (bdrv_check_request(bs, sector_num, nb_sectors))
672         return -EIO;
673     
674     if(bs->dirty_tracking) {
675         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
676     }
677     
678     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
679 }
680
681 int bdrv_pread(BlockDriverState *bs, int64_t offset,
682                void *buf, int count1)
683 {
684     uint8_t tmp_buf[SECTOR_SIZE];
685     int len, nb_sectors, count;
686     int64_t sector_num;
687
688     count = count1;
689     /* first read to align to sector start */
690     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
691     if (len > count)
692         len = count;
693     sector_num = offset >> SECTOR_BITS;
694     if (len > 0) {
695         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
696             return -EIO;
697         memcpy(buf, tmp_buf + (offset & (SECTOR_SIZE - 1)), len);
698         count -= len;
699         if (count == 0)
700             return count1;
701         sector_num++;
702         buf += len;
703     }
704
705     /* read the sectors "in place" */
706     nb_sectors = count >> SECTOR_BITS;
707     if (nb_sectors > 0) {
708         if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
709             return -EIO;
710         sector_num += nb_sectors;
711         len = nb_sectors << SECTOR_BITS;
712         buf += len;
713         count -= len;
714     }
715
716     /* add data from the last sector */
717     if (count > 0) {
718         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
719             return -EIO;
720         memcpy(buf, tmp_buf, count);
721     }
722     return count1;
723 }
724
725 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
726                 const void *buf, int count1)
727 {
728     uint8_t tmp_buf[SECTOR_SIZE];
729     int len, nb_sectors, count;
730     int64_t sector_num;
731
732     count = count1;
733     /* first write to align to sector start */
734     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
735     if (len > count)
736         len = count;
737     sector_num = offset >> SECTOR_BITS;
738     if (len > 0) {
739         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
740             return -EIO;
741         memcpy(tmp_buf + (offset & (SECTOR_SIZE - 1)), buf, len);
742         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
743             return -EIO;
744         count -= len;
745         if (count == 0)
746             return count1;
747         sector_num++;
748         buf += len;
749     }
750
751     /* write the sectors "in place" */
752     nb_sectors = count >> SECTOR_BITS;
753     if (nb_sectors > 0) {
754         if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
755             return -EIO;
756         sector_num += nb_sectors;
757         len = nb_sectors << SECTOR_BITS;
758         buf += len;
759         count -= len;
760     }
761
762     /* add data from the last sector */
763     if (count > 0) {
764         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
765             return -EIO;
766         memcpy(tmp_buf, buf, count);
767         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
768             return -EIO;
769     }
770     return count1;
771 }
772
773 /**
774  * Truncate file to 'offset' bytes (needed only for file protocols)
775  */
776 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
777 {
778     BlockDriver *drv = bs->drv;
779     if (!drv)
780         return -ENOMEDIUM;
781     if (!drv->bdrv_truncate)
782         return -ENOTSUP;
783     if (bs->read_only)
784         return -EACCES;
785     return drv->bdrv_truncate(bs, offset);
786 }
787
788 /**
789  * Length of a file in bytes. Return < 0 if error or unknown.
790  */
791 int64_t bdrv_getlength(BlockDriverState *bs)
792 {
793     BlockDriver *drv = bs->drv;
794     if (!drv)
795         return -ENOMEDIUM;
796     if (!drv->bdrv_getlength) {
797         /* legacy mode */
798         return bs->total_sectors * SECTOR_SIZE;
799     }
800     return drv->bdrv_getlength(bs);
801 }
802
803 /* return 0 as number of sectors if no device present or error */
804 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
805 {
806     int64_t length;
807     length = bdrv_getlength(bs);
808     if (length < 0)
809         length = 0;
810     else
811         length = length >> SECTOR_BITS;
812     *nb_sectors_ptr = length;
813 }
814
/*
 * One 16-byte entry of the MSDOS partition table, laid out exactly as it
 * is read from the boot sector (hence the packed attribute).
 */
struct partition {
    /* 0x80 - active */
    uint8_t boot_ind;
    /* starting head */
    uint8_t head;
    /* starting sector */
    uint8_t sector;
    /* starting cylinder */
    uint8_t cyl;
    /* What partition type */
    uint8_t sys_ind;
    /* end head */
    uint8_t end_head;
    /* end sector */
    uint8_t end_sector;
    /* end cylinder */
    uint8_t end_cyl;
    /* starting sector counting from 0 */
    uint32_t start_sect;
    /* nr of sectors in partition */
    uint32_t nr_sects;
} __attribute__((packed));
827
828 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
829 static int guess_disk_lchs(BlockDriverState *bs,
830                            int *pcylinders, int *pheads, int *psectors)
831 {
832     uint8_t buf[512];
833     int ret, i, heads, sectors, cylinders;
834     struct partition *p;
835     uint32_t nr_sects;
836     uint64_t nb_sectors;
837
838     bdrv_get_geometry(bs, &nb_sectors);
839
840     ret = bdrv_read(bs, 0, buf, 1);
841     if (ret < 0)
842         return -1;
843     /* test msdos magic */
844     if (buf[510] != 0x55 || buf[511] != 0xaa)
845         return -1;
846     for(i = 0; i < 4; i++) {
847         p = ((struct partition *)(buf + 0x1be)) + i;
848         nr_sects = le32_to_cpu(p->nr_sects);
849         if (nr_sects && p->end_head) {
850             /* We make the assumption that the partition terminates on
851                a cylinder boundary */
852             heads = p->end_head + 1;
853             sectors = p->end_sector & 63;
854             if (sectors == 0)
855                 continue;
856             cylinders = nb_sectors / (heads * sectors);
857             if (cylinders < 1 || cylinders > 16383)
858                 continue;
859             *pheads = heads;
860             *psectors = sectors;
861             *pcylinders = cylinders;
862 #if 0
863             printf("guessed geometry: LCHS=%d %d %d\n",
864                    cylinders, heads, sectors);
865 #endif
866             return 0;
867         }
868     }
869     return -1;
870 }
871
872 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
873 {
874     int translation, lba_detected = 0;
875     int cylinders, heads, secs;
876     uint64_t nb_sectors;
877
878     /* if a geometry hint is available, use it */
879     bdrv_get_geometry(bs, &nb_sectors);
880     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
881     translation = bdrv_get_translation_hint(bs);
882     if (cylinders != 0) {
883         *pcyls = cylinders;
884         *pheads = heads;
885         *psecs = secs;
886     } else {
887         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
888             if (heads > 16) {
889                 /* if heads > 16, it means that a BIOS LBA
890                    translation was active, so the default
891                    hardware geometry is OK */
892                 lba_detected = 1;
893                 goto default_geometry;
894             } else {
895                 *pcyls = cylinders;
896                 *pheads = heads;
897                 *psecs = secs;
898                 /* disable any translation to be in sync with
899                    the logical geometry */
900                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
901                     bdrv_set_translation_hint(bs,
902                                               BIOS_ATA_TRANSLATION_NONE);
903                 }
904             }
905         } else {
906         default_geometry:
907             /* if no geometry, use a standard physical disk geometry */
908             cylinders = nb_sectors / (16 * 63);
909
910             if (cylinders > 16383)
911                 cylinders = 16383;
912             else if (cylinders < 2)
913                 cylinders = 2;
914             *pcyls = cylinders;
915             *pheads = 16;
916             *psecs = 63;
917             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
918                 if ((*pcyls * *pheads) <= 131072) {
919                     bdrv_set_translation_hint(bs,
920                                               BIOS_ATA_TRANSLATION_LARGE);
921                 } else {
922                     bdrv_set_translation_hint(bs,
923                                               BIOS_ATA_TRANSLATION_LBA);
924                 }
925             }
926         }
927         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
928     }
929 }
930
931 void bdrv_set_geometry_hint(BlockDriverState *bs,
932                             int cyls, int heads, int secs)
933 {
934     bs->cyls = cyls;
935     bs->heads = heads;
936     bs->secs = secs;
937 }
938
939 void bdrv_set_type_hint(BlockDriverState *bs, int type)
940 {
941     bs->type = type;
942     bs->removable = ((type == BDRV_TYPE_CDROM ||
943                       type == BDRV_TYPE_FLOPPY));
944 }
945
946 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
947 {
948     bs->translation = translation;
949 }
950
951 void bdrv_get_geometry_hint(BlockDriverState *bs,
952                             int *pcyls, int *pheads, int *psecs)
953 {
954     *pcyls = bs->cyls;
955     *pheads = bs->heads;
956     *psecs = bs->secs;
957 }
958
959 int bdrv_get_type_hint(BlockDriverState *bs)
960 {
961     return bs->type;
962 }
963
964 int bdrv_get_translation_hint(BlockDriverState *bs)
965 {
966     return bs->translation;
967 }
968
969 int bdrv_is_removable(BlockDriverState *bs)
970 {
971     return bs->removable;
972 }
973
974 int bdrv_is_read_only(BlockDriverState *bs)
975 {
976     return bs->read_only;
977 }
978
979 int bdrv_set_read_only(BlockDriverState *bs, int read_only)
980 {
981     int ret = bs->read_only;
982     bs->read_only = read_only;
983     return ret;
984 }
985
986 int bdrv_is_sg(BlockDriverState *bs)
987 {
988     return bs->sg;
989 }
990
991 int bdrv_enable_write_cache(BlockDriverState *bs)
992 {
993     return bs->enable_write_cache;
994 }
995
996 /* XXX: no longer used */
997 void bdrv_set_change_cb(BlockDriverState *bs,
998                         void (*change_cb)(void *opaque), void *opaque)
999 {
1000     bs->change_cb = change_cb;
1001     bs->change_opaque = opaque;
1002 }
1003
1004 int bdrv_is_encrypted(BlockDriverState *bs)
1005 {
1006     if (bs->backing_hd && bs->backing_hd->encrypted)
1007         return 1;
1008     return bs->encrypted;
1009 }
1010
1011 int bdrv_key_required(BlockDriverState *bs)
1012 {
1013     BlockDriverState *backing_hd = bs->backing_hd;
1014
1015     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1016         return 1;
1017     return (bs->encrypted && !bs->valid_key);
1018 }
1019
1020 int bdrv_set_key(BlockDriverState *bs, const char *key)
1021 {
1022     int ret;
1023     if (bs->backing_hd && bs->backing_hd->encrypted) {
1024         ret = bdrv_set_key(bs->backing_hd, key);
1025         if (ret < 0)
1026             return ret;
1027         if (!bs->encrypted)
1028             return 0;
1029     }
1030     if (!bs->encrypted || !bs->drv || !bs->drv->bdrv_set_key)
1031         return -1;
1032     ret = bs->drv->bdrv_set_key(bs, key);
1033     if (ret < 0) {
1034         bs->valid_key = 0;
1035     } else if (!bs->valid_key) {
1036         bs->valid_key = 1;
1037         /* call the change callback now, we skipped it on open */
1038         bs->media_changed = 1;
1039         if (bs->change_cb)
1040             bs->change_cb(bs->change_opaque);
1041     }
1042     return ret;
1043 }
1044
1045 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1046 {
1047     if (!bs->drv) {
1048         buf[0] = '\0';
1049     } else {
1050         pstrcpy(buf, buf_size, bs->drv->format_name);
1051     }
1052 }
1053
1054 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1055                          void *opaque)
1056 {
1057     BlockDriver *drv;
1058
1059     for (drv = first_drv; drv != NULL; drv = drv->next) {
1060         it(opaque, drv->format_name);
1061     }
1062 }
1063
1064 BlockDriverState *bdrv_find(const char *name)
1065 {
1066     BlockDriverState *bs;
1067
1068     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1069         if (!strcmp(name, bs->device_name))
1070             return bs;
1071     }
1072     return NULL;
1073 }
1074
1075 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1076 {
1077     BlockDriverState *bs;
1078
1079     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1080         it(opaque, bs);
1081     }
1082 }
1083
1084 const char *bdrv_get_device_name(BlockDriverState *bs)
1085 {
1086     return bs->device_name;
1087 }
1088
1089 void bdrv_flush(BlockDriverState *bs)
1090 {
1091     if (!bs->drv)
1092         return;
1093     if (bs->drv->bdrv_flush)
1094         bs->drv->bdrv_flush(bs);
1095     if (bs->backing_hd)
1096         bdrv_flush(bs->backing_hd);
1097 }
1098
1099 void bdrv_flush_all(void)
1100 {
1101     BlockDriverState *bs;
1102
1103     for (bs = bdrv_first; bs != NULL; bs = bs->next)
1104         if (bs->drv && !bdrv_is_read_only(bs) && 
1105             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)))
1106             bdrv_flush(bs);
1107 }
1108
1109 /*
1110  * Returns true iff the specified sector is present in the disk image. Drivers
1111  * not implementing the functionality are assumed to not support backing files,
1112  * hence all their sectors are reported as allocated.
1113  *
1114  * 'pnum' is set to the number of sectors (including and immediately following
1115  * the specified sector) that are known to be in the same
1116  * allocated/unallocated state.
1117  *
1118  * 'nb_sectors' is the max value 'pnum' should be set to.
1119  */
1120 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1121         int *pnum)
1122 {
1123     int64_t n;
1124     if (!bs->drv->bdrv_is_allocated) {
1125         if (sector_num >= bs->total_sectors) {
1126             *pnum = 0;
1127             return 0;
1128         }
1129         n = bs->total_sectors - sector_num;
1130         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1131         return 1;
1132     }
1133     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1134 }
1135
1136 void bdrv_info(Monitor *mon)
1137 {
1138     BlockDriverState *bs;
1139
1140     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1141         monitor_printf(mon, "%s:", bs->device_name);
1142         monitor_printf(mon, " type=");
1143         switch(bs->type) {
1144         case BDRV_TYPE_HD:
1145             monitor_printf(mon, "hd");
1146             break;
1147         case BDRV_TYPE_CDROM:
1148             monitor_printf(mon, "cdrom");
1149             break;
1150         case BDRV_TYPE_FLOPPY:
1151             monitor_printf(mon, "floppy");
1152             break;
1153         }
1154         monitor_printf(mon, " removable=%d", bs->removable);
1155         if (bs->removable) {
1156             monitor_printf(mon, " locked=%d", bs->locked);
1157         }
1158         if (bs->drv) {
1159             monitor_printf(mon, " file=");
1160             monitor_print_filename(mon, bs->filename);
1161             if (bs->backing_file[0] != '\0') {
1162                 monitor_printf(mon, " backing_file=");
1163                 monitor_print_filename(mon, bs->backing_file);
1164             }
1165             monitor_printf(mon, " ro=%d", bs->read_only);
1166             monitor_printf(mon, " drv=%s", bs->drv->format_name);
1167             monitor_printf(mon, " encrypted=%d", bdrv_is_encrypted(bs));
1168         } else {
1169             monitor_printf(mon, " [not inserted]");
1170         }
1171         monitor_printf(mon, "\n");
1172     }
1173 }
1174
1175 /* The "info blockstats" command. */
1176 void bdrv_info_stats(Monitor *mon)
1177 {
1178     BlockDriverState *bs;
1179
1180     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1181         monitor_printf(mon, "%s:"
1182                        " rd_bytes=%" PRIu64
1183                        " wr_bytes=%" PRIu64
1184                        " rd_operations=%" PRIu64
1185                        " wr_operations=%" PRIu64
1186                        "\n",
1187                        bs->device_name,
1188                        bs->rd_bytes, bs->wr_bytes,
1189                        bs->rd_ops, bs->wr_ops);
1190     }
1191 }
1192
1193 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1194 {
1195     if (bs->backing_hd && bs->backing_hd->encrypted)
1196         return bs->backing_file;
1197     else if (bs->encrypted)
1198         return bs->filename;
1199     else
1200         return NULL;
1201 }
1202
1203 void bdrv_get_backing_filename(BlockDriverState *bs,
1204                                char *filename, int filename_size)
1205 {
1206     if (!bs->backing_hd) {
1207         pstrcpy(filename, filename_size, "");
1208     } else {
1209         pstrcpy(filename, filename_size, bs->backing_file);
1210     }
1211 }
1212
1213 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1214                           const uint8_t *buf, int nb_sectors)
1215 {
1216     BlockDriver *drv = bs->drv;
1217     if (!drv)
1218         return -ENOMEDIUM;
1219     if (!drv->bdrv_write_compressed)
1220         return -ENOTSUP;
1221     if (bdrv_check_request(bs, sector_num, nb_sectors))
1222         return -EIO;
1223     
1224     if(bs->dirty_tracking) {
1225         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1226     }
1227     
1228     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1229 }
1230
1231 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1232 {
1233     BlockDriver *drv = bs->drv;
1234     if (!drv)
1235         return -ENOMEDIUM;
1236     if (!drv->bdrv_get_info)
1237         return -ENOTSUP;
1238     memset(bdi, 0, sizeof(*bdi));
1239     return drv->bdrv_get_info(bs, bdi);
1240 }
1241
1242 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1243                       int64_t pos, int size)
1244 {
1245     BlockDriver *drv = bs->drv;
1246     if (!drv)
1247         return -ENOMEDIUM;
1248     if (!drv->bdrv_save_vmstate)
1249         return -ENOTSUP;
1250     return drv->bdrv_save_vmstate(bs, buf, pos, size);
1251 }
1252
1253 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
1254                       int64_t pos, int size)
1255 {
1256     BlockDriver *drv = bs->drv;
1257     if (!drv)
1258         return -ENOMEDIUM;
1259     if (!drv->bdrv_load_vmstate)
1260         return -ENOTSUP;
1261     return drv->bdrv_load_vmstate(bs, buf, pos, size);
1262 }
1263
1264 /**************************************************************/
1265 /* handling of snapshots */
1266
1267 int bdrv_snapshot_create(BlockDriverState *bs,
1268                          QEMUSnapshotInfo *sn_info)
1269 {
1270     BlockDriver *drv = bs->drv;
1271     if (!drv)
1272         return -ENOMEDIUM;
1273     if (!drv->bdrv_snapshot_create)
1274         return -ENOTSUP;
1275     return drv->bdrv_snapshot_create(bs, sn_info);
1276 }
1277
1278 int bdrv_snapshot_goto(BlockDriverState *bs,
1279                        const char *snapshot_id)
1280 {
1281     BlockDriver *drv = bs->drv;
1282     if (!drv)
1283         return -ENOMEDIUM;
1284     if (!drv->bdrv_snapshot_goto)
1285         return -ENOTSUP;
1286     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1287 }
1288
1289 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1290 {
1291     BlockDriver *drv = bs->drv;
1292     if (!drv)
1293         return -ENOMEDIUM;
1294     if (!drv->bdrv_snapshot_delete)
1295         return -ENOTSUP;
1296     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1297 }
1298
1299 int bdrv_snapshot_list(BlockDriverState *bs,
1300                        QEMUSnapshotInfo **psn_info)
1301 {
1302     BlockDriver *drv = bs->drv;
1303     if (!drv)
1304         return -ENOMEDIUM;
1305     if (!drv->bdrv_snapshot_list)
1306         return -ENOTSUP;
1307     return drv->bdrv_snapshot_list(bs, psn_info);
1308 }
1309
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (at most 'buf_size' bytes) in a compact,
 * human readable form and return 'buf'.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal digit is
 * printed while the scaled value is below 10, otherwise the value is
 * rounded to the nearest integer. Anything too large for the T range is
 * still printed with the T suffix.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < 10 * unit) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit, suffixes[idx]);
            return buf;
        }
        if (size < 1000 * unit || idx == NB_SUFFIXES - 1) {
            /* Adding half a unit before dividing rounds to nearest. */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (unit >> 1)) / unit, suffixes[idx]);
            return buf;
        }
    }
    return buf;
}
1339
1340 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1341 {
1342     char buf1[128], date_buf[128], clock_buf[128];
1343 #ifdef _WIN32
1344     struct tm *ptm;
1345 #else
1346     struct tm tm;
1347 #endif
1348     time_t ti;
1349     int64_t secs;
1350
1351     if (!sn) {
1352         snprintf(buf, buf_size,
1353                  "%-10s%-20s%7s%20s%15s",
1354                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1355     } else {
1356         ti = sn->date_sec;
1357 #ifdef _WIN32
1358         ptm = localtime(&ti);
1359         strftime(date_buf, sizeof(date_buf),
1360                  "%Y-%m-%d %H:%M:%S", ptm);
1361 #else
1362         localtime_r(&ti, &tm);
1363         strftime(date_buf, sizeof(date_buf),
1364                  "%Y-%m-%d %H:%M:%S", &tm);
1365 #endif
1366         secs = sn->vm_clock_nsec / 1000000000;
1367         snprintf(clock_buf, sizeof(clock_buf),
1368                  "%02d:%02d:%02d.%03d",
1369                  (int)(secs / 3600),
1370                  (int)((secs / 60) % 60),
1371                  (int)(secs % 60),
1372                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1373         snprintf(buf, buf_size,
1374                  "%-10s%-20s%7s%20s%15s",
1375                  sn->id_str, sn->name,
1376                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1377                  date_buf,
1378                  clock_buf);
1379     }
1380     return buf;
1381 }
1382
1383
1384 /**************************************************************/
1385 /* async I/Os */
1386
1387 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1388                                  QEMUIOVector *qiov, int nb_sectors,
1389                                  BlockDriverCompletionFunc *cb, void *opaque)
1390 {
1391     BlockDriver *drv = bs->drv;
1392     BlockDriverAIOCB *ret;
1393
1394     if (!drv)
1395         return NULL;
1396     if (bdrv_check_request(bs, sector_num, nb_sectors))
1397         return NULL;
1398
1399     ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
1400                               cb, opaque);
1401
1402     if (ret) {
1403         /* Update stats even though technically transfer has not happened. */
1404         bs->rd_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1405         bs->rd_ops ++;
1406     }
1407
1408     return ret;
1409 }
1410
1411 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1412                                   QEMUIOVector *qiov, int nb_sectors,
1413                                   BlockDriverCompletionFunc *cb, void *opaque)
1414 {
1415     BlockDriver *drv = bs->drv;
1416     BlockDriverAIOCB *ret;
1417
1418     if (!drv)
1419         return NULL;
1420     if (bs->read_only)
1421         return NULL;
1422     if (bdrv_check_request(bs, sector_num, nb_sectors))
1423         return NULL;
1424
1425     if(bs->dirty_tracking) {
1426         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1427     }
1428     
1429     ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
1430                                cb, opaque);
1431
1432     if (ret) {
1433         /* Update stats even though technically transfer has not happened. */
1434         bs->wr_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1435         bs->wr_ops ++;
1436     }
1437
1438     return ret;
1439 }
1440
1441
1442 typedef struct MultiwriteCB {
1443     int error;
1444     int num_requests;
1445     int num_callbacks;
1446     struct {
1447         BlockDriverCompletionFunc *cb;
1448         void *opaque;
1449         QEMUIOVector *free_qiov;
1450         void *free_buf;
1451     } callbacks[];
1452 } MultiwriteCB;
1453
1454 static void multiwrite_user_cb(MultiwriteCB *mcb)
1455 {
1456     int i;
1457
1458     for (i = 0; i < mcb->num_callbacks; i++) {
1459         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1460         qemu_free(mcb->callbacks[i].free_qiov);
1461         qemu_free(mcb->callbacks[i].free_buf);
1462     }
1463 }
1464
1465 static void multiwrite_cb(void *opaque, int ret)
1466 {
1467     MultiwriteCB *mcb = opaque;
1468
1469     if (ret < 0) {
1470         mcb->error = ret;
1471         multiwrite_user_cb(mcb);
1472     }
1473
1474     mcb->num_requests--;
1475     if (mcb->num_requests == 0) {
1476         if (mcb->error == 0) {
1477             multiwrite_user_cb(mcb);
1478         }
1479         qemu_free(mcb);
1480     }
1481 }
1482
1483 static int multiwrite_req_compare(const void *a, const void *b)
1484 {
1485     return (((BlockRequest*) a)->sector - ((BlockRequest*) b)->sector);
1486 }
1487
1488 /*
1489  * Takes a bunch of requests and tries to merge them. Returns the number of
1490  * requests that remain after merging.
1491  */
1492 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
1493     int num_reqs, MultiwriteCB *mcb)
1494 {
1495     int i, outidx;
1496
1497     // Sort requests by start sector
1498     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
1499
1500     // Check if adjacent requests touch the same clusters. If so, combine them,
1501     // filling up gaps with zero sectors.
1502     outidx = 0;
1503     for (i = 1; i < num_reqs; i++) {
1504         int merge = 0;
1505         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
1506
1507         // This handles the cases that are valid for all block drivers, namely
1508         // exactly sequential writes and overlapping writes.
1509         if (reqs[i].sector <= oldreq_last) {
1510             merge = 1;
1511         }
1512
1513         // The block driver may decide that it makes sense to combine requests
1514         // even if there is a gap of some sectors between them. In this case,
1515         // the gap is filled with zeros (therefore only applicable for yet
1516         // unused space in format like qcow2).
1517         if (!merge && bs->drv->bdrv_merge_requests) {
1518             merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
1519         }
1520
1521         if (merge) {
1522             size_t size;
1523             QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov));
1524             qemu_iovec_init(qiov,
1525                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
1526
1527             // Add the first request to the merged one. If the requests are
1528             // overlapping, drop the last sectors of the first request.
1529             size = (reqs[i].sector - reqs[outidx].sector) << 9;
1530             qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
1531
1532             // We might need to add some zeros between the two requests
1533             if (reqs[i].sector > oldreq_last) {
1534                 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
1535                 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
1536                 memset(buf, 0, zero_bytes);
1537                 qemu_iovec_add(qiov, buf, zero_bytes);
1538                 mcb->callbacks[i].free_buf = buf;
1539             }
1540
1541             // Add the second request
1542             qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
1543
1544             reqs[outidx].nb_sectors += reqs[i].nb_sectors;
1545             reqs[outidx].qiov = qiov;
1546
1547             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
1548         } else {
1549             outidx++;
1550             reqs[outidx].sector     = reqs[i].sector;
1551             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
1552             reqs[outidx].qiov       = reqs[i].qiov;
1553         }
1554     }
1555
1556     return outidx + 1;
1557 }
1558
1559 /*
1560  * Submit multiple AIO write requests at once.
1561  *
1562  * On success, the function returns 0 and all requests in the reqs array have
1563  * been submitted. In error case this function returns -1, and any of the
1564  * requests may or may not be submitted yet. In particular, this means that the
1565  * callback will be called for some of the requests, for others it won't. The
1566  * caller must check the error field of the BlockRequest to wait for the right
1567  * callbacks (if error != 0, no callback will be called).
1568  *
1569  * The implementation may modify the contents of the reqs array, e.g. to merge
1570  * requests. However, the fields opaque and error are left unmodified as they
1571  * are used to signal failure for a single request to the caller.
1572  */
1573 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
1574 {
1575     BlockDriverAIOCB *acb;
1576     MultiwriteCB *mcb;
1577     int i;
1578
1579     if (num_reqs == 0) {
1580         return 0;
1581     }
1582
1583     // Create MultiwriteCB structure
1584     mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
1585     mcb->num_requests = 0;
1586     mcb->num_callbacks = num_reqs;
1587
1588     for (i = 0; i < num_reqs; i++) {
1589         mcb->callbacks[i].cb = reqs[i].cb;
1590         mcb->callbacks[i].opaque = reqs[i].opaque;
1591     }
1592
1593     // Check for mergable requests
1594     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
1595
1596     // Run the aio requests
1597     for (i = 0; i < num_reqs; i++) {
1598         acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
1599             reqs[i].nb_sectors, multiwrite_cb, mcb);
1600
1601         if (acb == NULL) {
1602             // We can only fail the whole thing if no request has been
1603             // submitted yet. Otherwise we'll wait for the submitted AIOs to
1604             // complete and report the error in the callback.
1605             if (mcb->num_requests == 0) {
1606                 reqs[i].error = EIO;
1607                 goto fail;
1608             } else {
1609                 mcb->error = EIO;
1610                 break;
1611             }
1612         } else {
1613             mcb->num_requests++;
1614         }
1615     }
1616
1617     return 0;
1618
1619 fail:
1620     free(mcb);
1621     return -1;
1622 }
1623
1624 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
1625         BlockDriverCompletionFunc *cb, void *opaque)
1626 {
1627     BlockDriver *drv = bs->drv;
1628
1629     if (!drv)
1630         return NULL;
1631
1632     /*
1633      * Note that unlike bdrv_flush the driver is reponsible for flushing a
1634      * backing image if it exists.
1635      */
1636     return drv->bdrv_aio_flush(bs, cb, opaque);
1637 }
1638
1639 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1640 {
1641     acb->pool->cancel(acb);
1642 }
1643
1644
1645 /**************************************************************/
1646 /* async block device emulation */
1647
1648 typedef struct BlockDriverAIOCBSync {
1649     BlockDriverAIOCB common;
1650     QEMUBH *bh;
1651     int ret;
1652     /* vector translation state */
1653     QEMUIOVector *qiov;
1654     uint8_t *bounce;
1655     int is_write;
1656 } BlockDriverAIOCBSync;
1657
1658 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
1659 {
1660     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
1661     qemu_bh_delete(acb->bh);
1662     acb->bh = NULL;
1663     qemu_aio_release(acb);
1664 }
1665
1666 static AIOPool bdrv_em_aio_pool = {
1667     .aiocb_size         = sizeof(BlockDriverAIOCBSync),
1668     .cancel             = bdrv_aio_cancel_em,
1669 };
1670
1671 static void bdrv_aio_bh_cb(void *opaque)
1672 {
1673     BlockDriverAIOCBSync *acb = opaque;
1674
1675     if (!acb->is_write)
1676         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
1677     qemu_vfree(acb->bounce);
1678     acb->common.cb(acb->common.opaque, acb->ret);
1679     qemu_bh_delete(acb->bh);
1680     acb->bh = NULL;
1681     qemu_aio_release(acb);
1682 }
1683
1684 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
1685                                             int64_t sector_num,
1686                                             QEMUIOVector *qiov,
1687                                             int nb_sectors,
1688                                             BlockDriverCompletionFunc *cb,
1689                                             void *opaque,
1690                                             int is_write)
1691
1692 {
1693     BlockDriverAIOCBSync *acb;
1694
1695     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
1696     acb->is_write = is_write;
1697     acb->qiov = qiov;
1698     acb->bounce = qemu_blockalign(bs, qiov->size);
1699
1700     if (!acb->bh)
1701         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1702
1703     if (is_write) {
1704         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1705         acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
1706     } else {
1707         acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
1708     }
1709
1710     qemu_bh_schedule(acb->bh);
1711
1712     return &acb->common;
1713 }
1714
1715 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
1716         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1717         BlockDriverCompletionFunc *cb, void *opaque)
1718 {
1719     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
1720 }
1721
1722 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
1723         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1724         BlockDriverCompletionFunc *cb, void *opaque)
1725 {
1726     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
1727 }
1728
1729 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
1730         BlockDriverCompletionFunc *cb, void *opaque)
1731 {
1732     BlockDriverAIOCBSync *acb;
1733
1734     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
1735     acb->is_write = 1; /* don't bounce in the completion hadler */
1736     acb->qiov = NULL;
1737     acb->bounce = NULL;
1738     acb->ret = 0;
1739
1740     if (!acb->bh)
1741         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1742
1743     bdrv_flush(bs);
1744     qemu_bh_schedule(acb->bh);
1745     return &acb->common;
1746 }
1747
1748 /**************************************************************/
1749 /* sync block device emulation */
1750
/*
 * Completion callback for the synchronous emulation: 'opaque' points to
 * the int that receives the request's result.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
1755
1756 #define NOT_DONE 0x7fffffff
1757
1758 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
1759                         uint8_t *buf, int nb_sectors)
1760 {
1761     int async_ret;
1762     BlockDriverAIOCB *acb;
1763     struct iovec iov;
1764     QEMUIOVector qiov;
1765
1766     async_context_push();
1767
1768     async_ret = NOT_DONE;
1769     iov.iov_base = (void *)buf;
1770     iov.iov_len = nb_sectors * 512;
1771     qemu_iovec_init_external(&qiov, &iov, 1);
1772     acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
1773         bdrv_rw_em_cb, &async_ret);
1774     if (acb == NULL) {
1775         async_ret = -1;
1776         goto fail;
1777     }
1778
1779     while (async_ret == NOT_DONE) {
1780         qemu_aio_wait();
1781     }
1782
1783
1784 fail:
1785     async_context_pop();
1786     return async_ret;
1787 }
1788
1789 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
1790                          const uint8_t *buf, int nb_sectors)
1791 {
1792     int async_ret;
1793     BlockDriverAIOCB *acb;
1794     struct iovec iov;
1795     QEMUIOVector qiov;
1796
1797     async_context_push();
1798
1799     async_ret = NOT_DONE;
1800     iov.iov_base = (void *)buf;
1801     iov.iov_len = nb_sectors * 512;
1802     qemu_iovec_init_external(&qiov, &iov, 1);
1803     acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
1804         bdrv_rw_em_cb, &async_ret);
1805     if (acb == NULL) {
1806         async_ret = -1;
1807         goto fail;
1808     }
1809     while (async_ret == NOT_DONE) {
1810         qemu_aio_wait();
1811     }
1812
1813 fail:
1814     async_context_pop();
1815     return async_ret;
1816 }
1817
1818 void bdrv_init(void)
1819 {
1820     module_call_init(MODULE_INIT_BLOCK);
1821 }
1822
1823 void bdrv_init_with_whitelist(void)
1824 {
1825     use_bdrv_whitelist = 1;
1826     bdrv_init();
1827 }
1828
1829 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
1830                    BlockDriverCompletionFunc *cb, void *opaque)
1831 {
1832     BlockDriverAIOCB *acb;
1833
1834     if (pool->free_aiocb) {
1835         acb = pool->free_aiocb;
1836         pool->free_aiocb = acb->next;
1837     } else {
1838         acb = qemu_mallocz(pool->aiocb_size);
1839         acb->pool = pool;
1840     }
1841     acb->bs = bs;
1842     acb->cb = cb;
1843     acb->opaque = opaque;
1844     return acb;
1845 }
1846
1847 void qemu_aio_release(void *p)
1848 {
1849     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
1850     AIOPool *pool = acb->pool;
1851     acb->next = pool->free_aiocb;
1852     pool->free_aiocb = acb;
1853 }
1854
1855 /**************************************************************/
1856 /* removable device support */
1857
1858 /**
1859  * Return TRUE if the media is present
1860  */
1861 int bdrv_is_inserted(BlockDriverState *bs)
1862 {
1863     BlockDriver *drv = bs->drv;
1864     int ret;
1865     if (!drv)
1866         return 0;
1867     if (!drv->bdrv_is_inserted)
1868         return 1;
1869     ret = drv->bdrv_is_inserted(bs);
1870     return ret;
1871 }
1872
1873 /**
1874  * Return TRUE if the media changed since the last call to this
1875  * function. It is currently only used for floppy disks
1876  */
1877 int bdrv_media_changed(BlockDriverState *bs)
1878 {
1879     BlockDriver *drv = bs->drv;
1880     int ret;
1881
1882     if (!drv || !drv->bdrv_media_changed)
1883         ret = -ENOTSUP;
1884     else
1885         ret = drv->bdrv_media_changed(bs);
1886     if (ret == -ENOTSUP)
1887         ret = bs->media_changed;
1888     bs->media_changed = 0;
1889     return ret;
1890 }
1891
1892 /**
1893  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
1894  */
1895 int bdrv_eject(BlockDriverState *bs, int eject_flag)
1896 {
1897     BlockDriver *drv = bs->drv;
1898     int ret;
1899
1900     if (bs->locked) {
1901         return -EBUSY;
1902     }
1903
1904     if (!drv || !drv->bdrv_eject) {
1905         ret = -ENOTSUP;
1906     } else {
1907         ret = drv->bdrv_eject(bs, eject_flag);
1908     }
1909     if (ret == -ENOTSUP) {
1910         if (eject_flag)
1911             bdrv_close(bs);
1912         ret = 0;
1913     }
1914
1915     return ret;
1916 }
1917
1918 int bdrv_is_locked(BlockDriverState *bs)
1919 {
1920     return bs->locked;
1921 }
1922
1923 /**
1924  * Lock or unlock the media (if it is locked, the user won't be able
1925  * to eject it manually).
1926  */
1927 void bdrv_set_locked(BlockDriverState *bs, int locked)
1928 {
1929     BlockDriver *drv = bs->drv;
1930
1931     bs->locked = locked;
1932     if (drv && drv->bdrv_set_locked) {
1933         drv->bdrv_set_locked(bs, locked);
1934     }
1935 }
1936
1937 /* needed for generic scsi interface */
1938
1939 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
1940 {
1941     BlockDriver *drv = bs->drv;
1942
1943     if (drv && drv->bdrv_ioctl)
1944         return drv->bdrv_ioctl(bs, req, buf);
1945     return -ENOTSUP;
1946 }
1947
1948 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
1949         unsigned long int req, void *buf,
1950         BlockDriverCompletionFunc *cb, void *opaque)
1951 {
1952     BlockDriver *drv = bs->drv;
1953
1954     if (drv && drv->bdrv_aio_ioctl)
1955         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
1956     return NULL;
1957 }
1958
1959
1960
1961 void *qemu_blockalign(BlockDriverState *bs, size_t size)
1962 {
1963     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
1964 }
1965
1966 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
1967 {
1968     int64_t bitmap_size;
1969     if(enable) {
1970         if(bs->dirty_tracking == 0) {
1971             int64_t i;
1972             uint8_t test;
1973             bitmap_size = (bdrv_getlength(bs) >> SECTOR_BITS);
1974             bitmap_size /= SECTORS_PER_DIRTY_CHUNK;
1975             bitmap_size++;
1976             
1977             bs->dirty_bitmap = qemu_mallocz(bitmap_size);
1978             
1979             bs->dirty_tracking = enable;
1980             for(i = 0; i < bitmap_size; i++) test = bs->dirty_bitmap[i]; 
1981         }
1982     } else {
1983         if(bs->dirty_tracking != 0) {
1984             qemu_free(bs->dirty_bitmap);
1985             bs->dirty_tracking = enable;
1986         }
1987     }
1988 }
1989
1990 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
1991 {
1992     int64_t chunk = sector / (int64_t)SECTORS_PER_DIRTY_CHUNK;
1993     
1994     if(bs->dirty_bitmap != NULL && 
1995        (sector << SECTOR_BITS) <= bdrv_getlength(bs)) {
1996         return bs->dirty_bitmap[chunk];
1997     } else {
1998         return 0;
1999     }
2000 }
2001
2002 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, 
2003                       int nr_sectors)
2004 {
2005     set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
2006 }
2007
2008 int bdrv_get_sectors_per_chunk(void)
2009 {
2010     /* size must be 2^x */
2011     return SECTORS_PER_DIRTY_CHUNK;
2012 }
This page took 0.135739 seconds and 4 git commands to generate.