/*
 *  xen paravirt block device backend
 *
 *  (c) Gerd Hoffmann <[email protected]>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 *  Contributions after 2012-01-13 are licensed under the terms of the
 *  GNU GPL, version 2 or (at your option) any later version.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <inttypes.h>
#include <time.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include <xs.h>
#include <xenctrl.h>
#include <xen/io/xenbus.h>

#include "hw.h"
#include "block_int.h"
#include "qemu-char.h"
#include "xen_blkif.h"
#include "xen_backend.h"
#include "blockdev.h"

/* ------------------------------------------------------------- */

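/*
 * batch_maps != 0 maps all grant refs of a request with a single
 * xc_gnttab_map_grant_refs() call instead of one call per page;
 * max_requests caps how many struct ioreq we keep per device.
 */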
static int batch_maps   = 0;

static int max_requests = 32;

/* ------------------------------------------------------------- */

#define BLOCK_SIZE  512
#define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)

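/*
 * One ioreq tracks a single blkif request from the shared ring: the
 * parsed offset/iovec, the grant mappings backing the iovec, and the
 * number of outstanding aio operations.  Requests move from the
 * freelist to the inflight list, then to the finished list, before
 * being recycled.
 */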
struct ioreq {
    blkif_request_t     req;
    int16_t             status;

    /* parsed request */
    off_t               start;
    QEMUIOVector        v;
    int                 presync;
    int                 postsync;
    uint8_t             mapped;

    /* grant mapping */
    uint32_t            domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    uint32_t            refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    int                 prot;
    void                *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    void                *pages;

    /* aio status */
    int                 aio_inflight;
    int                 aio_errors;

    struct XenBlkDev    *blkdev;
    QLIST_ENTRY(ioreq)   list;
    BlockAcctCookie     acct;
};

struct XenBlkDev {
    struct XenDevice    xendev;  /* must be first */
    char                *params;
    char                *mode;
    char                *type;
    char                *dev;
    char                *devtype;
    const char          *fileproto;
    const char          *filename;
    int                 ring_ref;
    void                *sring;
    int64_t             file_blk;
    int64_t             file_size;
    int                 protocol;
    blkif_back_rings_t  rings;
    int                 more_work;
    int                 cnt_map;

    /* request lists */
    QLIST_HEAD(inflight_head, ioreq) inflight;
    QLIST_HEAD(finished_head, ioreq) finished;
    QLIST_HEAD(freelist_head, ioreq) freelist;
    int                 requests_total;
    int                 requests_inflight;
    int                 requests_finished;

    /* qemu block driver */
    DriveInfo           *dinfo;
    BlockDriverState    *bs;
    QEMUBH              *bh;
};

/* ------------------------------------------------------------- */

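/*
 * Grab an ioreq from the freelist, or allocate a new one as long as we
 * stay below max_requests, and move it onto the inflight list.
 */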
static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
{
    struct ioreq *ioreq = NULL;

    if (QLIST_EMPTY(&blkdev->freelist)) {
        if (blkdev->requests_total >= max_requests) {
            goto out;
        }
        /* allocate new struct */
        ioreq = g_malloc0(sizeof(*ioreq));
        ioreq->blkdev = blkdev;
        blkdev->requests_total++;
        qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST);
    } else {
        /* get one from freelist */
        ioreq = QLIST_FIRST(&blkdev->freelist);
        QLIST_REMOVE(ioreq, list);
        qemu_iovec_reset(&ioreq->v);
    }
    QLIST_INSERT_HEAD(&blkdev->inflight, ioreq, list);
    blkdev->requests_inflight++;

out:
    return ioreq;
}

static void ioreq_finish(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    QLIST_REMOVE(ioreq, list);
    QLIST_INSERT_HEAD(&blkdev->finished, ioreq, list);
    blkdev->requests_inflight--;
    blkdev->requests_finished++;
}

static void ioreq_release(struct ioreq *ioreq, bool finish)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    QLIST_REMOVE(ioreq, list);
    memset(ioreq, 0, sizeof(*ioreq));
    ioreq->blkdev = blkdev;
    QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list);
    if (finish) {
        blkdev->requests_finished--;
    } else {
        blkdev->requests_inflight--;
    }
}

/*
 * translate request into iovec + start offset
 * do sanity checks along the way
 */
static int ioreq_parse(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;
    uintptr_t mem;
    size_t len;
    int i;

    xen_be_printf(&blkdev->xendev, 3,
                  "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n",
                  ioreq->req.operation, ioreq->req.nr_segments,
                  ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number);
    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        ioreq->prot = PROT_WRITE; /* to memory */
        break;
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments) {
            ioreq->presync = 1;
            return 0;
        }
        ioreq->presync = ioreq->postsync = 1;
        /* fall through */
    case BLKIF_OP_WRITE:
        ioreq->prot = PROT_READ; /* from memory */
        break;
    default:
        xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n",
                      ioreq->req.operation);
        goto err;
    }

    if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') {
        xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n");
        goto err;
    }

    ioreq->start = ioreq->req.sector_number * blkdev->file_blk;
    for (i = 0; i < ioreq->req.nr_segments; i++) {
        if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
            xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n");
            goto err;
        }
        if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) {
            xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n");
            goto err;
        }
        if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) {
            xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n");
            goto err;
        }

        ioreq->domids[i] = blkdev->xendev.dom;
        ioreq->refs[i]   = ioreq->req.seg[i].gref;

        mem = ioreq->req.seg[i].first_sect * blkdev->file_blk;
        len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk;
        qemu_iovec_add(&ioreq->v, (void *)mem, len);
    }
    if (ioreq->start + ioreq->v.size > blkdev->file_size) {
        xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n");
        goto err;
    }
    return 0;

err:
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}

static void ioreq_unmap(struct ioreq *ioreq)
{
    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
    int i;

    if (ioreq->v.niov == 0 || ioreq->mapped == 0) {
        return;
    }
    if (batch_maps) {
        if (!ioreq->pages) {
            return;
        }
        if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->v.niov) != 0) {
            xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
                          strerror(errno));
        }
        ioreq->blkdev->cnt_map -= ioreq->v.niov;
        ioreq->pages = NULL;
    } else {
        for (i = 0; i < ioreq->v.niov; i++) {
            if (!ioreq->page[i]) {
                continue;
            }
            if (xc_gnttab_munmap(gnt, ioreq->page[i], 1) != 0) {
                xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
                              strerror(errno));
            }
            ioreq->blkdev->cnt_map--;
            ioreq->page[i] = NULL;
        }
    }
    ioreq->mapped = 0;
}

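/*
 * Map the guest's grant references into our address space.  After
 * ioreq_parse() each iov_base only holds the offset within its page;
 * here the mapped page address is added so the iovec points at real
 * memory.
 */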
static int ioreq_map(struct ioreq *ioreq)
{
    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
    int i;

    if (ioreq->v.niov == 0 || ioreq->mapped == 1) {
        return 0;
    }
    if (batch_maps) {
        ioreq->pages = xc_gnttab_map_grant_refs
            (gnt, ioreq->v.niov, ioreq->domids, ioreq->refs, ioreq->prot);
        if (ioreq->pages == NULL) {
            xen_be_printf(&ioreq->blkdev->xendev, 0,
                          "can't map %d grant refs (%s, %d maps)\n",
                          ioreq->v.niov, strerror(errno), ioreq->blkdev->cnt_map);
            return -1;
        }
        for (i = 0; i < ioreq->v.niov; i++) {
            ioreq->v.iov[i].iov_base = ioreq->pages + i * XC_PAGE_SIZE +
                (uintptr_t)ioreq->v.iov[i].iov_base;
        }
        ioreq->blkdev->cnt_map += ioreq->v.niov;
    } else {
        for (i = 0; i < ioreq->v.niov; i++) {
            ioreq->page[i] = xc_gnttab_map_grant_ref
                (gnt, ioreq->domids[i], ioreq->refs[i], ioreq->prot);
            if (ioreq->page[i] == NULL) {
                xen_be_printf(&ioreq->blkdev->xendev, 0,
                              "can't map grant ref %d (%s, %d maps)\n",
                              ioreq->refs[i], strerror(errno), ioreq->blkdev->cnt_map);
                ioreq_unmap(ioreq);
                return -1;
            }
            ioreq->v.iov[i].iov_base = ioreq->page[i] + (uintptr_t)ioreq->v.iov[i].iov_base;
            ioreq->blkdev->cnt_map++;
        }
    }
    ioreq->mapped = 1;
    return 0;
}

static int ioreq_runio_qemu_aio(struct ioreq *ioreq);

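/*
 * Completion callback for all aio issued on behalf of an ioreq.  A
 * pending presync flush re-submits the request, a postsync flush is
 * issued once the last aio finishes, and only then are the grants
 * unmapped, the request moved to the finished list and the bottom
 * half kicked to send the response.
 */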
static void qemu_aio_complete(void *opaque, int ret)
{
    struct ioreq *ioreq = opaque;

    if (ret != 0) {
        xen_be_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n",
                      ioreq->req.operation == BLKIF_OP_READ ? "read" : "write");
        ioreq->aio_errors++;
    }

    ioreq->aio_inflight--;
    if (ioreq->presync) {
        ioreq->presync = 0;
        ioreq_runio_qemu_aio(ioreq);
        return;
    }
    if (ioreq->aio_inflight > 0) {
        return;
    }
    if (ioreq->postsync) {
        ioreq->postsync = 0;
        ioreq->aio_inflight++;
        bdrv_aio_flush(ioreq->blkdev->bs, qemu_aio_complete, ioreq);
        return;
    }

    ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
    ioreq_unmap(ioreq);
    ioreq_finish(ioreq);
    bdrv_acct_done(ioreq->blkdev->bs, &ioreq->acct);
    qemu_bh_schedule(ioreq->blkdev->bh);
}

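/*
 * Submit an ioreq to the qemu block layer.  aio_inflight starts at 1
 * so the request cannot complete while it is still being submitted;
 * the final qemu_aio_complete(ioreq, 0) call drops that reference.
 */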
static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
        goto err_no_map;
    }

    ioreq->aio_inflight++;
    if (ioreq->presync) {
        bdrv_aio_flush(ioreq->blkdev->bs, qemu_aio_complete, ioreq);
        return 0;
    }

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->v.size, BDRV_ACCT_READ);
        ioreq->aio_inflight++;
        bdrv_aio_readv(blkdev->bs, ioreq->start / BLOCK_SIZE,
                       &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                       qemu_aio_complete, ioreq);
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments) {
            break;
        }

        bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->v.size, BDRV_ACCT_WRITE);
        ioreq->aio_inflight++;
        bdrv_aio_writev(blkdev->bs, ioreq->start / BLOCK_SIZE,
                        &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                        qemu_aio_complete, ioreq);
        break;
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    qemu_aio_complete(ioreq, 0);

    return 0;

err:
    ioreq_unmap(ioreq);
err_no_map:
    ioreq_finish(ioreq);
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}

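/*
 * Put the response for one finished ioreq on the shared ring, using
 * the ring layout negotiated with the frontend.  Returns nonzero if
 * the frontend needs an event-channel notification.
 */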
static int blk_send_response_one(struct ioreq *ioreq)
{
    struct XenBlkDev  *blkdev = ioreq->blkdev;
    int               send_notify   = 0;
    int               have_requests = 0;
    blkif_response_t  resp;
    void              *dst;

    resp.id        = ioreq->req.id;
    resp.operation = ioreq->req.operation;
    resp.status    = ioreq->status;

    /* Place on the response ring for the relevant domain. */
    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
        dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_32:
        dst = RING_GET_RESPONSE(&blkdev->rings.x86_32_part,
                                blkdev->rings.x86_32_part.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_64:
        dst = RING_GET_RESPONSE(&blkdev->rings.x86_64_part,
                                blkdev->rings.x86_64_part.rsp_prod_pvt);
        break;
    default:
        dst = NULL;
    }
    memcpy(dst, &resp, sizeof(resp));
    blkdev->rings.common.rsp_prod_pvt++;

    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify);
    if (blkdev->rings.common.rsp_prod_pvt == blkdev->rings.common.req_cons) {
        /*
         * Tail check for pending requests. Allows frontend to avoid
         * notifications if requests are already in flight (lower
         * overheads and promotes batching).
         */
        RING_FINAL_CHECK_FOR_REQUESTS(&blkdev->rings.common, have_requests);
    } else if (RING_HAS_UNCONSUMED_REQUESTS(&blkdev->rings.common)) {
        have_requests = 1;
    }

    if (have_requests) {
        blkdev->more_work++;
    }
    return send_notify;
}

/* walk finished list, send outstanding responses, free requests */
static void blk_send_response_all(struct XenBlkDev *blkdev)
{
    struct ioreq *ioreq;
    int send_notify = 0;

    while (!QLIST_EMPTY(&blkdev->finished)) {
        ioreq = QLIST_FIRST(&blkdev->finished);
        send_notify += blk_send_response_one(ioreq);
        ioreq_release(ioreq, true);
    }
    if (send_notify) {
        xen_be_send_notify(&blkdev->xendev);
    }
}

static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq, RING_IDX rc)
{
    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
        memcpy(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.native, rc),
               sizeof(ioreq->req));
        break;
    case BLKIF_PROTOCOL_X86_32:
        blkif_get_x86_32_req(&ioreq->req,
                             RING_GET_REQUEST(&blkdev->rings.x86_32_part, rc));
        break;
    case BLKIF_PROTOCOL_X86_64:
        blkif_get_x86_64_req(&ioreq->req,
                             RING_GET_REQUEST(&blkdev->rings.x86_64_part, rc));
        break;
    }
    return 0;
}

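/*
 * Main request loop, run from the bottom half: flush out finished
 * responses, then pull new requests off the ring, parse them and
 * submit them as aio until the ring is empty or max_requests is hit.
 */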
static void blk_handle_requests(struct XenBlkDev *blkdev)
{
    RING_IDX rc, rp;
    struct ioreq *ioreq;

    blkdev->more_work = 0;

    rc = blkdev->rings.common.req_cons;
    rp = blkdev->rings.common.sring->req_prod;
    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

    blk_send_response_all(blkdev);
    while (rc != rp) {
        /* pull request from ring */
        if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
            break;
        }
        ioreq = ioreq_start(blkdev);
        if (ioreq == NULL) {
            blkdev->more_work++;
            break;
        }
        blk_get_request(blkdev, ioreq, rc);
        blkdev->rings.common.req_cons = ++rc;

        /* parse them */
        if (ioreq_parse(ioreq) != 0) {
            if (blk_send_response_one(ioreq)) {
                xen_be_send_notify(&blkdev->xendev);
            }
            ioreq_release(ioreq, false);
            continue;
        }

        ioreq_runio_qemu_aio(ioreq);
    }

    if (blkdev->more_work && blkdev->requests_inflight < max_requests) {
        qemu_bh_schedule(blkdev->bh);
    }
}

/* ------------------------------------------------------------- */

static void blk_bh(void *opaque)
{
    struct XenBlkDev *blkdev = opaque;
    blk_handle_requests(blkdev);
}

/*
 * We need to account for the grant allocations requiring contiguous
 * chunks; the worst case number would be
 *     max_req * max_seg + (max_req - 1) * (max_seg - 1) + 1,
 * but in order to keep things simple just use
 *     2 * max_req * max_seg.
 */
#define MAX_GRANTS(max_req, max_seg) (2 * (max_req) * (max_seg))

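/*
 * Called once when the backend device is created: set up the request
 * lists and the bottom half, and size the gntdev handle for the worst
 * case number of simultaneously mapped grants.
 */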
static void blk_alloc(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    QLIST_INIT(&blkdev->inflight);
    QLIST_INIT(&blkdev->finished);
    QLIST_INIT(&blkdev->freelist);
    blkdev->bh = qemu_bh_new(blk_bh, blkdev);
    if (xen_mode != XEN_EMULATE) {
        batch_maps = 1;
    }
    if (xc_gnttab_set_max_grants(xendev->gnttabdev,
            MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) {
        xen_be_printf(xendev, 0, "xc_gnttab_set_max_grants failed: %s\n",
                      strerror(errno));
    }
}

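/*
 * Read the backend configuration from xenstore, open (or pick up) the
 * block driver and publish the disk geometry and features to the
 * frontend.
 */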
static int blk_init(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
    int index, qflags, info = 0;

    /* read xenstore entries */
    if (blkdev->params == NULL) {
        char *h = NULL;
        blkdev->params = xenstore_read_be_str(&blkdev->xendev, "params");
        if (blkdev->params != NULL) {
            h = strchr(blkdev->params, ':');
        }
        if (h != NULL) {
            blkdev->fileproto = blkdev->params;
            blkdev->filename  = h+1;
            *h = 0;
        } else {
            blkdev->fileproto = "<unset>";
            blkdev->filename  = blkdev->params;
        }
    }
    if (!strcmp("aio", blkdev->fileproto)) {
        blkdev->fileproto = "raw";
    }
    if (blkdev->mode == NULL) {
        blkdev->mode = xenstore_read_be_str(&blkdev->xendev, "mode");
    }
    if (blkdev->type == NULL) {
        blkdev->type = xenstore_read_be_str(&blkdev->xendev, "type");
    }
    if (blkdev->dev == NULL) {
        blkdev->dev = xenstore_read_be_str(&blkdev->xendev, "dev");
    }
    if (blkdev->devtype == NULL) {
        blkdev->devtype = xenstore_read_be_str(&blkdev->xendev, "device-type");
    }

    /* do we have all we need? */
    if (blkdev->params == NULL ||
        blkdev->mode == NULL   ||
        blkdev->type == NULL   ||
        blkdev->dev == NULL) {
        goto out_error;
    }

    /* read-only ? */
    qflags = BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NATIVE_AIO;
    if (strcmp(blkdev->mode, "w") == 0) {
        qflags |= BDRV_O_RDWR;
    } else {
        info  |= VDISK_READONLY;
    }

    /* cdrom ? */
    if (blkdev->devtype && !strcmp(blkdev->devtype, "cdrom")) {
        info  |= VDISK_CDROM;
    }

    /* init qemu block driver */
    index = (blkdev->xendev.dev - 202 * 256) / 16;
    blkdev->dinfo = drive_get(IF_XEN, 0, index);
    if (!blkdev->dinfo) {
        /* setup via xenbus -> create new block driver instance */
        xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
        blkdev->bs = bdrv_new(blkdev->dev);
        if (blkdev->bs) {
            if (bdrv_open(blkdev->bs, blkdev->filename, qflags,
                        bdrv_find_whitelisted_format(blkdev->fileproto)) != 0) {
                bdrv_delete(blkdev->bs);
                blkdev->bs = NULL;
            }
        }
        if (!blkdev->bs) {
            goto out_error;
        }
    } else {
        /* setup via qemu cmdline -> already setup for us */
        xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n");
        blkdev->bs = blkdev->dinfo->bdrv;
    }
    bdrv_attach_dev_nofail(blkdev->bs, blkdev);
    blkdev->file_blk  = BLOCK_SIZE;
    blkdev->file_size = bdrv_getlength(blkdev->bs);
    if (blkdev->file_size < 0) {
        xen_be_printf(&blkdev->xendev, 1, "bdrv_getlength: %d (%s) | drv %s\n",
                      (int)blkdev->file_size, strerror(-blkdev->file_size),
                      blkdev->bs->drv ? blkdev->bs->drv->format_name : "-");
        blkdev->file_size = 0;
    }

    xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\","
                  " size %" PRId64 " (%" PRId64 " MB)\n",
                  blkdev->type, blkdev->fileproto, blkdev->filename,
                  blkdev->file_size, blkdev->file_size >> 20);

    /* fill info */
    xenstore_write_be_int(&blkdev->xendev, "feature-barrier", 1);
    xenstore_write_be_int(&blkdev->xendev, "info",            info);
    xenstore_write_be_int(&blkdev->xendev, "sector-size",     blkdev->file_blk);
    xenstore_write_be_int(&blkdev->xendev, "sectors",
                          blkdev->file_size / blkdev->file_blk);
    return 0;

out_error:
    g_free(blkdev->params);
    blkdev->params = NULL;
    g_free(blkdev->mode);
    blkdev->mode = NULL;
    g_free(blkdev->type);
    blkdev->type = NULL;
    g_free(blkdev->dev);
    blkdev->dev = NULL;
    g_free(blkdev->devtype);
    blkdev->devtype = NULL;
    return -1;
}

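/*
 * Called when the frontend is ready: read ring-ref and event-channel,
 * pick the negotiated ring protocol, map the shared ring and bind the
 * event channel.
 */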
static int blk_connect(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) {
        return -1;
    }
    if (xenstore_read_fe_int(&blkdev->xendev, "event-channel",
                             &blkdev->xendev.remote_port) == -1) {
        return -1;
    }

    blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
    if (blkdev->xendev.protocol) {
        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
            blkdev->protocol = BLKIF_PROTOCOL_X86_32;
        }
        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) {
            blkdev->protocol = BLKIF_PROTOCOL_X86_64;
        }
    }

    blkdev->sring = xc_gnttab_map_grant_ref(blkdev->xendev.gnttabdev,
                                            blkdev->xendev.dom,
                                            blkdev->ring_ref,
                                            PROT_READ | PROT_WRITE);
    if (!blkdev->sring) {
        return -1;
    }
    blkdev->cnt_map++;

    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
    {
        blkif_sring_t *sring_native = blkdev->sring;
        BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE);
        break;
    }
    case BLKIF_PROTOCOL_X86_32:
    {
        blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring;

        BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, XC_PAGE_SIZE);
        break;
    }
    case BLKIF_PROTOCOL_X86_64:
    {
        blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring;

        BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, XC_PAGE_SIZE);
        break;
    }
    }

    xen_be_bind_evtchn(&blkdev->xendev);

    xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
                  "remote port %d, local port %d\n",
                  blkdev->xendev.protocol, blkdev->ring_ref,
                  blkdev->xendev.remote_port, blkdev->xendev.local_port);
    return 0;
}

static void blk_disconnect(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    if (blkdev->bs) {
        if (!blkdev->dinfo) {
            /* close/delete only if we created it ourselves */
            bdrv_close(blkdev->bs);
            bdrv_detach_dev(blkdev->bs, blkdev);
            bdrv_delete(blkdev->bs);
        }
        blkdev->bs = NULL;
    }
    xen_be_unbind_evtchn(&blkdev->xendev);

    if (blkdev->sring) {
        xc_gnttab_munmap(blkdev->xendev.gnttabdev, blkdev->sring, 1);
        blkdev->cnt_map--;
        blkdev->sring = NULL;
    }
}

static int blk_free(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
    struct ioreq *ioreq;

    if (blkdev->bs || blkdev->sring) {
        blk_disconnect(xendev);
    }

    while (!QLIST_EMPTY(&blkdev->freelist)) {
        ioreq = QLIST_FIRST(&blkdev->freelist);
        QLIST_REMOVE(ioreq, list);
        qemu_iovec_destroy(&ioreq->v);
        g_free(ioreq);
    }

    g_free(blkdev->params);
    g_free(blkdev->mode);
    g_free(blkdev->type);
    g_free(blkdev->dev);
    g_free(blkdev->devtype);
    qemu_bh_delete(blkdev->bh);
    return 0;
}

static void blk_event(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    qemu_bh_schedule(blkdev->bh);
}

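/*
 * Entry points registered with the generic Xen backend code;
 * DEVOPS_FLAG_NEED_GNTDEV asks it to open a grant-table device handle
 * (xendev->gnttabdev) for this backend before alloc/init are called.
 */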
struct XenDevOps xen_blkdev_ops = {
    .size       = sizeof(struct XenBlkDev),
    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
    .alloc      = blk_alloc,
    .init       = blk_init,
    .initialise    = blk_connect,
    .disconnect = blk_disconnect,
    .event      = blk_event,
    .free       = blk_free,
};