]>
Commit | Line | Data |
---|---|---|
62d23efa AL |
1 | /* |
2 | * xen paravirt block device backend | |
3 | * | |
4 | * (c) Gerd Hoffmann <[email protected]> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; under version 2 of the License. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, | |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * GNU General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU General Public License along | |
8167ee88 | 16 | * with this program; if not, see <http://www.gnu.org/licenses/>. |
6b620ca3 PB |
17 | * |
18 | * Contributions after 2012-01-13 are licensed under the terms of the | |
19 | * GNU GPL, version 2 or (at your option) any later version. | |
62d23efa AL |
20 | */ |
21 | ||
22 | #include <stdio.h> | |
23 | #include <stdlib.h> | |
24 | #include <stdarg.h> | |
25 | #include <string.h> | |
26 | #include <unistd.h> | |
27 | #include <signal.h> | |
28 | #include <inttypes.h> | |
29 | #include <time.h> | |
30 | #include <fcntl.h> | |
31 | #include <errno.h> | |
32 | #include <sys/ioctl.h> | |
33 | #include <sys/types.h> | |
34 | #include <sys/stat.h> | |
35 | #include <sys/mman.h> | |
36 | #include <sys/uio.h> | |
37 | ||
62d23efa | 38 | #include "hw.h" |
62d23efa | 39 | #include "xen_backend.h" |
b41f6719 | 40 | #include "xen_blkif.h" |
9c17d615 | 41 | #include "sysemu/blockdev.h" |
62d23efa AL |
42 | |
43 | /* ------------------------------------------------------------- */ | |
44 | ||
62d23efa AL |
/* When non-zero, all grant refs of a request are mapped/unmapped with a
 * single call; blk_alloc() turns this on unless xen_mode is XEN_EMULATE. */
static int batch_maps = 0;

/* Upper bound on the number of ioreq structures allocated per device. */
static int max_requests = 32;

/* ------------------------------------------------------------- */

#define BLOCK_SIZE  512
#define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)
53 | ||
54 | struct ioreq { | |
55 | blkif_request_t req; | |
56 | int16_t status; | |
57 | ||
58 | /* parsed request */ | |
59 | off_t start; | |
60 | QEMUIOVector v; | |
61 | int presync; | |
62 | int postsync; | |
c6961b7d | 63 | uint8_t mapped; |
62d23efa AL |
64 | |
65 | /* grant mapping */ | |
66 | uint32_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | |
67 | uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | |
68 | int prot; | |
69 | void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | |
70 | void *pages; | |
71 | ||
72 | /* aio status */ | |
73 | int aio_inflight; | |
74 | int aio_errors; | |
75 | ||
76 | struct XenBlkDev *blkdev; | |
72cf2d4f | 77 | QLIST_ENTRY(ioreq) list; |
a597e79c | 78 | BlockAcctCookie acct; |
62d23efa AL |
79 | }; |
80 | ||
81 | struct XenBlkDev { | |
82 | struct XenDevice xendev; /* must be first */ | |
83 | char *params; | |
84 | char *mode; | |
85 | char *type; | |
86 | char *dev; | |
87 | char *devtype; | |
88 | const char *fileproto; | |
89 | const char *filename; | |
90 | int ring_ref; | |
91 | void *sring; | |
92 | int64_t file_blk; | |
93 | int64_t file_size; | |
94 | int protocol; | |
95 | blkif_back_rings_t rings; | |
96 | int more_work; | |
97 | int cnt_map; | |
98 | ||
99 | /* request lists */ | |
72cf2d4f BS |
100 | QLIST_HEAD(inflight_head, ioreq) inflight; |
101 | QLIST_HEAD(finished_head, ioreq) finished; | |
102 | QLIST_HEAD(freelist_head, ioreq) freelist; | |
62d23efa AL |
103 | int requests_total; |
104 | int requests_inflight; | |
105 | int requests_finished; | |
106 | ||
107 | /* qemu block driver */ | |
751c6a17 | 108 | DriveInfo *dinfo; |
62d23efa AL |
109 | BlockDriverState *bs; |
110 | QEMUBH *bh; | |
111 | }; | |
112 | ||
113 | /* ------------------------------------------------------------- */ | |
114 | ||
115 | static struct ioreq *ioreq_start(struct XenBlkDev *blkdev) | |
116 | { | |
117 | struct ioreq *ioreq = NULL; | |
118 | ||
72cf2d4f | 119 | if (QLIST_EMPTY(&blkdev->freelist)) { |
209cd7ab AP |
120 | if (blkdev->requests_total >= max_requests) { |
121 | goto out; | |
122 | } | |
123 | /* allocate new struct */ | |
7267c094 | 124 | ioreq = g_malloc0(sizeof(*ioreq)); |
209cd7ab AP |
125 | ioreq->blkdev = blkdev; |
126 | blkdev->requests_total++; | |
62d23efa AL |
127 | qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST); |
128 | } else { | |
209cd7ab AP |
129 | /* get one from freelist */ |
130 | ioreq = QLIST_FIRST(&blkdev->freelist); | |
131 | QLIST_REMOVE(ioreq, list); | |
62d23efa AL |
132 | qemu_iovec_reset(&ioreq->v); |
133 | } | |
72cf2d4f | 134 | QLIST_INSERT_HEAD(&blkdev->inflight, ioreq, list); |
62d23efa AL |
135 | blkdev->requests_inflight++; |
136 | ||
137 | out: | |
138 | return ioreq; | |
139 | } | |
140 | ||
141 | static void ioreq_finish(struct ioreq *ioreq) | |
142 | { | |
143 | struct XenBlkDev *blkdev = ioreq->blkdev; | |
144 | ||
72cf2d4f BS |
145 | QLIST_REMOVE(ioreq, list); |
146 | QLIST_INSERT_HEAD(&blkdev->finished, ioreq, list); | |
62d23efa AL |
147 | blkdev->requests_inflight--; |
148 | blkdev->requests_finished++; | |
149 | } | |
150 | ||
ed547766 | 151 | static void ioreq_release(struct ioreq *ioreq, bool finish) |
62d23efa AL |
152 | { |
153 | struct XenBlkDev *blkdev = ioreq->blkdev; | |
154 | ||
72cf2d4f | 155 | QLIST_REMOVE(ioreq, list); |
62d23efa AL |
156 | memset(ioreq, 0, sizeof(*ioreq)); |
157 | ioreq->blkdev = blkdev; | |
72cf2d4f | 158 | QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list); |
ed547766 JB |
159 | if (finish) { |
160 | blkdev->requests_finished--; | |
161 | } else { | |
162 | blkdev->requests_inflight--; | |
163 | } | |
62d23efa AL |
164 | } |
165 | ||
166 | /* | |
167 | * translate request into iovec + start offset | |
168 | * do sanity checks along the way | |
169 | */ | |
170 | static int ioreq_parse(struct ioreq *ioreq) | |
171 | { | |
172 | struct XenBlkDev *blkdev = ioreq->blkdev; | |
173 | uintptr_t mem; | |
174 | size_t len; | |
175 | int i; | |
176 | ||
177 | xen_be_printf(&blkdev->xendev, 3, | |
209cd7ab AP |
178 | "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n", |
179 | ioreq->req.operation, ioreq->req.nr_segments, | |
180 | ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number); | |
62d23efa AL |
181 | switch (ioreq->req.operation) { |
182 | case BLKIF_OP_READ: | |
209cd7ab AP |
183 | ioreq->prot = PROT_WRITE; /* to memory */ |
184 | break; | |
62d23efa | 185 | case BLKIF_OP_WRITE_BARRIER: |
5cbdebe3 SS |
186 | if (!ioreq->req.nr_segments) { |
187 | ioreq->presync = 1; | |
188 | return 0; | |
189 | } | |
ba1dffed | 190 | ioreq->presync = ioreq->postsync = 1; |
209cd7ab | 191 | /* fall through */ |
62d23efa | 192 | case BLKIF_OP_WRITE: |
209cd7ab | 193 | ioreq->prot = PROT_READ; /* from memory */ |
209cd7ab | 194 | break; |
62d23efa | 195 | default: |
209cd7ab AP |
196 | xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n", |
197 | ioreq->req.operation); | |
198 | goto err; | |
62d23efa AL |
199 | }; |
200 | ||
908c7b9f GH |
201 | if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') { |
202 | xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n"); | |
203 | goto err; | |
204 | } | |
205 | ||
62d23efa AL |
206 | ioreq->start = ioreq->req.sector_number * blkdev->file_blk; |
207 | for (i = 0; i < ioreq->req.nr_segments; i++) { | |
209cd7ab AP |
208 | if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) { |
209 | xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n"); | |
210 | goto err; | |
211 | } | |
212 | if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) { | |
213 | xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n"); | |
214 | goto err; | |
215 | } | |
216 | if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) { | |
217 | xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n"); | |
218 | goto err; | |
219 | } | |
220 | ||
221 | ioreq->domids[i] = blkdev->xendev.dom; | |
222 | ioreq->refs[i] = ioreq->req.seg[i].gref; | |
223 | ||
224 | mem = ioreq->req.seg[i].first_sect * blkdev->file_blk; | |
225 | len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk; | |
62d23efa AL |
226 | qemu_iovec_add(&ioreq->v, (void*)mem, len); |
227 | } | |
228 | if (ioreq->start + ioreq->v.size > blkdev->file_size) { | |
209cd7ab AP |
229 | xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n"); |
230 | goto err; | |
62d23efa AL |
231 | } |
232 | return 0; | |
233 | ||
234 | err: | |
235 | ioreq->status = BLKIF_RSP_ERROR; | |
236 | return -1; | |
237 | } | |
238 | ||
239 | static void ioreq_unmap(struct ioreq *ioreq) | |
240 | { | |
d5b93ddf | 241 | XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev; |
62d23efa AL |
242 | int i; |
243 | ||
c6961b7d | 244 | if (ioreq->v.niov == 0 || ioreq->mapped == 0) { |
62d23efa | 245 | return; |
209cd7ab | 246 | } |
62d23efa | 247 | if (batch_maps) { |
209cd7ab AP |
248 | if (!ioreq->pages) { |
249 | return; | |
250 | } | |
251 | if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->v.niov) != 0) { | |
252 | xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n", | |
253 | strerror(errno)); | |
254 | } | |
255 | ioreq->blkdev->cnt_map -= ioreq->v.niov; | |
256 | ioreq->pages = NULL; | |
62d23efa | 257 | } else { |
209cd7ab AP |
258 | for (i = 0; i < ioreq->v.niov; i++) { |
259 | if (!ioreq->page[i]) { | |
260 | continue; | |
261 | } | |
262 | if (xc_gnttab_munmap(gnt, ioreq->page[i], 1) != 0) { | |
263 | xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n", | |
264 | strerror(errno)); | |
265 | } | |
266 | ioreq->blkdev->cnt_map--; | |
267 | ioreq->page[i] = NULL; | |
268 | } | |
62d23efa | 269 | } |
c6961b7d | 270 | ioreq->mapped = 0; |
62d23efa AL |
271 | } |
272 | ||
273 | static int ioreq_map(struct ioreq *ioreq) | |
274 | { | |
d5b93ddf | 275 | XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev; |
62d23efa AL |
276 | int i; |
277 | ||
c6961b7d | 278 | if (ioreq->v.niov == 0 || ioreq->mapped == 1) { |
62d23efa | 279 | return 0; |
209cd7ab | 280 | } |
62d23efa | 281 | if (batch_maps) { |
209cd7ab AP |
282 | ioreq->pages = xc_gnttab_map_grant_refs |
283 | (gnt, ioreq->v.niov, ioreq->domids, ioreq->refs, ioreq->prot); | |
284 | if (ioreq->pages == NULL) { | |
285 | xen_be_printf(&ioreq->blkdev->xendev, 0, | |
286 | "can't map %d grant refs (%s, %d maps)\n", | |
287 | ioreq->v.niov, strerror(errno), ioreq->blkdev->cnt_map); | |
288 | return -1; | |
289 | } | |
290 | for (i = 0; i < ioreq->v.niov; i++) { | |
291 | ioreq->v.iov[i].iov_base = ioreq->pages + i * XC_PAGE_SIZE + | |
292 | (uintptr_t)ioreq->v.iov[i].iov_base; | |
293 | } | |
294 | ioreq->blkdev->cnt_map += ioreq->v.niov; | |
62d23efa | 295 | } else { |
209cd7ab AP |
296 | for (i = 0; i < ioreq->v.niov; i++) { |
297 | ioreq->page[i] = xc_gnttab_map_grant_ref | |
298 | (gnt, ioreq->domids[i], ioreq->refs[i], ioreq->prot); | |
299 | if (ioreq->page[i] == NULL) { | |
300 | xen_be_printf(&ioreq->blkdev->xendev, 0, | |
301 | "can't map grant ref %d (%s, %d maps)\n", | |
302 | ioreq->refs[i], strerror(errno), ioreq->blkdev->cnt_map); | |
303 | ioreq_unmap(ioreq); | |
304 | return -1; | |
305 | } | |
306 | ioreq->v.iov[i].iov_base = ioreq->page[i] + (uintptr_t)ioreq->v.iov[i].iov_base; | |
307 | ioreq->blkdev->cnt_map++; | |
308 | } | |
62d23efa | 309 | } |
c6961b7d | 310 | ioreq->mapped = 1; |
62d23efa AL |
311 | return 0; |
312 | } | |
313 | ||
c6961b7d SS |
314 | static int ioreq_runio_qemu_aio(struct ioreq *ioreq); |
315 | ||
62d23efa AL |
316 | static void qemu_aio_complete(void *opaque, int ret) |
317 | { | |
318 | struct ioreq *ioreq = opaque; | |
319 | ||
320 | if (ret != 0) { | |
321 | xen_be_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n", | |
322 | ioreq->req.operation == BLKIF_OP_READ ? "read" : "write"); | |
323 | ioreq->aio_errors++; | |
324 | } | |
325 | ||
326 | ioreq->aio_inflight--; | |
c6961b7d SS |
327 | if (ioreq->presync) { |
328 | ioreq->presync = 0; | |
329 | ioreq_runio_qemu_aio(ioreq); | |
330 | return; | |
331 | } | |
209cd7ab | 332 | if (ioreq->aio_inflight > 0) { |
62d23efa | 333 | return; |
209cd7ab | 334 | } |
d56de074 | 335 | if (ioreq->postsync) { |
c6961b7d SS |
336 | ioreq->postsync = 0; |
337 | ioreq->aio_inflight++; | |
338 | bdrv_aio_flush(ioreq->blkdev->bs, qemu_aio_complete, ioreq); | |
339 | return; | |
d56de074 | 340 | } |
62d23efa AL |
341 | |
342 | ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY; | |
343 | ioreq_unmap(ioreq); | |
344 | ioreq_finish(ioreq); | |
a597e79c | 345 | bdrv_acct_done(ioreq->blkdev->bs, &ioreq->acct); |
62d23efa AL |
346 | qemu_bh_schedule(ioreq->blkdev->bh); |
347 | } | |
348 | ||
349 | static int ioreq_runio_qemu_aio(struct ioreq *ioreq) | |
350 | { | |
351 | struct XenBlkDev *blkdev = ioreq->blkdev; | |
352 | ||
209cd7ab AP |
353 | if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) { |
354 | goto err_no_map; | |
355 | } | |
62d23efa AL |
356 | |
357 | ioreq->aio_inflight++; | |
209cd7ab | 358 | if (ioreq->presync) { |
c6961b7d SS |
359 | bdrv_aio_flush(ioreq->blkdev->bs, qemu_aio_complete, ioreq); |
360 | return 0; | |
209cd7ab | 361 | } |
62d23efa AL |
362 | |
363 | switch (ioreq->req.operation) { | |
364 | case BLKIF_OP_READ: | |
a597e79c | 365 | bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->v.size, BDRV_ACCT_READ); |
62d23efa AL |
366 | ioreq->aio_inflight++; |
367 | bdrv_aio_readv(blkdev->bs, ioreq->start / BLOCK_SIZE, | |
368 | &ioreq->v, ioreq->v.size / BLOCK_SIZE, | |
369 | qemu_aio_complete, ioreq); | |
209cd7ab | 370 | break; |
62d23efa AL |
371 | case BLKIF_OP_WRITE: |
372 | case BLKIF_OP_WRITE_BARRIER: | |
209cd7ab | 373 | if (!ioreq->req.nr_segments) { |
5cbdebe3 | 374 | break; |
209cd7ab | 375 | } |
a597e79c CH |
376 | |
377 | bdrv_acct_start(blkdev->bs, &ioreq->acct, ioreq->v.size, BDRV_ACCT_WRITE); | |
209bef3e | 378 | ioreq->aio_inflight++; |
62d23efa AL |
379 | bdrv_aio_writev(blkdev->bs, ioreq->start / BLOCK_SIZE, |
380 | &ioreq->v, ioreq->v.size / BLOCK_SIZE, | |
381 | qemu_aio_complete, ioreq); | |
209cd7ab | 382 | break; |
62d23efa | 383 | default: |
209cd7ab AP |
384 | /* unknown operation (shouldn't happen -- parse catches this) */ |
385 | goto err; | |
62d23efa AL |
386 | } |
387 | ||
62d23efa AL |
388 | qemu_aio_complete(ioreq, 0); |
389 | ||
390 | return 0; | |
391 | ||
392 | err: | |
f6ec953c FZ |
393 | ioreq_unmap(ioreq); |
394 | err_no_map: | |
395 | ioreq_finish(ioreq); | |
62d23efa AL |
396 | ioreq->status = BLKIF_RSP_ERROR; |
397 | return -1; | |
398 | } | |
399 | ||
400 | static int blk_send_response_one(struct ioreq *ioreq) | |
401 | { | |
402 | struct XenBlkDev *blkdev = ioreq->blkdev; | |
403 | int send_notify = 0; | |
404 | int have_requests = 0; | |
405 | blkif_response_t resp; | |
406 | void *dst; | |
407 | ||
408 | resp.id = ioreq->req.id; | |
409 | resp.operation = ioreq->req.operation; | |
410 | resp.status = ioreq->status; | |
411 | ||
412 | /* Place on the response ring for the relevant domain. */ | |
413 | switch (blkdev->protocol) { | |
414 | case BLKIF_PROTOCOL_NATIVE: | |
209cd7ab AP |
415 | dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt); |
416 | break; | |
62d23efa | 417 | case BLKIF_PROTOCOL_X86_32: |
6fcfeff9 BS |
418 | dst = RING_GET_RESPONSE(&blkdev->rings.x86_32_part, |
419 | blkdev->rings.x86_32_part.rsp_prod_pvt); | |
209cd7ab | 420 | break; |
62d23efa | 421 | case BLKIF_PROTOCOL_X86_64: |
6fcfeff9 BS |
422 | dst = RING_GET_RESPONSE(&blkdev->rings.x86_64_part, |
423 | blkdev->rings.x86_64_part.rsp_prod_pvt); | |
209cd7ab | 424 | break; |
62d23efa | 425 | default: |
209cd7ab | 426 | dst = NULL; |
62d23efa AL |
427 | } |
428 | memcpy(dst, &resp, sizeof(resp)); | |
429 | blkdev->rings.common.rsp_prod_pvt++; | |
430 | ||
431 | RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify); | |
432 | if (blkdev->rings.common.rsp_prod_pvt == blkdev->rings.common.req_cons) { | |
209cd7ab AP |
433 | /* |
434 | * Tail check for pending requests. Allows frontend to avoid | |
435 | * notifications if requests are already in flight (lower | |
436 | * overheads and promotes batching). | |
437 | */ | |
438 | RING_FINAL_CHECK_FOR_REQUESTS(&blkdev->rings.common, have_requests); | |
62d23efa | 439 | } else if (RING_HAS_UNCONSUMED_REQUESTS(&blkdev->rings.common)) { |
209cd7ab | 440 | have_requests = 1; |
62d23efa AL |
441 | } |
442 | ||
209cd7ab AP |
443 | if (have_requests) { |
444 | blkdev->more_work++; | |
445 | } | |
62d23efa AL |
446 | return send_notify; |
447 | } | |
448 | ||
449 | /* walk finished list, send outstanding responses, free requests */ | |
450 | static void blk_send_response_all(struct XenBlkDev *blkdev) | |
451 | { | |
452 | struct ioreq *ioreq; | |
453 | int send_notify = 0; | |
454 | ||
72cf2d4f BS |
455 | while (!QLIST_EMPTY(&blkdev->finished)) { |
456 | ioreq = QLIST_FIRST(&blkdev->finished); | |
209cd7ab | 457 | send_notify += blk_send_response_one(ioreq); |
ed547766 | 458 | ioreq_release(ioreq, true); |
209cd7ab AP |
459 | } |
460 | if (send_notify) { | |
461 | xen_be_send_notify(&blkdev->xendev); | |
62d23efa | 462 | } |
62d23efa AL |
463 | } |
464 | ||
465 | static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq, RING_IDX rc) | |
466 | { | |
467 | switch (blkdev->protocol) { | |
468 | case BLKIF_PROTOCOL_NATIVE: | |
209cd7ab AP |
469 | memcpy(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.native, rc), |
470 | sizeof(ioreq->req)); | |
471 | break; | |
62d23efa | 472 | case BLKIF_PROTOCOL_X86_32: |
6fcfeff9 BS |
473 | blkif_get_x86_32_req(&ioreq->req, |
474 | RING_GET_REQUEST(&blkdev->rings.x86_32_part, rc)); | |
209cd7ab | 475 | break; |
62d23efa | 476 | case BLKIF_PROTOCOL_X86_64: |
6fcfeff9 BS |
477 | blkif_get_x86_64_req(&ioreq->req, |
478 | RING_GET_REQUEST(&blkdev->rings.x86_64_part, rc)); | |
209cd7ab | 479 | break; |
62d23efa AL |
480 | } |
481 | return 0; | |
482 | } | |
483 | ||
484 | static void blk_handle_requests(struct XenBlkDev *blkdev) | |
485 | { | |
486 | RING_IDX rc, rp; | |
487 | struct ioreq *ioreq; | |
488 | ||
489 | blkdev->more_work = 0; | |
490 | ||
491 | rc = blkdev->rings.common.req_cons; | |
492 | rp = blkdev->rings.common.sring->req_prod; | |
493 | xen_rmb(); /* Ensure we see queued requests up to 'rp'. */ | |
494 | ||
4e5b184d | 495 | blk_send_response_all(blkdev); |
fc1f79f7 | 496 | while (rc != rp) { |
62d23efa | 497 | /* pull request from ring */ |
209cd7ab | 498 | if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) { |
62d23efa | 499 | break; |
209cd7ab | 500 | } |
62d23efa AL |
501 | ioreq = ioreq_start(blkdev); |
502 | if (ioreq == NULL) { | |
503 | blkdev->more_work++; | |
504 | break; | |
505 | } | |
506 | blk_get_request(blkdev, ioreq, rc); | |
507 | blkdev->rings.common.req_cons = ++rc; | |
508 | ||
509 | /* parse them */ | |
510 | if (ioreq_parse(ioreq) != 0) { | |
209cd7ab | 511 | if (blk_send_response_one(ioreq)) { |
62d23efa | 512 | xen_be_send_notify(&blkdev->xendev); |
209cd7ab | 513 | } |
ed547766 | 514 | ioreq_release(ioreq, false); |
62d23efa AL |
515 | continue; |
516 | } | |
517 | ||
4e5b184d | 518 | ioreq_runio_qemu_aio(ioreq); |
209cd7ab | 519 | } |
62d23efa | 520 | |
209cd7ab | 521 | if (blkdev->more_work && blkdev->requests_inflight < max_requests) { |
62d23efa | 522 | qemu_bh_schedule(blkdev->bh); |
209cd7ab | 523 | } |
62d23efa AL |
524 | } |
525 | ||
526 | /* ------------------------------------------------------------- */ | |
527 | ||
/* Bottom-half callback; opaque is the struct XenBlkDev registered in
 * blk_alloc(). */
static void blk_bh(void *opaque)
{
    blk_handle_requests((struct XenBlkDev *)opaque);
}
533 | ||
64c27e5b JB |
/*
 * Grant allocations require contiguous chunks, which has to be accounted
 * for.  The worst case number would be
 *   max_req * max_seg + (max_req - 1) * (max_seg - 1) + 1,
 * but to keep things simple
 *   2 * max_req * max_seg
 * is used instead.
 */
#define MAX_GRANTS(max_req, max_seg)    ((max_req) * (max_seg) * 2)
542 | ||
62d23efa AL |
543 | static void blk_alloc(struct XenDevice *xendev) |
544 | { | |
545 | struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); | |
546 | ||
72cf2d4f BS |
547 | QLIST_INIT(&blkdev->inflight); |
548 | QLIST_INIT(&blkdev->finished); | |
549 | QLIST_INIT(&blkdev->freelist); | |
62d23efa | 550 | blkdev->bh = qemu_bh_new(blk_bh, blkdev); |
209cd7ab | 551 | if (xen_mode != XEN_EMULATE) { |
62d23efa | 552 | batch_maps = 1; |
209cd7ab | 553 | } |
64c27e5b JB |
554 | if (xc_gnttab_set_max_grants(xendev->gnttabdev, |
555 | MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) { | |
556 | xen_be_printf(xendev, 0, "xc_gnttab_set_max_grants failed: %s\n", | |
557 | strerror(errno)); | |
558 | } | |
62d23efa AL |
559 | } |
560 | ||
561 | static int blk_init(struct XenDevice *xendev) | |
562 | { | |
563 | struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); | |
5cce43bb | 564 | int index, qflags, info = 0; |
62d23efa AL |
565 | |
566 | /* read xenstore entries */ | |
567 | if (blkdev->params == NULL) { | |
5ea3c2b4 | 568 | char *h = NULL; |
209cd7ab | 569 | blkdev->params = xenstore_read_be_str(&blkdev->xendev, "params"); |
5ea3c2b4 SS |
570 | if (blkdev->params != NULL) { |
571 | h = strchr(blkdev->params, ':'); | |
572 | } | |
209cd7ab AP |
573 | if (h != NULL) { |
574 | blkdev->fileproto = blkdev->params; | |
575 | blkdev->filename = h+1; | |
576 | *h = 0; | |
577 | } else { | |
578 | blkdev->fileproto = "<unset>"; | |
579 | blkdev->filename = blkdev->params; | |
580 | } | |
581 | } | |
7cef3f4f SS |
582 | if (!strcmp("aio", blkdev->fileproto)) { |
583 | blkdev->fileproto = "raw"; | |
584 | } | |
209cd7ab AP |
585 | if (blkdev->mode == NULL) { |
586 | blkdev->mode = xenstore_read_be_str(&blkdev->xendev, "mode"); | |
587 | } | |
588 | if (blkdev->type == NULL) { | |
589 | blkdev->type = xenstore_read_be_str(&blkdev->xendev, "type"); | |
590 | } | |
591 | if (blkdev->dev == NULL) { | |
592 | blkdev->dev = xenstore_read_be_str(&blkdev->xendev, "dev"); | |
593 | } | |
594 | if (blkdev->devtype == NULL) { | |
595 | blkdev->devtype = xenstore_read_be_str(&blkdev->xendev, "device-type"); | |
596 | } | |
62d23efa AL |
597 | |
598 | /* do we have all we need? */ | |
599 | if (blkdev->params == NULL || | |
209cd7ab AP |
600 | blkdev->mode == NULL || |
601 | blkdev->type == NULL || | |
602 | blkdev->dev == NULL) { | |
5ea3c2b4 | 603 | goto out_error; |
209cd7ab | 604 | } |
62d23efa AL |
605 | |
606 | /* read-only ? */ | |
82091410 | 607 | qflags = BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NATIVE_AIO; |
62d23efa | 608 | if (strcmp(blkdev->mode, "w") == 0) { |
82091410 | 609 | qflags |= BDRV_O_RDWR; |
62d23efa | 610 | } else { |
209cd7ab | 611 | info |= VDISK_READONLY; |
62d23efa AL |
612 | } |
613 | ||
614 | /* cdrom ? */ | |
209cd7ab AP |
615 | if (blkdev->devtype && !strcmp(blkdev->devtype, "cdrom")) { |
616 | info |= VDISK_CDROM; | |
617 | } | |
62d23efa AL |
618 | |
619 | /* init qemu block driver */ | |
751c6a17 GH |
620 | index = (blkdev->xendev.dev - 202 * 256) / 16; |
621 | blkdev->dinfo = drive_get(IF_XEN, 0, index); | |
622 | if (!blkdev->dinfo) { | |
62d23efa AL |
623 | /* setup via xenbus -> create new block driver instance */ |
624 | xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n"); | |
ad717139 | 625 | blkdev->bs = bdrv_new(blkdev->dev); |
5ea3c2b4 SS |
626 | if (blkdev->bs) { |
627 | if (bdrv_open(blkdev->bs, blkdev->filename, qflags, | |
628 | bdrv_find_whitelisted_format(blkdev->fileproto)) != 0) { | |
629 | bdrv_delete(blkdev->bs); | |
630 | blkdev->bs = NULL; | |
631 | } | |
632 | } | |
633 | if (!blkdev->bs) { | |
634 | goto out_error; | |
ad717139 | 635 | } |
62d23efa AL |
636 | } else { |
637 | /* setup via qemu cmdline -> already setup for us */ | |
638 | xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n"); | |
209cd7ab | 639 | blkdev->bs = blkdev->dinfo->bdrv; |
62d23efa | 640 | } |
fa879d62 | 641 | bdrv_attach_dev_nofail(blkdev->bs, blkdev); |
62d23efa AL |
642 | blkdev->file_blk = BLOCK_SIZE; |
643 | blkdev->file_size = bdrv_getlength(blkdev->bs); | |
644 | if (blkdev->file_size < 0) { | |
645 | xen_be_printf(&blkdev->xendev, 1, "bdrv_getlength: %d (%s) | drv %s\n", | |
646 | (int)blkdev->file_size, strerror(-blkdev->file_size), | |
093003b1 | 647 | bdrv_get_format_name(blkdev->bs) ?: "-"); |
209cd7ab | 648 | blkdev->file_size = 0; |
62d23efa | 649 | } |
62d23efa AL |
650 | |
651 | xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\"," | |
209cd7ab AP |
652 | " size %" PRId64 " (%" PRId64 " MB)\n", |
653 | blkdev->type, blkdev->fileproto, blkdev->filename, | |
654 | blkdev->file_size, blkdev->file_size >> 20); | |
62d23efa AL |
655 | |
656 | /* fill info */ | |
5cce43bb | 657 | xenstore_write_be_int(&blkdev->xendev, "feature-barrier", 1); |
62d23efa AL |
658 | xenstore_write_be_int(&blkdev->xendev, "info", info); |
659 | xenstore_write_be_int(&blkdev->xendev, "sector-size", blkdev->file_blk); | |
660 | xenstore_write_be_int(&blkdev->xendev, "sectors", | |
209cd7ab | 661 | blkdev->file_size / blkdev->file_blk); |
62d23efa | 662 | return 0; |
5ea3c2b4 SS |
663 | |
664 | out_error: | |
7267c094 | 665 | g_free(blkdev->params); |
5ea3c2b4 | 666 | blkdev->params = NULL; |
7267c094 | 667 | g_free(blkdev->mode); |
5ea3c2b4 | 668 | blkdev->mode = NULL; |
7267c094 | 669 | g_free(blkdev->type); |
5ea3c2b4 | 670 | blkdev->type = NULL; |
7267c094 | 671 | g_free(blkdev->dev); |
5ea3c2b4 | 672 | blkdev->dev = NULL; |
7267c094 | 673 | g_free(blkdev->devtype); |
5ea3c2b4 SS |
674 | blkdev->devtype = NULL; |
675 | return -1; | |
62d23efa AL |
676 | } |
677 | ||
678 | static int blk_connect(struct XenDevice *xendev) | |
679 | { | |
680 | struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); | |
681 | ||
209cd7ab AP |
682 | if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) { |
683 | return -1; | |
684 | } | |
62d23efa | 685 | if (xenstore_read_fe_int(&blkdev->xendev, "event-channel", |
209cd7ab AP |
686 | &blkdev->xendev.remote_port) == -1) { |
687 | return -1; | |
688 | } | |
62d23efa AL |
689 | |
690 | blkdev->protocol = BLKIF_PROTOCOL_NATIVE; | |
691 | if (blkdev->xendev.protocol) { | |
209cd7ab | 692 | if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) { |
62d23efa | 693 | blkdev->protocol = BLKIF_PROTOCOL_X86_32; |
209cd7ab AP |
694 | } |
695 | if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) { | |
62d23efa | 696 | blkdev->protocol = BLKIF_PROTOCOL_X86_64; |
209cd7ab | 697 | } |
62d23efa AL |
698 | } |
699 | ||
700 | blkdev->sring = xc_gnttab_map_grant_ref(blkdev->xendev.gnttabdev, | |
209cd7ab AP |
701 | blkdev->xendev.dom, |
702 | blkdev->ring_ref, | |
703 | PROT_READ | PROT_WRITE); | |
704 | if (!blkdev->sring) { | |
705 | return -1; | |
706 | } | |
62d23efa AL |
707 | blkdev->cnt_map++; |
708 | ||
709 | switch (blkdev->protocol) { | |
710 | case BLKIF_PROTOCOL_NATIVE: | |
711 | { | |
209cd7ab AP |
712 | blkif_sring_t *sring_native = blkdev->sring; |
713 | BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE); | |
714 | break; | |
62d23efa AL |
715 | } |
716 | case BLKIF_PROTOCOL_X86_32: | |
717 | { | |
209cd7ab | 718 | blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring; |
6fcfeff9 BS |
719 | |
720 | BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, XC_PAGE_SIZE); | |
209cd7ab | 721 | break; |
62d23efa AL |
722 | } |
723 | case BLKIF_PROTOCOL_X86_64: | |
724 | { | |
209cd7ab | 725 | blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring; |
6fcfeff9 BS |
726 | |
727 | BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, XC_PAGE_SIZE); | |
209cd7ab | 728 | break; |
62d23efa AL |
729 | } |
730 | } | |
731 | ||
732 | xen_be_bind_evtchn(&blkdev->xendev); | |
733 | ||
734 | xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, " | |
209cd7ab AP |
735 | "remote port %d, local port %d\n", |
736 | blkdev->xendev.protocol, blkdev->ring_ref, | |
737 | blkdev->xendev.remote_port, blkdev->xendev.local_port); | |
62d23efa AL |
738 | return 0; |
739 | } | |
740 | ||
741 | static void blk_disconnect(struct XenDevice *xendev) | |
742 | { | |
743 | struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); | |
744 | ||
745 | if (blkdev->bs) { | |
751c6a17 | 746 | if (!blkdev->dinfo) { |
62d23efa AL |
747 | /* close/delete only if we created it ourself */ |
748 | bdrv_close(blkdev->bs); | |
7429f2e1 | 749 | bdrv_detach_dev(blkdev->bs, blkdev); |
62d23efa AL |
750 | bdrv_delete(blkdev->bs); |
751 | } | |
209cd7ab | 752 | blkdev->bs = NULL; |
62d23efa AL |
753 | } |
754 | xen_be_unbind_evtchn(&blkdev->xendev); | |
755 | ||
756 | if (blkdev->sring) { | |
209cd7ab AP |
757 | xc_gnttab_munmap(blkdev->xendev.gnttabdev, blkdev->sring, 1); |
758 | blkdev->cnt_map--; | |
759 | blkdev->sring = NULL; | |
62d23efa AL |
760 | } |
761 | } | |
762 | ||
763 | static int blk_free(struct XenDevice *xendev) | |
764 | { | |
765 | struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); | |
766 | struct ioreq *ioreq; | |
767 | ||
77ba8fef SS |
768 | if (blkdev->bs || blkdev->sring) { |
769 | blk_disconnect(xendev); | |
770 | } | |
771 | ||
72cf2d4f | 772 | while (!QLIST_EMPTY(&blkdev->freelist)) { |
209cd7ab | 773 | ioreq = QLIST_FIRST(&blkdev->freelist); |
72cf2d4f | 774 | QLIST_REMOVE(ioreq, list); |
62d23efa | 775 | qemu_iovec_destroy(&ioreq->v); |
7267c094 | 776 | g_free(ioreq); |
62d23efa AL |
777 | } |
778 | ||
7267c094 AL |
779 | g_free(blkdev->params); |
780 | g_free(blkdev->mode); | |
781 | g_free(blkdev->type); | |
782 | g_free(blkdev->dev); | |
783 | g_free(blkdev->devtype); | |
62d23efa AL |
784 | qemu_bh_delete(blkdev->bh); |
785 | return 0; | |
786 | } | |
787 | ||
788 | static void blk_event(struct XenDevice *xendev) | |
789 | { | |
790 | struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); | |
791 | ||
792 | qemu_bh_schedule(blkdev->bh); | |
793 | } | |
794 | ||
795 | struct XenDevOps xen_blkdev_ops = { | |
796 | .size = sizeof(struct XenBlkDev), | |
797 | .flags = DEVOPS_FLAG_NEED_GNTDEV, | |
798 | .alloc = blk_alloc, | |
799 | .init = blk_init, | |
384087b2 | 800 | .initialise = blk_connect, |
62d23efa AL |
801 | .disconnect = blk_disconnect, |
802 | .event = blk_event, | |
803 | .free = blk_free, | |
804 | }; |