]>
Commit | Line | Data |
---|---|---|
62d23efa AL |
1 | /* |
2 | * xen paravirt block device backend | |
3 | * | |
4 | * (c) Gerd Hoffmann <[email protected]> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; under version 2 of the License. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, | |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * GNU General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU General Public License along | |
8167ee88 | 16 | * with this program; if not, see <http://www.gnu.org/licenses/>. |
6b620ca3 PB |
17 | * |
18 | * Contributions after 2012-01-13 are licensed under the terms of the | |
19 | * GNU GPL, version 2 or (at your option) any later version. | |
62d23efa AL |
20 | */ |
21 | ||
80c71a24 | 22 | #include "qemu/osdep.h" |
62d23efa | 23 | #include <sys/ioctl.h> |
62d23efa AL |
24 | #include <sys/uio.h> |
25 | ||
83c9f4ca | 26 | #include "hw/hw.h" |
0d09e41a | 27 | #include "hw/xen/xen_backend.h" |
47b43a1f | 28 | #include "xen_blkif.h" |
9c17d615 | 29 | #include "sysemu/blockdev.h" |
26f54e9a | 30 | #include "sysemu/block-backend.h" |
da34e65c | 31 | #include "qapi/error.h" |
9a925356 HR |
32 | #include "qapi/qmp/qdict.h" |
33 | #include "qapi/qmp/qstring.h" | |
62d23efa AL |
34 | |
35 | /* ------------------------------------------------------------- */ | |
36 | ||
62d23efa AL |
/* When non-zero, all new grant refs of a request are mapped in one call. */
static int batch_maps;

/* Cap on the number of ioreq structures allocated per device. */
static int max_requests = 32;

/* ------------------------------------------------------------- */

/* Sector size in bytes used by the page-crossing check in ioreq_parse(). */
#define BLOCK_SIZE  512
#define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)
45 | ||
9e496d74 RPM |
/*
 * One persistently mapped grant page.  Instances are stored in the
 * device's persistent_gnts tree, keyed by grant reference.
 */
typedef struct PersistentGrant {
    void *page;                 /* address of the mapped page */
    struct XenBlkDev *blkdev;   /* device the grant belongs to */
} PersistentGrant;
52 | ||
2f01dfac RPM |
/*
 * A contiguous range of persistently mapped grant pages created by one
 * batched mapping; unmapped as a whole by remove_persistent_region().
 */
typedef struct PersistentRegion {
    void *addr;     /* start address of the mapping */
    int num;        /* number of pages in the mapping */
} PersistentRegion;
59 | ||
62d23efa AL |
60 | struct ioreq { |
61 | blkif_request_t req; | |
62 | int16_t status; | |
63 | ||
64 | /* parsed request */ | |
65 | off_t start; | |
66 | QEMUIOVector v; | |
67 | int presync; | |
c6961b7d | 68 | uint8_t mapped; |
62d23efa AL |
69 | |
70 | /* grant mapping */ | |
71 | uint32_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | |
72 | uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | |
73 | int prot; | |
74 | void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | |
75 | void *pages; | |
9e496d74 | 76 | int num_unmap; |
62d23efa AL |
77 | |
78 | /* aio status */ | |
79 | int aio_inflight; | |
80 | int aio_errors; | |
81 | ||
82 | struct XenBlkDev *blkdev; | |
72cf2d4f | 83 | QLIST_ENTRY(ioreq) list; |
a597e79c | 84 | BlockAcctCookie acct; |
62d23efa AL |
85 | }; |
86 | ||
87 | struct XenBlkDev { | |
88 | struct XenDevice xendev; /* must be first */ | |
89 | char *params; | |
90 | char *mode; | |
91 | char *type; | |
92 | char *dev; | |
93 | char *devtype; | |
454ae734 | 94 | bool directiosafe; |
62d23efa AL |
95 | const char *fileproto; |
96 | const char *filename; | |
97 | int ring_ref; | |
98 | void *sring; | |
99 | int64_t file_blk; | |
100 | int64_t file_size; | |
101 | int protocol; | |
102 | blkif_back_rings_t rings; | |
103 | int more_work; | |
104 | int cnt_map; | |
105 | ||
106 | /* request lists */ | |
72cf2d4f BS |
107 | QLIST_HEAD(inflight_head, ioreq) inflight; |
108 | QLIST_HEAD(finished_head, ioreq) finished; | |
109 | QLIST_HEAD(freelist_head, ioreq) freelist; | |
62d23efa AL |
110 | int requests_total; |
111 | int requests_inflight; | |
112 | int requests_finished; | |
113 | ||
9e496d74 | 114 | /* Persistent grants extension */ |
f3135204 | 115 | gboolean feature_discard; |
9e496d74 RPM |
116 | gboolean feature_persistent; |
117 | GTree *persistent_gnts; | |
2f01dfac | 118 | GSList *persistent_regions; |
9e496d74 RPM |
119 | unsigned int persistent_gnt_count; |
120 | unsigned int max_grants; | |
121 | ||
b6eb9b45 PS |
122 | /* Grant copy */ |
123 | gboolean feature_grant_copy; | |
124 | ||
62d23efa | 125 | /* qemu block driver */ |
751c6a17 | 126 | DriveInfo *dinfo; |
4be74634 | 127 | BlockBackend *blk; |
62d23efa AL |
128 | QEMUBH *bh; |
129 | }; | |
130 | ||
131 | /* ------------------------------------------------------------- */ | |
132 | ||
282c6a2f RPM |
133 | static void ioreq_reset(struct ioreq *ioreq) |
134 | { | |
135 | memset(&ioreq->req, 0, sizeof(ioreq->req)); | |
136 | ioreq->status = 0; | |
137 | ioreq->start = 0; | |
138 | ioreq->presync = 0; | |
282c6a2f RPM |
139 | ioreq->mapped = 0; |
140 | ||
141 | memset(ioreq->domids, 0, sizeof(ioreq->domids)); | |
142 | memset(ioreq->refs, 0, sizeof(ioreq->refs)); | |
143 | ioreq->prot = 0; | |
144 | memset(ioreq->page, 0, sizeof(ioreq->page)); | |
145 | ioreq->pages = NULL; | |
146 | ||
147 | ioreq->aio_inflight = 0; | |
148 | ioreq->aio_errors = 0; | |
149 | ||
150 | ioreq->blkdev = NULL; | |
151 | memset(&ioreq->list, 0, sizeof(ioreq->list)); | |
152 | memset(&ioreq->acct, 0, sizeof(ioreq->acct)); | |
153 | ||
154 | qemu_iovec_reset(&ioreq->v); | |
155 | } | |
156 | ||
9e496d74 RPM |
157 | static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data) |
158 | { | |
159 | uint ua = GPOINTER_TO_UINT(a); | |
160 | uint ub = GPOINTER_TO_UINT(b); | |
161 | return (ua > ub) - (ua < ub); | |
162 | } | |
163 | ||
164 | static void destroy_grant(gpointer pgnt) | |
165 | { | |
166 | PersistentGrant *grant = pgnt; | |
c1345a88 | 167 | xengnttab_handle *gnt = grant->blkdev->xendev.gnttabdev; |
9e496d74 | 168 | |
c1345a88 | 169 | if (xengnttab_unmap(gnt, grant->page, 1) != 0) { |
96c77dba | 170 | xen_pv_printf(&grant->blkdev->xendev, 0, |
c1345a88 | 171 | "xengnttab_unmap failed: %s\n", |
9e496d74 RPM |
172 | strerror(errno)); |
173 | } | |
174 | grant->blkdev->persistent_gnt_count--; | |
96c77dba | 175 | xen_pv_printf(&grant->blkdev->xendev, 3, |
9e496d74 RPM |
176 | "unmapped grant %p\n", grant->page); |
177 | g_free(grant); | |
178 | } | |
179 | ||
2f01dfac RPM |
180 | static void remove_persistent_region(gpointer data, gpointer dev) |
181 | { | |
182 | PersistentRegion *region = data; | |
183 | struct XenBlkDev *blkdev = dev; | |
c1345a88 | 184 | xengnttab_handle *gnt = blkdev->xendev.gnttabdev; |
2f01dfac | 185 | |
c1345a88 | 186 | if (xengnttab_unmap(gnt, region->addr, region->num) != 0) { |
96c77dba | 187 | xen_pv_printf(&blkdev->xendev, 0, |
c1345a88 | 188 | "xengnttab_unmap region %p failed: %s\n", |
2f01dfac RPM |
189 | region->addr, strerror(errno)); |
190 | } | |
96c77dba | 191 | xen_pv_printf(&blkdev->xendev, 3, |
2f01dfac RPM |
192 | "unmapped grant region %p with %d pages\n", |
193 | region->addr, region->num); | |
194 | g_free(region); | |
195 | } | |
196 | ||
62d23efa AL |
197 | static struct ioreq *ioreq_start(struct XenBlkDev *blkdev) |
198 | { | |
199 | struct ioreq *ioreq = NULL; | |
200 | ||
72cf2d4f | 201 | if (QLIST_EMPTY(&blkdev->freelist)) { |
209cd7ab AP |
202 | if (blkdev->requests_total >= max_requests) { |
203 | goto out; | |
204 | } | |
205 | /* allocate new struct */ | |
7267c094 | 206 | ioreq = g_malloc0(sizeof(*ioreq)); |
209cd7ab AP |
207 | ioreq->blkdev = blkdev; |
208 | blkdev->requests_total++; | |
62d23efa AL |
209 | qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST); |
210 | } else { | |
209cd7ab AP |
211 | /* get one from freelist */ |
212 | ioreq = QLIST_FIRST(&blkdev->freelist); | |
213 | QLIST_REMOVE(ioreq, list); | |
62d23efa | 214 | } |
72cf2d4f | 215 | QLIST_INSERT_HEAD(&blkdev->inflight, ioreq, list); |
62d23efa AL |
216 | blkdev->requests_inflight++; |
217 | ||
218 | out: | |
219 | return ioreq; | |
220 | } | |
221 | ||
222 | static void ioreq_finish(struct ioreq *ioreq) | |
223 | { | |
224 | struct XenBlkDev *blkdev = ioreq->blkdev; | |
225 | ||
72cf2d4f BS |
226 | QLIST_REMOVE(ioreq, list); |
227 | QLIST_INSERT_HEAD(&blkdev->finished, ioreq, list); | |
62d23efa AL |
228 | blkdev->requests_inflight--; |
229 | blkdev->requests_finished++; | |
230 | } | |
231 | ||
ed547766 | 232 | static void ioreq_release(struct ioreq *ioreq, bool finish) |
62d23efa AL |
233 | { |
234 | struct XenBlkDev *blkdev = ioreq->blkdev; | |
235 | ||
72cf2d4f | 236 | QLIST_REMOVE(ioreq, list); |
282c6a2f | 237 | ioreq_reset(ioreq); |
62d23efa | 238 | ioreq->blkdev = blkdev; |
72cf2d4f | 239 | QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list); |
ed547766 JB |
240 | if (finish) { |
241 | blkdev->requests_finished--; | |
242 | } else { | |
243 | blkdev->requests_inflight--; | |
244 | } | |
62d23efa AL |
245 | } |
246 | ||
247 | /* | |
248 | * translate request into iovec + start offset | |
249 | * do sanity checks along the way | |
250 | */ | |
251 | static int ioreq_parse(struct ioreq *ioreq) | |
252 | { | |
253 | struct XenBlkDev *blkdev = ioreq->blkdev; | |
254 | uintptr_t mem; | |
255 | size_t len; | |
256 | int i; | |
257 | ||
96c77dba | 258 | xen_pv_printf(&blkdev->xendev, 3, |
209cd7ab AP |
259 | "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n", |
260 | ioreq->req.operation, ioreq->req.nr_segments, | |
261 | ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number); | |
62d23efa AL |
262 | switch (ioreq->req.operation) { |
263 | case BLKIF_OP_READ: | |
209cd7ab AP |
264 | ioreq->prot = PROT_WRITE; /* to memory */ |
265 | break; | |
7e7b7cba SS |
266 | case BLKIF_OP_FLUSH_DISKCACHE: |
267 | ioreq->presync = 1; | |
5cbdebe3 | 268 | if (!ioreq->req.nr_segments) { |
5cbdebe3 SS |
269 | return 0; |
270 | } | |
209cd7ab | 271 | /* fall through */ |
62d23efa | 272 | case BLKIF_OP_WRITE: |
209cd7ab | 273 | ioreq->prot = PROT_READ; /* from memory */ |
209cd7ab | 274 | break; |
f3135204 OH |
275 | case BLKIF_OP_DISCARD: |
276 | return 0; | |
62d23efa | 277 | default: |
96c77dba | 278 | xen_pv_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n", |
209cd7ab AP |
279 | ioreq->req.operation); |
280 | goto err; | |
62d23efa AL |
281 | }; |
282 | ||
908c7b9f | 283 | if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') { |
96c77dba | 284 | xen_pv_printf(&blkdev->xendev, 0, "error: write req for ro device\n"); |
908c7b9f GH |
285 | goto err; |
286 | } | |
287 | ||
62d23efa AL |
288 | ioreq->start = ioreq->req.sector_number * blkdev->file_blk; |
289 | for (i = 0; i < ioreq->req.nr_segments; i++) { | |
209cd7ab | 290 | if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) { |
96c77dba | 291 | xen_pv_printf(&blkdev->xendev, 0, "error: nr_segments too big\n"); |
209cd7ab AP |
292 | goto err; |
293 | } | |
294 | if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) { | |
96c77dba | 295 | xen_pv_printf(&blkdev->xendev, 0, "error: first > last sector\n"); |
209cd7ab AP |
296 | goto err; |
297 | } | |
298 | if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) { | |
96c77dba | 299 | xen_pv_printf(&blkdev->xendev, 0, "error: page crossing\n"); |
209cd7ab AP |
300 | goto err; |
301 | } | |
302 | ||
303 | ioreq->domids[i] = blkdev->xendev.dom; | |
304 | ioreq->refs[i] = ioreq->req.seg[i].gref; | |
305 | ||
306 | mem = ioreq->req.seg[i].first_sect * blkdev->file_blk; | |
307 | len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk; | |
62d23efa AL |
308 | qemu_iovec_add(&ioreq->v, (void*)mem, len); |
309 | } | |
310 | if (ioreq->start + ioreq->v.size > blkdev->file_size) { | |
96c77dba | 311 | xen_pv_printf(&blkdev->xendev, 0, "error: access beyond end of file\n"); |
209cd7ab | 312 | goto err; |
62d23efa AL |
313 | } |
314 | return 0; | |
315 | ||
316 | err: | |
317 | ioreq->status = BLKIF_RSP_ERROR; | |
318 | return -1; | |
319 | } | |
320 | ||
321 | static void ioreq_unmap(struct ioreq *ioreq) | |
322 | { | |
c1345a88 | 323 | xengnttab_handle *gnt = ioreq->blkdev->xendev.gnttabdev; |
62d23efa AL |
324 | int i; |
325 | ||
9e496d74 | 326 | if (ioreq->num_unmap == 0 || ioreq->mapped == 0) { |
62d23efa | 327 | return; |
209cd7ab | 328 | } |
62d23efa | 329 | if (batch_maps) { |
209cd7ab AP |
330 | if (!ioreq->pages) { |
331 | return; | |
332 | } | |
c1345a88 | 333 | if (xengnttab_unmap(gnt, ioreq->pages, ioreq->num_unmap) != 0) { |
96c77dba | 334 | xen_pv_printf(&ioreq->blkdev->xendev, 0, |
c1345a88 | 335 | "xengnttab_unmap failed: %s\n", |
209cd7ab AP |
336 | strerror(errno)); |
337 | } | |
9e496d74 | 338 | ioreq->blkdev->cnt_map -= ioreq->num_unmap; |
209cd7ab | 339 | ioreq->pages = NULL; |
62d23efa | 340 | } else { |
9e496d74 | 341 | for (i = 0; i < ioreq->num_unmap; i++) { |
209cd7ab AP |
342 | if (!ioreq->page[i]) { |
343 | continue; | |
344 | } | |
c1345a88 | 345 | if (xengnttab_unmap(gnt, ioreq->page[i], 1) != 0) { |
96c77dba | 346 | xen_pv_printf(&ioreq->blkdev->xendev, 0, |
c1345a88 | 347 | "xengnttab_unmap failed: %s\n", |
209cd7ab AP |
348 | strerror(errno)); |
349 | } | |
350 | ioreq->blkdev->cnt_map--; | |
351 | ioreq->page[i] = NULL; | |
352 | } | |
62d23efa | 353 | } |
c6961b7d | 354 | ioreq->mapped = 0; |
62d23efa AL |
355 | } |
356 | ||
357 | static int ioreq_map(struct ioreq *ioreq) | |
358 | { | |
c1345a88 | 359 | xengnttab_handle *gnt = ioreq->blkdev->xendev.gnttabdev; |
9e496d74 RPM |
360 | uint32_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
361 | uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | |
362 | void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | |
363 | int i, j, new_maps = 0; | |
364 | PersistentGrant *grant; | |
2f01dfac | 365 | PersistentRegion *region; |
9e496d74 RPM |
366 | /* domids and refs variables will contain the information necessary |
367 | * to map the grants that are needed to fulfill this request. | |
368 | * | |
369 | * After mapping the needed grants, the page array will contain the | |
370 | * memory address of each granted page in the order specified in ioreq | |
371 | * (disregarding if it's a persistent grant or not). | |
372 | */ | |
62d23efa | 373 | |
c6961b7d | 374 | if (ioreq->v.niov == 0 || ioreq->mapped == 1) { |
62d23efa | 375 | return 0; |
209cd7ab | 376 | } |
9e496d74 RPM |
377 | if (ioreq->blkdev->feature_persistent) { |
378 | for (i = 0; i < ioreq->v.niov; i++) { | |
379 | grant = g_tree_lookup(ioreq->blkdev->persistent_gnts, | |
380 | GUINT_TO_POINTER(ioreq->refs[i])); | |
381 | ||
382 | if (grant != NULL) { | |
383 | page[i] = grant->page; | |
96c77dba | 384 | xen_pv_printf(&ioreq->blkdev->xendev, 3, |
9e496d74 RPM |
385 | "using persistent-grant %" PRIu32 "\n", |
386 | ioreq->refs[i]); | |
387 | } else { | |
388 | /* Add the grant to the list of grants that | |
389 | * should be mapped | |
390 | */ | |
391 | domids[new_maps] = ioreq->domids[i]; | |
392 | refs[new_maps] = ioreq->refs[i]; | |
393 | page[i] = NULL; | |
394 | new_maps++; | |
395 | } | |
396 | } | |
397 | /* Set the protection to RW, since grants may be reused later | |
398 | * with a different protection than the one needed for this request | |
399 | */ | |
400 | ioreq->prot = PROT_WRITE | PROT_READ; | |
401 | } else { | |
402 | /* All grants in the request should be mapped */ | |
403 | memcpy(refs, ioreq->refs, sizeof(refs)); | |
404 | memcpy(domids, ioreq->domids, sizeof(domids)); | |
405 | memset(page, 0, sizeof(page)); | |
406 | new_maps = ioreq->v.niov; | |
407 | } | |
408 | ||
409 | if (batch_maps && new_maps) { | |
c1345a88 | 410 | ioreq->pages = xengnttab_map_grant_refs |
9e496d74 | 411 | (gnt, new_maps, domids, refs, ioreq->prot); |
209cd7ab | 412 | if (ioreq->pages == NULL) { |
96c77dba | 413 | xen_pv_printf(&ioreq->blkdev->xendev, 0, |
209cd7ab | 414 | "can't map %d grant refs (%s, %d maps)\n", |
9e496d74 | 415 | new_maps, strerror(errno), ioreq->blkdev->cnt_map); |
209cd7ab AP |
416 | return -1; |
417 | } | |
9e496d74 RPM |
418 | for (i = 0, j = 0; i < ioreq->v.niov; i++) { |
419 | if (page[i] == NULL) { | |
420 | page[i] = ioreq->pages + (j++) * XC_PAGE_SIZE; | |
421 | } | |
209cd7ab | 422 | } |
9e496d74 RPM |
423 | ioreq->blkdev->cnt_map += new_maps; |
424 | } else if (new_maps) { | |
425 | for (i = 0; i < new_maps; i++) { | |
c1345a88 | 426 | ioreq->page[i] = xengnttab_map_grant_ref |
9e496d74 | 427 | (gnt, domids[i], refs[i], ioreq->prot); |
209cd7ab | 428 | if (ioreq->page[i] == NULL) { |
96c77dba | 429 | xen_pv_printf(&ioreq->blkdev->xendev, 0, |
209cd7ab | 430 | "can't map grant ref %d (%s, %d maps)\n", |
9e496d74 | 431 | refs[i], strerror(errno), ioreq->blkdev->cnt_map); |
a76f48e5 | 432 | ioreq->mapped = 1; |
209cd7ab AP |
433 | ioreq_unmap(ioreq); |
434 | return -1; | |
435 | } | |
209cd7ab AP |
436 | ioreq->blkdev->cnt_map++; |
437 | } | |
9e496d74 RPM |
438 | for (i = 0, j = 0; i < ioreq->v.niov; i++) { |
439 | if (page[i] == NULL) { | |
440 | page[i] = ioreq->page[j++]; | |
441 | } | |
442 | } | |
443 | } | |
2f01dfac RPM |
444 | if (ioreq->blkdev->feature_persistent && new_maps != 0 && |
445 | (!batch_maps || (ioreq->blkdev->persistent_gnt_count + new_maps <= | |
446 | ioreq->blkdev->max_grants))) { | |
447 | /* | |
448 | * If we are using persistent grants and batch mappings only | |
449 | * add the new maps to the list of persistent grants if the whole | |
450 | * area can be persistently mapped. | |
451 | */ | |
452 | if (batch_maps) { | |
453 | region = g_malloc0(sizeof(*region)); | |
454 | region->addr = ioreq->pages; | |
455 | region->num = new_maps; | |
456 | ioreq->blkdev->persistent_regions = g_slist_append( | |
457 | ioreq->blkdev->persistent_regions, | |
458 | region); | |
459 | } | |
9e496d74 RPM |
460 | while ((ioreq->blkdev->persistent_gnt_count < ioreq->blkdev->max_grants) |
461 | && new_maps) { | |
462 | /* Go through the list of newly mapped grants and add as many | |
463 | * as possible to the list of persistently mapped grants. | |
464 | * | |
465 | * Since we start at the end of ioreq->page(s), we only need | |
466 | * to decrease new_maps to prevent this granted pages from | |
467 | * being unmapped in ioreq_unmap. | |
468 | */ | |
469 | grant = g_malloc0(sizeof(*grant)); | |
470 | new_maps--; | |
471 | if (batch_maps) { | |
472 | grant->page = ioreq->pages + (new_maps) * XC_PAGE_SIZE; | |
473 | } else { | |
474 | grant->page = ioreq->page[new_maps]; | |
475 | } | |
476 | grant->blkdev = ioreq->blkdev; | |
96c77dba | 477 | xen_pv_printf(&ioreq->blkdev->xendev, 3, |
9e496d74 RPM |
478 | "adding grant %" PRIu32 " page: %p\n", |
479 | refs[new_maps], grant->page); | |
480 | g_tree_insert(ioreq->blkdev->persistent_gnts, | |
481 | GUINT_TO_POINTER(refs[new_maps]), | |
482 | grant); | |
483 | ioreq->blkdev->persistent_gnt_count++; | |
484 | } | |
2f01dfac | 485 | assert(!batch_maps || new_maps == 0); |
9e496d74 RPM |
486 | } |
487 | for (i = 0; i < ioreq->v.niov; i++) { | |
488 | ioreq->v.iov[i].iov_base += (uintptr_t)page[i]; | |
62d23efa | 489 | } |
c6961b7d | 490 | ioreq->mapped = 1; |
9e496d74 | 491 | ioreq->num_unmap = new_maps; |
62d23efa AL |
492 | return 0; |
493 | } | |
494 | ||
b6eb9b45 PS |
495 | #if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 480 |
496 | ||
497 | static void ioreq_free_copy_buffers(struct ioreq *ioreq) | |
498 | { | |
499 | int i; | |
500 | ||
501 | for (i = 0; i < ioreq->v.niov; i++) { | |
502 | ioreq->page[i] = NULL; | |
503 | } | |
504 | ||
505 | qemu_vfree(ioreq->pages); | |
506 | } | |
507 | ||
508 | static int ioreq_init_copy_buffers(struct ioreq *ioreq) | |
509 | { | |
510 | int i; | |
511 | ||
512 | if (ioreq->v.niov == 0) { | |
513 | return 0; | |
514 | } | |
515 | ||
516 | ioreq->pages = qemu_memalign(XC_PAGE_SIZE, ioreq->v.niov * XC_PAGE_SIZE); | |
517 | ||
518 | for (i = 0; i < ioreq->v.niov; i++) { | |
519 | ioreq->page[i] = ioreq->pages + i * XC_PAGE_SIZE; | |
520 | ioreq->v.iov[i].iov_base = ioreq->page[i]; | |
521 | } | |
522 | ||
523 | return 0; | |
524 | } | |
525 | ||
526 | static int ioreq_grant_copy(struct ioreq *ioreq) | |
527 | { | |
528 | xengnttab_handle *gnt = ioreq->blkdev->xendev.gnttabdev; | |
529 | xengnttab_grant_copy_segment_t segs[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | |
530 | int i, count, rc; | |
531 | int64_t file_blk = ioreq->blkdev->file_blk; | |
532 | ||
533 | if (ioreq->v.niov == 0) { | |
534 | return 0; | |
535 | } | |
536 | ||
537 | count = ioreq->v.niov; | |
538 | ||
539 | for (i = 0; i < count; i++) { | |
540 | if (ioreq->req.operation == BLKIF_OP_READ) { | |
541 | segs[i].flags = GNTCOPY_dest_gref; | |
542 | segs[i].dest.foreign.ref = ioreq->refs[i]; | |
543 | segs[i].dest.foreign.domid = ioreq->domids[i]; | |
544 | segs[i].dest.foreign.offset = ioreq->req.seg[i].first_sect * file_blk; | |
545 | segs[i].source.virt = ioreq->v.iov[i].iov_base; | |
546 | } else { | |
547 | segs[i].flags = GNTCOPY_source_gref; | |
548 | segs[i].source.foreign.ref = ioreq->refs[i]; | |
549 | segs[i].source.foreign.domid = ioreq->domids[i]; | |
550 | segs[i].source.foreign.offset = ioreq->req.seg[i].first_sect * file_blk; | |
551 | segs[i].dest.virt = ioreq->v.iov[i].iov_base; | |
552 | } | |
553 | segs[i].len = (ioreq->req.seg[i].last_sect | |
554 | - ioreq->req.seg[i].first_sect + 1) * file_blk; | |
555 | } | |
556 | ||
557 | rc = xengnttab_grant_copy(gnt, count, segs); | |
558 | ||
559 | if (rc) { | |
96c77dba | 560 | xen_pv_printf(&ioreq->blkdev->xendev, 0, |
b6eb9b45 PS |
561 | "failed to copy data %d\n", rc); |
562 | ioreq->aio_errors++; | |
563 | return -1; | |
564 | } | |
565 | ||
566 | for (i = 0; i < count; i++) { | |
567 | if (segs[i].status != GNTST_okay) { | |
96c77dba | 568 | xen_pv_printf(&ioreq->blkdev->xendev, 3, |
b6eb9b45 PS |
569 | "failed to copy data %d for gref %d, domid %d\n", |
570 | segs[i].status, ioreq->refs[i], ioreq->domids[i]); | |
571 | ioreq->aio_errors++; | |
572 | rc = -1; | |
573 | } | |
574 | } | |
575 | ||
576 | return rc; | |
577 | } | |
578 | #else | |
/* Placeholder for builds where the grant-copy code above is compiled out. */
static void ioreq_free_copy_buffers(struct ioreq *ioreq)
{
    (void)ioreq;
    abort();
}
583 | ||
/* Placeholder for builds where the grant-copy code above is compiled out. */
static int ioreq_init_copy_buffers(struct ioreq *ioreq)
{
    (void)ioreq;
    abort();
}
588 | ||
/* Placeholder for builds where the grant-copy code above is compiled out. */
static int ioreq_grant_copy(struct ioreq *ioreq)
{
    (void)ioreq;
    abort();
}
593 | #endif | |
594 | ||
c6961b7d SS |
595 | static int ioreq_runio_qemu_aio(struct ioreq *ioreq); |
596 | ||
62d23efa AL |
597 | static void qemu_aio_complete(void *opaque, int ret) |
598 | { | |
599 | struct ioreq *ioreq = opaque; | |
600 | ||
601 | if (ret != 0) { | |
96c77dba | 602 | xen_pv_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n", |
62d23efa AL |
603 | ioreq->req.operation == BLKIF_OP_READ ? "read" : "write"); |
604 | ioreq->aio_errors++; | |
605 | } | |
606 | ||
607 | ioreq->aio_inflight--; | |
c6961b7d SS |
608 | if (ioreq->presync) { |
609 | ioreq->presync = 0; | |
610 | ioreq_runio_qemu_aio(ioreq); | |
611 | return; | |
612 | } | |
209cd7ab | 613 | if (ioreq->aio_inflight > 0) { |
62d23efa | 614 | return; |
209cd7ab | 615 | } |
62d23efa | 616 | |
b6eb9b45 PS |
617 | if (ioreq->blkdev->feature_grant_copy) { |
618 | switch (ioreq->req.operation) { | |
619 | case BLKIF_OP_READ: | |
620 | /* in case of failure ioreq->aio_errors is increased */ | |
621 | if (ret == 0) { | |
622 | ioreq_grant_copy(ioreq); | |
623 | } | |
624 | ioreq_free_copy_buffers(ioreq); | |
625 | break; | |
626 | case BLKIF_OP_WRITE: | |
627 | case BLKIF_OP_FLUSH_DISKCACHE: | |
628 | if (!ioreq->req.nr_segments) { | |
629 | break; | |
630 | } | |
631 | ioreq_free_copy_buffers(ioreq); | |
632 | break; | |
633 | default: | |
634 | break; | |
635 | } | |
636 | } | |
637 | ||
62d23efa | 638 | ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY; |
b6eb9b45 PS |
639 | if (!ioreq->blkdev->feature_grant_copy) { |
640 | ioreq_unmap(ioreq); | |
641 | } | |
62d23efa | 642 | ioreq_finish(ioreq); |
58da5b1e OH |
643 | switch (ioreq->req.operation) { |
644 | case BLKIF_OP_WRITE: | |
645 | case BLKIF_OP_FLUSH_DISKCACHE: | |
646 | if (!ioreq->req.nr_segments) { | |
647 | break; | |
648 | } | |
649 | case BLKIF_OP_READ: | |
57ee366c AG |
650 | if (ioreq->status == BLKIF_RSP_OKAY) { |
651 | block_acct_done(blk_get_stats(ioreq->blkdev->blk), &ioreq->acct); | |
652 | } else { | |
653 | block_acct_failed(blk_get_stats(ioreq->blkdev->blk), &ioreq->acct); | |
654 | } | |
58da5b1e | 655 | break; |
f3135204 | 656 | case BLKIF_OP_DISCARD: |
58da5b1e OH |
657 | default: |
658 | break; | |
659 | } | |
62d23efa AL |
660 | qemu_bh_schedule(ioreq->blkdev->bh); |
661 | } | |
662 | ||
7875efb9 OH |
663 | static bool blk_split_discard(struct ioreq *ioreq, blkif_sector_t sector_number, |
664 | uint64_t nr_sectors) | |
665 | { | |
666 | struct XenBlkDev *blkdev = ioreq->blkdev; | |
667 | int64_t byte_offset; | |
668 | int byte_chunk; | |
669 | uint64_t byte_remaining, limit; | |
670 | uint64_t sec_start = sector_number; | |
671 | uint64_t sec_count = nr_sectors; | |
672 | ||
673 | /* Wrap around, or overflowing byte limit? */ | |
674 | if (sec_start + sec_count < sec_count || | |
675 | sec_start + sec_count > INT64_MAX >> BDRV_SECTOR_BITS) { | |
676 | return false; | |
677 | } | |
678 | ||
679 | limit = BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS; | |
680 | byte_offset = sec_start << BDRV_SECTOR_BITS; | |
681 | byte_remaining = sec_count << BDRV_SECTOR_BITS; | |
682 | ||
683 | do { | |
684 | byte_chunk = byte_remaining > limit ? limit : byte_remaining; | |
685 | ioreq->aio_inflight++; | |
686 | blk_aio_pdiscard(blkdev->blk, byte_offset, byte_chunk, | |
687 | qemu_aio_complete, ioreq); | |
688 | byte_remaining -= byte_chunk; | |
689 | byte_offset += byte_chunk; | |
690 | } while (byte_remaining > 0); | |
691 | ||
692 | return true; | |
693 | } | |
694 | ||
62d23efa AL |
695 | static int ioreq_runio_qemu_aio(struct ioreq *ioreq) |
696 | { | |
697 | struct XenBlkDev *blkdev = ioreq->blkdev; | |
698 | ||
b6eb9b45 PS |
699 | if (ioreq->blkdev->feature_grant_copy) { |
700 | ioreq_init_copy_buffers(ioreq); | |
701 | if (ioreq->req.nr_segments && (ioreq->req.operation == BLKIF_OP_WRITE || | |
702 | ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE) && | |
703 | ioreq_grant_copy(ioreq)) { | |
704 | ioreq_free_copy_buffers(ioreq); | |
705 | goto err; | |
706 | } | |
707 | } else { | |
708 | if (ioreq->req.nr_segments && ioreq_map(ioreq)) { | |
709 | goto err; | |
710 | } | |
209cd7ab | 711 | } |
62d23efa AL |
712 | |
713 | ioreq->aio_inflight++; | |
209cd7ab | 714 | if (ioreq->presync) { |
4be74634 | 715 | blk_aio_flush(ioreq->blkdev->blk, qemu_aio_complete, ioreq); |
c6961b7d | 716 | return 0; |
209cd7ab | 717 | } |
62d23efa AL |
718 | |
719 | switch (ioreq->req.operation) { | |
720 | case BLKIF_OP_READ: | |
4be74634 | 721 | block_acct_start(blk_get_stats(blkdev->blk), &ioreq->acct, |
5366d0c8 | 722 | ioreq->v.size, BLOCK_ACCT_READ); |
62d23efa | 723 | ioreq->aio_inflight++; |
d00000f9 EB |
724 | blk_aio_preadv(blkdev->blk, ioreq->start, &ioreq->v, 0, |
725 | qemu_aio_complete, ioreq); | |
209cd7ab | 726 | break; |
62d23efa | 727 | case BLKIF_OP_WRITE: |
7e7b7cba | 728 | case BLKIF_OP_FLUSH_DISKCACHE: |
209cd7ab | 729 | if (!ioreq->req.nr_segments) { |
5cbdebe3 | 730 | break; |
209cd7ab | 731 | } |
a597e79c | 732 | |
4be74634 | 733 | block_acct_start(blk_get_stats(blkdev->blk), &ioreq->acct, |
693044eb AG |
734 | ioreq->v.size, |
735 | ioreq->req.operation == BLKIF_OP_WRITE ? | |
736 | BLOCK_ACCT_WRITE : BLOCK_ACCT_FLUSH); | |
209bef3e | 737 | ioreq->aio_inflight++; |
d00000f9 EB |
738 | blk_aio_pwritev(blkdev->blk, ioreq->start, &ioreq->v, 0, |
739 | qemu_aio_complete, ioreq); | |
209cd7ab | 740 | break; |
f3135204 OH |
741 | case BLKIF_OP_DISCARD: |
742 | { | |
7875efb9 OH |
743 | struct blkif_request_discard *req = (void *)&ioreq->req; |
744 | if (!blk_split_discard(ioreq, req->sector_number, req->nr_sectors)) { | |
745 | goto err; | |
746 | } | |
f3135204 OH |
747 | break; |
748 | } | |
62d23efa | 749 | default: |
209cd7ab | 750 | /* unknown operation (shouldn't happen -- parse catches this) */ |
b6eb9b45 PS |
751 | if (!ioreq->blkdev->feature_grant_copy) { |
752 | ioreq_unmap(ioreq); | |
753 | } | |
209cd7ab | 754 | goto err; |
62d23efa AL |
755 | } |
756 | ||
62d23efa AL |
757 | qemu_aio_complete(ioreq, 0); |
758 | ||
759 | return 0; | |
760 | ||
761 | err: | |
f6ec953c | 762 | ioreq_finish(ioreq); |
62d23efa AL |
763 | ioreq->status = BLKIF_RSP_ERROR; |
764 | return -1; | |
765 | } | |
766 | ||
767 | static int blk_send_response_one(struct ioreq *ioreq) | |
768 | { | |
769 | struct XenBlkDev *blkdev = ioreq->blkdev; | |
770 | int send_notify = 0; | |
771 | int have_requests = 0; | |
772 | blkif_response_t resp; | |
773 | void *dst; | |
774 | ||
775 | resp.id = ioreq->req.id; | |
776 | resp.operation = ioreq->req.operation; | |
777 | resp.status = ioreq->status; | |
778 | ||
779 | /* Place on the response ring for the relevant domain. */ | |
780 | switch (blkdev->protocol) { | |
781 | case BLKIF_PROTOCOL_NATIVE: | |
209cd7ab AP |
782 | dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt); |
783 | break; | |
62d23efa | 784 | case BLKIF_PROTOCOL_X86_32: |
6fcfeff9 BS |
785 | dst = RING_GET_RESPONSE(&blkdev->rings.x86_32_part, |
786 | blkdev->rings.x86_32_part.rsp_prod_pvt); | |
209cd7ab | 787 | break; |
62d23efa | 788 | case BLKIF_PROTOCOL_X86_64: |
6fcfeff9 BS |
789 | dst = RING_GET_RESPONSE(&blkdev->rings.x86_64_part, |
790 | blkdev->rings.x86_64_part.rsp_prod_pvt); | |
209cd7ab | 791 | break; |
62d23efa | 792 | default: |
209cd7ab | 793 | dst = NULL; |
8cced121 | 794 | return 0; |
62d23efa AL |
795 | } |
796 | memcpy(dst, &resp, sizeof(resp)); | |
797 | blkdev->rings.common.rsp_prod_pvt++; | |
798 | ||
799 | RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify); | |
800 | if (blkdev->rings.common.rsp_prod_pvt == blkdev->rings.common.req_cons) { | |
209cd7ab AP |
801 | /* |
802 | * Tail check for pending requests. Allows frontend to avoid | |
803 | * notifications if requests are already in flight (lower | |
804 | * overheads and promotes batching). | |
805 | */ | |
806 | RING_FINAL_CHECK_FOR_REQUESTS(&blkdev->rings.common, have_requests); | |
62d23efa | 807 | } else if (RING_HAS_UNCONSUMED_REQUESTS(&blkdev->rings.common)) { |
209cd7ab | 808 | have_requests = 1; |
62d23efa AL |
809 | } |
810 | ||
209cd7ab AP |
811 | if (have_requests) { |
812 | blkdev->more_work++; | |
813 | } | |
62d23efa AL |
814 | return send_notify; |
815 | } | |
816 | ||
817 | /* walk finished list, send outstanding responses, free requests */ | |
818 | static void blk_send_response_all(struct XenBlkDev *blkdev) | |
819 | { | |
820 | struct ioreq *ioreq; | |
821 | int send_notify = 0; | |
822 | ||
72cf2d4f BS |
823 | while (!QLIST_EMPTY(&blkdev->finished)) { |
824 | ioreq = QLIST_FIRST(&blkdev->finished); | |
209cd7ab | 825 | send_notify += blk_send_response_one(ioreq); |
ed547766 | 826 | ioreq_release(ioreq, true); |
209cd7ab AP |
827 | } |
828 | if (send_notify) { | |
ba18fa2a | 829 | xen_pv_send_notify(&blkdev->xendev); |
62d23efa | 830 | } |
62d23efa AL |
831 | } |
832 | ||
833 | static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq, RING_IDX rc) | |
834 | { | |
835 | switch (blkdev->protocol) { | |
836 | case BLKIF_PROTOCOL_NATIVE: | |
209cd7ab AP |
837 | memcpy(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.native, rc), |
838 | sizeof(ioreq->req)); | |
839 | break; | |
62d23efa | 840 | case BLKIF_PROTOCOL_X86_32: |
6fcfeff9 BS |
841 | blkif_get_x86_32_req(&ioreq->req, |
842 | RING_GET_REQUEST(&blkdev->rings.x86_32_part, rc)); | |
209cd7ab | 843 | break; |
62d23efa | 844 | case BLKIF_PROTOCOL_X86_64: |
6fcfeff9 BS |
845 | blkif_get_x86_64_req(&ioreq->req, |
846 | RING_GET_REQUEST(&blkdev->rings.x86_64_part, rc)); | |
209cd7ab | 847 | break; |
62d23efa | 848 | } |
4837a1a5 JB |
849 | /* Prevent the compiler from accessing the on-ring fields instead. */ |
850 | barrier(); | |
62d23efa AL |
851 | return 0; |
852 | } | |
853 | ||
854 | static void blk_handle_requests(struct XenBlkDev *blkdev) | |
855 | { | |
856 | RING_IDX rc, rp; | |
857 | struct ioreq *ioreq; | |
858 | ||
859 | blkdev->more_work = 0; | |
860 | ||
861 | rc = blkdev->rings.common.req_cons; | |
862 | rp = blkdev->rings.common.sring->req_prod; | |
863 | xen_rmb(); /* Ensure we see queued requests up to 'rp'. */ | |
864 | ||
4e5b184d | 865 | blk_send_response_all(blkdev); |
fc1f79f7 | 866 | while (rc != rp) { |
62d23efa | 867 | /* pull request from ring */ |
209cd7ab | 868 | if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) { |
62d23efa | 869 | break; |
209cd7ab | 870 | } |
62d23efa AL |
871 | ioreq = ioreq_start(blkdev); |
872 | if (ioreq == NULL) { | |
873 | blkdev->more_work++; | |
874 | break; | |
875 | } | |
876 | blk_get_request(blkdev, ioreq, rc); | |
877 | blkdev->rings.common.req_cons = ++rc; | |
878 | ||
879 | /* parse them */ | |
880 | if (ioreq_parse(ioreq) != 0) { | |
57ee366c AG |
881 | |
882 | switch (ioreq->req.operation) { | |
883 | case BLKIF_OP_READ: | |
884 | block_acct_invalid(blk_get_stats(blkdev->blk), | |
885 | BLOCK_ACCT_READ); | |
886 | break; | |
887 | case BLKIF_OP_WRITE: | |
888 | block_acct_invalid(blk_get_stats(blkdev->blk), | |
889 | BLOCK_ACCT_WRITE); | |
890 | break; | |
891 | case BLKIF_OP_FLUSH_DISKCACHE: | |
892 | block_acct_invalid(blk_get_stats(blkdev->blk), | |
893 | BLOCK_ACCT_FLUSH); | |
894 | default: | |
895 | break; | |
896 | }; | |
897 | ||
209cd7ab | 898 | if (blk_send_response_one(ioreq)) { |
ba18fa2a | 899 | xen_pv_send_notify(&blkdev->xendev); |
209cd7ab | 900 | } |
ed547766 | 901 | ioreq_release(ioreq, false); |
62d23efa AL |
902 | continue; |
903 | } | |
904 | ||
4e5b184d | 905 | ioreq_runio_qemu_aio(ioreq); |
209cd7ab | 906 | } |
62d23efa | 907 | |
209cd7ab | 908 | if (blkdev->more_work && blkdev->requests_inflight < max_requests) { |
62d23efa | 909 | qemu_bh_schedule(blkdev->bh); |
209cd7ab | 910 | } |
62d23efa AL |
911 | } |
912 | ||
913 | /* ------------------------------------------------------------- */ | |
914 | ||
/* Bottom-half callback; 'opaque' is the XenBlkDev registered in blk_alloc(). */
static void blk_bh(void *opaque)
{
    blk_handle_requests((struct XenBlkDev *)opaque);
}
920 | ||
64c27e5b JB |
/*
 * Upper bound on the number of grants to reserve.
 *
 * Grant allocations need contiguous chunks, so the true worst case is
 *   max_req * max_seg + (max_req - 1) * (max_seg - 1) + 1.
 * To keep things simple we round that up to twice the plain product,
 *   2 * max_req * max_seg.
 */
#define MAX_GRANTS(max_req, max_seg) ((max_req) * (max_seg) * 2)
929 | ||
62d23efa AL |
930 | static void blk_alloc(struct XenDevice *xendev) |
931 | { | |
932 | struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); | |
933 | ||
72cf2d4f BS |
934 | QLIST_INIT(&blkdev->inflight); |
935 | QLIST_INIT(&blkdev->finished); | |
936 | QLIST_INIT(&blkdev->freelist); | |
62d23efa | 937 | blkdev->bh = qemu_bh_new(blk_bh, blkdev); |
209cd7ab | 938 | if (xen_mode != XEN_EMULATE) { |
62d23efa | 939 | batch_maps = 1; |
209cd7ab | 940 | } |
c1345a88 | 941 | if (xengnttab_set_max_grants(xendev->gnttabdev, |
64c27e5b | 942 | MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) { |
96c77dba | 943 | xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n", |
64c27e5b JB |
944 | strerror(errno)); |
945 | } | |
62d23efa AL |
946 | } |
947 | ||
f3135204 OH |
948 | static void blk_parse_discard(struct XenBlkDev *blkdev) |
949 | { | |
950 | int enable; | |
951 | ||
952 | blkdev->feature_discard = true; | |
953 | ||
954 | if (xenstore_read_be_int(&blkdev->xendev, "discard-enable", &enable) == 0) { | |
955 | blkdev->feature_discard = !!enable; | |
956 | } | |
957 | ||
958 | if (blkdev->feature_discard) { | |
959 | xenstore_write_be_int(&blkdev->xendev, "feature-discard", 1); | |
960 | } | |
961 | } | |
962 | ||
62d23efa AL |
963 | static int blk_init(struct XenDevice *xendev) |
964 | { | |
965 | struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); | |
86f425db | 966 | int info = 0; |
454ae734 | 967 | char *directiosafe = NULL; |
62d23efa AL |
968 | |
969 | /* read xenstore entries */ | |
970 | if (blkdev->params == NULL) { | |
5ea3c2b4 | 971 | char *h = NULL; |
209cd7ab | 972 | blkdev->params = xenstore_read_be_str(&blkdev->xendev, "params"); |
5ea3c2b4 SS |
973 | if (blkdev->params != NULL) { |
974 | h = strchr(blkdev->params, ':'); | |
975 | } | |
209cd7ab AP |
976 | if (h != NULL) { |
977 | blkdev->fileproto = blkdev->params; | |
978 | blkdev->filename = h+1; | |
979 | *h = 0; | |
980 | } else { | |
981 | blkdev->fileproto = "<unset>"; | |
982 | blkdev->filename = blkdev->params; | |
983 | } | |
984 | } | |
7cef3f4f SS |
985 | if (!strcmp("aio", blkdev->fileproto)) { |
986 | blkdev->fileproto = "raw"; | |
987 | } | |
fc3e493b SS |
988 | if (!strcmp("vhd", blkdev->fileproto)) { |
989 | blkdev->fileproto = "vpc"; | |
990 | } | |
209cd7ab AP |
991 | if (blkdev->mode == NULL) { |
992 | blkdev->mode = xenstore_read_be_str(&blkdev->xendev, "mode"); | |
993 | } | |
994 | if (blkdev->type == NULL) { | |
995 | blkdev->type = xenstore_read_be_str(&blkdev->xendev, "type"); | |
996 | } | |
997 | if (blkdev->dev == NULL) { | |
998 | blkdev->dev = xenstore_read_be_str(&blkdev->xendev, "dev"); | |
999 | } | |
1000 | if (blkdev->devtype == NULL) { | |
1001 | blkdev->devtype = xenstore_read_be_str(&blkdev->xendev, "device-type"); | |
1002 | } | |
454ae734 SS |
1003 | directiosafe = xenstore_read_be_str(&blkdev->xendev, "direct-io-safe"); |
1004 | blkdev->directiosafe = (directiosafe && atoi(directiosafe)); | |
62d23efa AL |
1005 | |
1006 | /* do we have all we need? */ | |
1007 | if (blkdev->params == NULL || | |
209cd7ab AP |
1008 | blkdev->mode == NULL || |
1009 | blkdev->type == NULL || | |
1010 | blkdev->dev == NULL) { | |
5ea3c2b4 | 1011 | goto out_error; |
209cd7ab | 1012 | } |
62d23efa AL |
1013 | |
1014 | /* read-only ? */ | |
86f425db | 1015 | if (strcmp(blkdev->mode, "w")) { |
209cd7ab | 1016 | info |= VDISK_READONLY; |
62d23efa AL |
1017 | } |
1018 | ||
1019 | /* cdrom ? */ | |
209cd7ab AP |
1020 | if (blkdev->devtype && !strcmp(blkdev->devtype, "cdrom")) { |
1021 | info |= VDISK_CDROM; | |
1022 | } | |
62d23efa | 1023 | |
86f425db AB |
1024 | blkdev->file_blk = BLOCK_SIZE; |
1025 | ||
1026 | /* fill info | |
1027 | * blk_connect supplies sector-size and sectors | |
1028 | */ | |
1029 | xenstore_write_be_int(&blkdev->xendev, "feature-flush-cache", 1); | |
1030 | xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1); | |
1031 | xenstore_write_be_int(&blkdev->xendev, "info", info); | |
454ae734 | 1032 | |
f3135204 OH |
1033 | blk_parse_discard(blkdev); |
1034 | ||
454ae734 | 1035 | g_free(directiosafe); |
86f425db AB |
1036 | return 0; |
1037 | ||
1038 | out_error: | |
1039 | g_free(blkdev->params); | |
1040 | blkdev->params = NULL; | |
1041 | g_free(blkdev->mode); | |
1042 | blkdev->mode = NULL; | |
1043 | g_free(blkdev->type); | |
1044 | blkdev->type = NULL; | |
1045 | g_free(blkdev->dev); | |
1046 | blkdev->dev = NULL; | |
1047 | g_free(blkdev->devtype); | |
1048 | blkdev->devtype = NULL; | |
454ae734 SS |
1049 | g_free(directiosafe); |
1050 | blkdev->directiosafe = false; | |
86f425db AB |
1051 | return -1; |
1052 | } | |
1053 | ||
1054 | static int blk_connect(struct XenDevice *xendev) | |
1055 | { | |
1056 | struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); | |
1057 | int pers, index, qflags; | |
b64ec4e4 | 1058 | bool readonly = true; |
ecdd3cc8 | 1059 | bool writethrough = true; |
86f425db AB |
1060 | |
1061 | /* read-only ? */ | |
454ae734 SS |
1062 | if (blkdev->directiosafe) { |
1063 | qflags = BDRV_O_NOCACHE | BDRV_O_NATIVE_AIO; | |
1064 | } else { | |
ecdd3cc8 KW |
1065 | qflags = 0; |
1066 | writethrough = false; | |
454ae734 | 1067 | } |
86f425db AB |
1068 | if (strcmp(blkdev->mode, "w") == 0) { |
1069 | qflags |= BDRV_O_RDWR; | |
b64ec4e4 | 1070 | readonly = false; |
86f425db | 1071 | } |
f3135204 OH |
1072 | if (blkdev->feature_discard) { |
1073 | qflags |= BDRV_O_UNMAP; | |
1074 | } | |
86f425db | 1075 | |
62d23efa | 1076 | /* init qemu block driver */ |
751c6a17 GH |
1077 | index = (blkdev->xendev.dev - 202 * 256) / 16; |
1078 | blkdev->dinfo = drive_get(IF_XEN, 0, index); | |
1079 | if (!blkdev->dinfo) { | |
98522f63 | 1080 | Error *local_err = NULL; |
9a925356 | 1081 | QDict *options = NULL; |
cedccf13 | 1082 | |
9a925356 HR |
1083 | if (strcmp(blkdev->fileproto, "<unset>")) { |
1084 | options = qdict_new(); | |
1085 | qdict_put(options, "driver", qstring_from_str(blkdev->fileproto)); | |
26f54e9a | 1086 | } |
cedccf13 | 1087 | |
9a925356 | 1088 | /* setup via xenbus -> create new block driver instance */ |
96c77dba | 1089 | xen_pv_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n"); |
efaa7c4e | 1090 | blkdev->blk = blk_new_open(blkdev->filename, NULL, options, |
9a925356 HR |
1091 | qflags, &local_err); |
1092 | if (!blkdev->blk) { | |
96c77dba | 1093 | xen_pv_printf(&blkdev->xendev, 0, "error: %s\n", |
cedccf13 MA |
1094 | error_get_pretty(local_err)); |
1095 | error_free(local_err); | |
cedccf13 MA |
1096 | return -1; |
1097 | } | |
ecdd3cc8 | 1098 | blk_set_enable_write_cache(blkdev->blk, !writethrough); |
62d23efa AL |
1099 | } else { |
1100 | /* setup via qemu cmdline -> already setup for us */ | |
96c77dba | 1101 | xen_pv_printf(&blkdev->xendev, 2, |
b9730c5b | 1102 | "get configured bdrv (cmdline setup)\n"); |
4be74634 MA |
1103 | blkdev->blk = blk_by_legacy_dinfo(blkdev->dinfo); |
1104 | if (blk_is_read_only(blkdev->blk) && !readonly) { | |
96c77dba | 1105 | xen_pv_printf(&blkdev->xendev, 0, "Unexpected read-only drive"); |
4be74634 | 1106 | blkdev->blk = NULL; |
4f8a066b KW |
1107 | return -1; |
1108 | } | |
4be74634 MA |
1109 | /* blkdev->blk is not create by us, we get a reference |
1110 | * so we can blk_unref() unconditionally */ | |
1111 | blk_ref(blkdev->blk); | |
1112 | } | |
bbc8ea98 | 1113 | blk_attach_dev_legacy(blkdev->blk, blkdev); |
4be74634 | 1114 | blkdev->file_size = blk_getlength(blkdev->blk); |
62d23efa | 1115 | if (blkdev->file_size < 0) { |
5433c24f HR |
1116 | BlockDriverState *bs = blk_bs(blkdev->blk); |
1117 | const char *drv_name = bs ? bdrv_get_format_name(bs) : NULL; | |
96c77dba | 1118 | xen_pv_printf(&blkdev->xendev, 1, "blk_getlength: %d (%s) | drv %s\n", |
62d23efa | 1119 | (int)blkdev->file_size, strerror(-blkdev->file_size), |
5433c24f | 1120 | drv_name ?: "-"); |
209cd7ab | 1121 | blkdev->file_size = 0; |
62d23efa | 1122 | } |
62d23efa | 1123 | |
96c77dba | 1124 | xen_pv_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\"," |
209cd7ab AP |
1125 | " size %" PRId64 " (%" PRId64 " MB)\n", |
1126 | blkdev->type, blkdev->fileproto, blkdev->filename, | |
1127 | blkdev->file_size, blkdev->file_size >> 20); | |
62d23efa | 1128 | |
86f425db AB |
1129 | /* Fill in number of sector size and number of sectors */ |
1130 | xenstore_write_be_int(&blkdev->xendev, "sector-size", blkdev->file_blk); | |
9246ce88 FF |
1131 | xenstore_write_be_int64(&blkdev->xendev, "sectors", |
1132 | blkdev->file_size / blkdev->file_blk); | |
62d23efa | 1133 | |
209cd7ab AP |
1134 | if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) { |
1135 | return -1; | |
1136 | } | |
62d23efa | 1137 | if (xenstore_read_fe_int(&blkdev->xendev, "event-channel", |
209cd7ab AP |
1138 | &blkdev->xendev.remote_port) == -1) { |
1139 | return -1; | |
1140 | } | |
9e496d74 RPM |
1141 | if (xenstore_read_fe_int(&blkdev->xendev, "feature-persistent", &pers)) { |
1142 | blkdev->feature_persistent = FALSE; | |
1143 | } else { | |
1144 | blkdev->feature_persistent = !!pers; | |
1145 | } | |
62d23efa | 1146 | |
4ada797b JG |
1147 | if (!blkdev->xendev.protocol) { |
1148 | blkdev->protocol = BLKIF_PROTOCOL_NATIVE; | |
1149 | } else if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_NATIVE) == 0) { | |
1150 | blkdev->protocol = BLKIF_PROTOCOL_NATIVE; | |
1151 | } else if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) { | |
1152 | blkdev->protocol = BLKIF_PROTOCOL_X86_32; | |
1153 | } else if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) { | |
1154 | blkdev->protocol = BLKIF_PROTOCOL_X86_64; | |
1155 | } else { | |
1156 | blkdev->protocol = BLKIF_PROTOCOL_NATIVE; | |
62d23efa AL |
1157 | } |
1158 | ||
c1345a88 | 1159 | blkdev->sring = xengnttab_map_grant_ref(blkdev->xendev.gnttabdev, |
209cd7ab AP |
1160 | blkdev->xendev.dom, |
1161 | blkdev->ring_ref, | |
1162 | PROT_READ | PROT_WRITE); | |
1163 | if (!blkdev->sring) { | |
1164 | return -1; | |
1165 | } | |
62d23efa AL |
1166 | blkdev->cnt_map++; |
1167 | ||
1168 | switch (blkdev->protocol) { | |
1169 | case BLKIF_PROTOCOL_NATIVE: | |
1170 | { | |
209cd7ab AP |
1171 | blkif_sring_t *sring_native = blkdev->sring; |
1172 | BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE); | |
1173 | break; | |
62d23efa AL |
1174 | } |
1175 | case BLKIF_PROTOCOL_X86_32: | |
1176 | { | |
209cd7ab | 1177 | blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring; |
6fcfeff9 BS |
1178 | |
1179 | BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, XC_PAGE_SIZE); | |
209cd7ab | 1180 | break; |
62d23efa AL |
1181 | } |
1182 | case BLKIF_PROTOCOL_X86_64: | |
1183 | { | |
209cd7ab | 1184 | blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring; |
6fcfeff9 BS |
1185 | |
1186 | BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, XC_PAGE_SIZE); | |
209cd7ab | 1187 | break; |
62d23efa AL |
1188 | } |
1189 | } | |
1190 | ||
9e496d74 RPM |
1191 | if (blkdev->feature_persistent) { |
1192 | /* Init persistent grants */ | |
1193 | blkdev->max_grants = max_requests * BLKIF_MAX_SEGMENTS_PER_REQUEST; | |
1194 | blkdev->persistent_gnts = g_tree_new_full((GCompareDataFunc)int_cmp, | |
1195 | NULL, NULL, | |
2f01dfac RPM |
1196 | batch_maps ? |
1197 | (GDestroyNotify)g_free : | |
9e496d74 | 1198 | (GDestroyNotify)destroy_grant); |
2f01dfac | 1199 | blkdev->persistent_regions = NULL; |
9e496d74 RPM |
1200 | blkdev->persistent_gnt_count = 0; |
1201 | } | |
1202 | ||
62d23efa AL |
1203 | xen_be_bind_evtchn(&blkdev->xendev); |
1204 | ||
b6eb9b45 PS |
1205 | blkdev->feature_grant_copy = |
1206 | (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0); | |
1207 | ||
96c77dba | 1208 | xen_pv_printf(&blkdev->xendev, 3, "grant copy operation %s\n", |
b6eb9b45 PS |
1209 | blkdev->feature_grant_copy ? "enabled" : "disabled"); |
1210 | ||
96c77dba | 1211 | xen_pv_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, " |
209cd7ab AP |
1212 | "remote port %d, local port %d\n", |
1213 | blkdev->xendev.protocol, blkdev->ring_ref, | |
1214 | blkdev->xendev.remote_port, blkdev->xendev.local_port); | |
62d23efa AL |
1215 | return 0; |
1216 | } | |
1217 | ||
1218 | static void blk_disconnect(struct XenDevice *xendev) | |
1219 | { | |
1220 | struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); | |
1221 | ||
4be74634 MA |
1222 | if (blkdev->blk) { |
1223 | blk_detach_dev(blkdev->blk, blkdev); | |
1224 | blk_unref(blkdev->blk); | |
1225 | blkdev->blk = NULL; | |
62d23efa | 1226 | } |
65807f4b | 1227 | xen_pv_unbind_evtchn(&blkdev->xendev); |
62d23efa AL |
1228 | |
1229 | if (blkdev->sring) { | |
c1345a88 | 1230 | xengnttab_unmap(blkdev->xendev.gnttabdev, blkdev->sring, 1); |
209cd7ab AP |
1231 | blkdev->cnt_map--; |
1232 | blkdev->sring = NULL; | |
62d23efa | 1233 | } |
2f01dfac RPM |
1234 | |
1235 | /* | |
1236 | * Unmap persistent grants before switching to the closed state | |
1237 | * so the frontend can free them. | |
1238 | * | |
1239 | * In the !batch_maps case g_tree_destroy will take care of unmapping | |
1240 | * the grant, but in the batch_maps case we need to iterate over every | |
1241 | * region in persistent_regions and unmap it. | |
1242 | */ | |
1243 | if (blkdev->feature_persistent) { | |
1244 | g_tree_destroy(blkdev->persistent_gnts); | |
1245 | assert(batch_maps || blkdev->persistent_gnt_count == 0); | |
1246 | if (batch_maps) { | |
1247 | blkdev->persistent_gnt_count = 0; | |
1248 | g_slist_foreach(blkdev->persistent_regions, | |
1249 | (GFunc)remove_persistent_region, blkdev); | |
1250 | g_slist_free(blkdev->persistent_regions); | |
1251 | } | |
1252 | blkdev->feature_persistent = false; | |
1253 | } | |
62d23efa AL |
1254 | } |
1255 | ||
1256 | static int blk_free(struct XenDevice *xendev) | |
1257 | { | |
1258 | struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); | |
1259 | struct ioreq *ioreq; | |
1260 | ||
4be74634 | 1261 | if (blkdev->blk || blkdev->sring) { |
77ba8fef SS |
1262 | blk_disconnect(xendev); |
1263 | } | |
1264 | ||
72cf2d4f | 1265 | while (!QLIST_EMPTY(&blkdev->freelist)) { |
209cd7ab | 1266 | ioreq = QLIST_FIRST(&blkdev->freelist); |
72cf2d4f | 1267 | QLIST_REMOVE(ioreq, list); |
62d23efa | 1268 | qemu_iovec_destroy(&ioreq->v); |
7267c094 | 1269 | g_free(ioreq); |
62d23efa AL |
1270 | } |
1271 | ||
7267c094 AL |
1272 | g_free(blkdev->params); |
1273 | g_free(blkdev->mode); | |
1274 | g_free(blkdev->type); | |
1275 | g_free(blkdev->dev); | |
1276 | g_free(blkdev->devtype); | |
62d23efa AL |
1277 | qemu_bh_delete(blkdev->bh); |
1278 | return 0; | |
1279 | } | |
1280 | ||
1281 | static void blk_event(struct XenDevice *xendev) | |
1282 | { | |
1283 | struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); | |
1284 | ||
1285 | qemu_bh_schedule(blkdev->bh); | |
1286 | } | |
1287 | ||
1288 | struct XenDevOps xen_blkdev_ops = { | |
1289 | .size = sizeof(struct XenBlkDev), | |
1290 | .flags = DEVOPS_FLAG_NEED_GNTDEV, | |
1291 | .alloc = blk_alloc, | |
1292 | .init = blk_init, | |
384087b2 | 1293 | .initialise = blk_connect, |
62d23efa AL |
1294 | .disconnect = blk_disconnect, |
1295 | .event = blk_event, | |
1296 | .free = blk_free, | |
1297 | }; |