/*
 * Copyright (c) 2018 Citrix Systems Inc.
 * (c) Gerd Hoffmann <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; under version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qapi/error.h"
#include "hw/xen/xen_common.h"
#include "hw/block/xen_blkif.h"
#include "sysemu/block-backend.h"
#include "sysemu/iothread.h"
#include "xen-block.h"

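/*
 * Per-request state: the guest request as copied off the ring, the
 * bounce buffer and iovec used for grant copies, and counters tracking
 * outstanding AIO operations and any errors they reported.
 */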
typedef struct XenBlockRequest {
    blkif_request_t req;
    int16_t status;
    off_t start;
    QEMUIOVector v;
    void *buf;
    size_t size;
    int presync;
    int aio_inflight;
    int aio_errors;
    XenBlockDataPlane *dataplane;
    QLIST_ENTRY(XenBlockRequest) list;
    BlockAcctCookie acct;
} XenBlockRequest;

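/*
 * Per-device dataplane state: the mapped shared ring and its event
 * channel, the inflight/freelist request tracking, and the AioContext
 * (an IOThread's or the main loop's) in which ring processing runs.
 */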
struct XenBlockDataPlane {
    XenDevice *xendev;
    XenEventChannel *event_channel;
    unsigned int *ring_ref;
    unsigned int nr_ring_ref;
    void *sring;
    int protocol;
    blkif_back_rings_t rings;
    int more_work;
    QLIST_HEAD(inflight_head, XenBlockRequest) inflight;
    QLIST_HEAD(freelist_head, XenBlockRequest) freelist;
    int requests_total;
    int requests_inflight;
    unsigned int max_requests;
    BlockBackend *blk;
    unsigned int sector_size;
    QEMUBH *bh;
    IOThread *iothread;
    AioContext *ctx;
};

static void reset_request(XenBlockRequest *request)
{
    memset(&request->req, 0, sizeof(request->req));
    request->status = 0;
    request->start = 0;
    request->size = 0;
    request->presync = 0;

    request->aio_inflight = 0;
    request->aio_errors = 0;

    request->dataplane = NULL;
    memset(&request->list, 0, sizeof(request->list));
    memset(&request->acct, 0, sizeof(request->acct));

    qemu_iovec_reset(&request->v);
}

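/*
 * Take a request from the freelist, or allocate a new one (with its
 * page-aligned bounce buffer) if the freelist is empty and fewer than
 * max_requests exist, then move it onto the inflight list. Returns NULL
 * once the request limit has been reached.
 */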
static XenBlockRequest *xen_block_start_request(XenBlockDataPlane *dataplane)
{
    XenBlockRequest *request = NULL;

    if (QLIST_EMPTY(&dataplane->freelist)) {
        if (dataplane->requests_total >= dataplane->max_requests) {
            goto out;
        }
        /* allocate new struct */
        request = g_malloc0(sizeof(*request));
        request->dataplane = dataplane;
        /*
         * We cannot need more pages per request than this, and since we
         * re-use requests, allocate the memory once here. It will be freed
         * in xen_block_dataplane_destroy() when the request list is freed.
         */
        request->buf = qemu_memalign(XC_PAGE_SIZE,
                                     BLKIF_MAX_SEGMENTS_PER_REQUEST *
                                     XC_PAGE_SIZE);
        dataplane->requests_total++;
        qemu_iovec_init(&request->v, 1);
    } else {
        /* get one from freelist */
        request = QLIST_FIRST(&dataplane->freelist);
        QLIST_REMOVE(request, list);
    }
    QLIST_INSERT_HEAD(&dataplane->inflight, request, list);
    dataplane->requests_inflight++;

out:
    return request;
}

static void xen_block_finish_request(XenBlockRequest *request)
{
    XenBlockDataPlane *dataplane = request->dataplane;

    QLIST_REMOVE(request, list);
    dataplane->requests_inflight--;
}

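/* Reset a completed request and return it to the freelist for re-use. */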
static void xen_block_release_request(XenBlockRequest *request)
{
    XenBlockDataPlane *dataplane = request->dataplane;

    QLIST_REMOVE(request, list);
    reset_request(request);
    request->dataplane = dataplane;
    QLIST_INSERT_HEAD(&dataplane->freelist, request, list);
    dataplane->requests_inflight--;
}

/*
 * translate request into iovec + start offset
 * do sanity checks along the way
 */
static int xen_block_parse_request(XenBlockRequest *request)
{
    XenBlockDataPlane *dataplane = request->dataplane;
    size_t len;
    int i;

    switch (request->req.operation) {
    case BLKIF_OP_READ:
        break;
    case BLKIF_OP_FLUSH_DISKCACHE:
        request->presync = 1;
        if (!request->req.nr_segments) {
            return 0;
        }
        /* fall through */
    case BLKIF_OP_WRITE:
        break;
    case BLKIF_OP_DISCARD:
        return 0;
    default:
        error_report("error: unknown operation (%d)", request->req.operation);
        goto err;
    };

    if (request->req.operation != BLKIF_OP_READ &&
        blk_is_read_only(dataplane->blk)) {
        error_report("error: write req for ro device");
        goto err;
    }

    request->start = request->req.sector_number * dataplane->sector_size;
    for (i = 0; i < request->req.nr_segments; i++) {
        if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
            error_report("error: nr_segments too big");
            goto err;
        }
        if (request->req.seg[i].first_sect > request->req.seg[i].last_sect) {
            error_report("error: first > last sector");
            goto err;
        }
        if (request->req.seg[i].last_sect * dataplane->sector_size >=
            XC_PAGE_SIZE) {
            error_report("error: page crossing");
            goto err;
        }

        len = (request->req.seg[i].last_sect -
               request->req.seg[i].first_sect + 1) * dataplane->sector_size;
        request->size += len;
    }
    if (request->start + request->size > blk_getlength(dataplane->blk)) {
        error_report("error: access beyond end of file");
        goto err;
    }
    return 0;

err:
    request->status = BLKIF_RSP_ERROR;
    return -1;
}

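/*
 * Copy request data between the guest's granted pages and the local
 * bounce buffer, one grant-copy segment per blkif segment. Reads copy
 * towards the domain, writes copy from it.
 */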
static int xen_block_copy_request(XenBlockRequest *request)
{
    XenBlockDataPlane *dataplane = request->dataplane;
    XenDevice *xendev = dataplane->xendev;
    XenDeviceGrantCopySegment segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    int i, count;
    bool to_domain = (request->req.operation == BLKIF_OP_READ);
    void *virt = request->buf;
    Error *local_err = NULL;

    if (request->req.nr_segments == 0) {
        return 0;
    }

    count = request->req.nr_segments;

    for (i = 0; i < count; i++) {
        if (to_domain) {
            segs[i].dest.foreign.ref = request->req.seg[i].gref;
            segs[i].dest.foreign.offset = request->req.seg[i].first_sect *
                dataplane->sector_size;
            segs[i].source.virt = virt;
        } else {
            segs[i].source.foreign.ref = request->req.seg[i].gref;
            segs[i].source.foreign.offset = request->req.seg[i].first_sect *
                dataplane->sector_size;
            segs[i].dest.virt = virt;
        }
        segs[i].len = (request->req.seg[i].last_sect -
                       request->req.seg[i].first_sect + 1) *
                      dataplane->sector_size;
        virt += segs[i].len;
    }

    xen_device_copy_grant_refs(xendev, to_domain, segs, count, &local_err);

    if (local_err) {
        error_reportf_err(local_err, "failed to copy data: ");

        request->aio_errors++;
        return -1;
    }

    return 0;
}

static int xen_block_do_aio(XenBlockRequest *request);
static int xen_block_send_response(XenBlockRequest *request);

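/*
 * Completion callback for all AIO issued on behalf of a request. For
 * reads, the data is grant-copied back to the guest once every
 * sub-request has finished. When the whole request is complete, fill in
 * the I/O accounting, send the response and, if required, notify the
 * frontend.
 */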
static void xen_block_complete_aio(void *opaque, int ret)
{
    XenBlockRequest *request = opaque;
    XenBlockDataPlane *dataplane = request->dataplane;

    aio_context_acquire(dataplane->ctx);

    if (ret != 0) {
        error_report("%s I/O error",
                     request->req.operation == BLKIF_OP_READ ?
                     "read" : "write");
        request->aio_errors++;
    }

    request->aio_inflight--;
    if (request->presync) {
        request->presync = 0;
        xen_block_do_aio(request);
        goto done;
    }
    if (request->aio_inflight > 0) {
        goto done;
    }

    switch (request->req.operation) {
    case BLKIF_OP_READ:
        /* in case of failure request->aio_errors is increased */
        if (ret == 0) {
            xen_block_copy_request(request);
        }
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_FLUSH_DISKCACHE:
    default:
        break;
    }

    request->status = request->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
    xen_block_finish_request(request);

    switch (request->req.operation) {
    case BLKIF_OP_WRITE:
    case BLKIF_OP_FLUSH_DISKCACHE:
        if (!request->req.nr_segments) {
            break;
        }
        /* fall through */
    case BLKIF_OP_READ:
        if (request->status == BLKIF_RSP_OKAY) {
            block_acct_done(blk_get_stats(dataplane->blk), &request->acct);
        } else {
            block_acct_failed(blk_get_stats(dataplane->blk), &request->acct);
        }
        break;
    case BLKIF_OP_DISCARD:
    default:
        break;
    }
    if (xen_block_send_response(request)) {
        Error *local_err = NULL;

        xen_device_notify_event_channel(dataplane->xendev,
                                        dataplane->event_channel,
                                        &local_err);
        if (local_err) {
            error_report_err(local_err);
        }
    }
    xen_block_release_request(request);

    if (dataplane->more_work) {
        qemu_bh_schedule(dataplane->bh);
    }

done:
    aio_context_release(dataplane->ctx);
}

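/*
 * Issue a discard as a series of chunks no larger than
 * BDRV_REQUEST_MAX_BYTES. Returns false if the sector range wraps around
 * or would overflow the byte offset limit.
 */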
static bool xen_block_split_discard(XenBlockRequest *request,
                                    blkif_sector_t sector_number,
                                    uint64_t nr_sectors)
{
    XenBlockDataPlane *dataplane = request->dataplane;
    int64_t byte_offset;
    int byte_chunk;
    uint64_t byte_remaining;
    uint64_t sec_start = sector_number;
    uint64_t sec_count = nr_sectors;

    /* Wrap around, or overflowing byte limit? */
    if (sec_start + sec_count < sec_count ||
        sec_start + sec_count > INT64_MAX / dataplane->sector_size) {
        return false;
    }

    byte_offset = sec_start * dataplane->sector_size;
    byte_remaining = sec_count * dataplane->sector_size;

    do {
        byte_chunk = byte_remaining > BDRV_REQUEST_MAX_BYTES ?
            BDRV_REQUEST_MAX_BYTES : byte_remaining;
        request->aio_inflight++;
        blk_aio_pdiscard(dataplane->blk, byte_offset, byte_chunk,
                         xen_block_complete_aio, request);
        byte_remaining -= byte_chunk;
        byte_offset += byte_chunk;
    } while (byte_remaining > 0);

    return true;
}

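/*
 * Submit a parsed request to the BlockBackend. Write data is
 * grant-copied from the guest first. If the presync flag is set, a flush
 * is issued first and the remainder of the request is submitted from its
 * completion callback.
 */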
static int xen_block_do_aio(XenBlockRequest *request)
{
    XenBlockDataPlane *dataplane = request->dataplane;

    if (request->req.nr_segments &&
        (request->req.operation == BLKIF_OP_WRITE ||
         request->req.operation == BLKIF_OP_FLUSH_DISKCACHE) &&
        xen_block_copy_request(request)) {
        goto err;
    }

    request->aio_inflight++;
    if (request->presync) {
        blk_aio_flush(request->dataplane->blk, xen_block_complete_aio,
                      request);
        return 0;
    }

    switch (request->req.operation) {
    case BLKIF_OP_READ:
        qemu_iovec_add(&request->v, request->buf, request->size);
        block_acct_start(blk_get_stats(dataplane->blk), &request->acct,
                         request->v.size, BLOCK_ACCT_READ);
        request->aio_inflight++;
        blk_aio_preadv(dataplane->blk, request->start, &request->v, 0,
                       xen_block_complete_aio, request);
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_FLUSH_DISKCACHE:
        if (!request->req.nr_segments) {
            break;
        }

        qemu_iovec_add(&request->v, request->buf, request->size);
        block_acct_start(blk_get_stats(dataplane->blk), &request->acct,
                         request->v.size,
                         request->req.operation == BLKIF_OP_WRITE ?
                         BLOCK_ACCT_WRITE : BLOCK_ACCT_FLUSH);
        request->aio_inflight++;
        blk_aio_pwritev(dataplane->blk, request->start, &request->v, 0,
                        xen_block_complete_aio, request);
        break;
    case BLKIF_OP_DISCARD:
    {
        struct blkif_request_discard *req = (void *)&request->req;
        if (!xen_block_split_discard(request, req->sector_number,
                                     req->nr_sectors)) {
            goto err;
        }
        break;
    }
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    xen_block_complete_aio(request, 0);

    return 0;

err:
    xen_block_finish_request(request);
    request->status = BLKIF_RSP_ERROR;
    return -1;
}

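/*
 * Place a response on the ring in the layout the negotiated protocol
 * expects. Returns nonzero if the frontend should be notified, and bumps
 * more_work if further requests are already pending on the ring.
 */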
static int xen_block_send_response(XenBlockRequest *request)
{
    XenBlockDataPlane *dataplane = request->dataplane;
    int send_notify = 0;
    int have_requests = 0;
    blkif_response_t *resp;

    /* Place on the response ring for the relevant domain. */
    switch (dataplane->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
        resp = (blkif_response_t *)RING_GET_RESPONSE(
            &dataplane->rings.native,
            dataplane->rings.native.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_32:
        resp = (blkif_response_t *)RING_GET_RESPONSE(
            &dataplane->rings.x86_32_part,
            dataplane->rings.x86_32_part.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_64:
        resp = (blkif_response_t *)RING_GET_RESPONSE(
            &dataplane->rings.x86_64_part,
            dataplane->rings.x86_64_part.rsp_prod_pvt);
        break;
    default:
        return 0;
    }

    resp->id = request->req.id;
    resp->operation = request->req.operation;
    resp->status = request->status;

    dataplane->rings.common.rsp_prod_pvt++;

    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&dataplane->rings.common,
                                         send_notify);
    if (dataplane->rings.common.rsp_prod_pvt ==
        dataplane->rings.common.req_cons) {
        /*
         * Tail check for pending requests. Allows frontend to avoid
         * notifications if requests are already in flight (lower
         * overheads and promotes batching).
         */
        RING_FINAL_CHECK_FOR_REQUESTS(&dataplane->rings.common,
                                      have_requests);
    } else if (RING_HAS_UNCONSUMED_REQUESTS(&dataplane->rings.common)) {
        have_requests = 1;
    }

    if (have_requests) {
        dataplane->more_work++;
    }
    return send_notify;
}

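/*
 * Copy a request off the ring, converting from the 32-bit or 64-bit x86
 * ABI layout to the native one where necessary.
 */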
static int xen_block_get_request(XenBlockDataPlane *dataplane,
                                 XenBlockRequest *request, RING_IDX rc)
{
    switch (dataplane->protocol) {
    case BLKIF_PROTOCOL_NATIVE: {
        blkif_request_t *req =
            RING_GET_REQUEST(&dataplane->rings.native, rc);

        memcpy(&request->req, req, sizeof(request->req));
        break;
    }
    case BLKIF_PROTOCOL_X86_32: {
        blkif_x86_32_request_t *req =
            RING_GET_REQUEST(&dataplane->rings.x86_32_part, rc);

        blkif_get_x86_32_req(&request->req, req);
        break;
    }
    case BLKIF_PROTOCOL_X86_64: {
        blkif_x86_64_request_t *req =
            RING_GET_REQUEST(&dataplane->rings.x86_64_part, rc);

        blkif_get_x86_64_req(&request->req, req);
        break;
    }
    }
    /* Prevent the compiler from accessing the on-ring fields instead. */
    barrier();
    return 0;
}

/*
 * Threshold of in-flight requests above which we will start using
 * blk_io_plug()/blk_io_unplug() to batch requests.
 */
#define IO_PLUG_THRESHOLD 1

static bool xen_block_handle_requests(XenBlockDataPlane *dataplane)
{
    RING_IDX rc, rp;
    XenBlockRequest *request;
    int inflight_atstart = dataplane->requests_inflight;
    int batched = 0;
    bool done_something = false;

    dataplane->more_work = 0;

    rc = dataplane->rings.common.req_cons;
    rp = dataplane->rings.common.sring->req_prod;
    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

    /*
     * If there were more than IO_PLUG_THRESHOLD requests in flight
     * when we got here, this is an indication that the bottleneck
     * is below us, so it's worth beginning to batch up I/O requests
     * rather than submitting them immediately. The maximum number
     * of requests we're willing to batch is the number already in
     * flight, so it can grow up to max_requests when the bottleneck
     * is below us.
     */
    if (inflight_atstart > IO_PLUG_THRESHOLD) {
        blk_io_plug(dataplane->blk);
    }
    while (rc != rp) {
        /* pull request from ring */
        if (RING_REQUEST_CONS_OVERFLOW(&dataplane->rings.common, rc)) {
            break;
        }
        request = xen_block_start_request(dataplane);
        if (request == NULL) {
            dataplane->more_work++;
            break;
        }
        xen_block_get_request(dataplane, request, rc);
        dataplane->rings.common.req_cons = ++rc;
        done_something = true;

        /* parse them */
        if (xen_block_parse_request(request) != 0) {
            switch (request->req.operation) {
            case BLKIF_OP_READ:
                block_acct_invalid(blk_get_stats(dataplane->blk),
                                   BLOCK_ACCT_READ);
                break;
            case BLKIF_OP_WRITE:
                block_acct_invalid(blk_get_stats(dataplane->blk),
                                   BLOCK_ACCT_WRITE);
                break;
            case BLKIF_OP_FLUSH_DISKCACHE:
                block_acct_invalid(blk_get_stats(dataplane->blk),
                                   BLOCK_ACCT_FLUSH);
            default:
                break;
            };

            if (xen_block_send_response(request)) {
                Error *local_err = NULL;

                xen_device_notify_event_channel(dataplane->xendev,
                                                dataplane->event_channel,
                                                &local_err);
                if (local_err) {
                    error_report_err(local_err);
                }
            }
            xen_block_release_request(request);
            continue;
        }

        if (inflight_atstart > IO_PLUG_THRESHOLD &&
            batched >= inflight_atstart) {
            blk_io_unplug(dataplane->blk);
        }
        xen_block_do_aio(request);
        if (inflight_atstart > IO_PLUG_THRESHOLD) {
            if (batched >= inflight_atstart) {
                blk_io_plug(dataplane->blk);
                batched = 0;
            } else {
                batched++;
            }
        }
    }
    if (inflight_atstart > IO_PLUG_THRESHOLD) {
        blk_io_unplug(dataplane->blk);
    }

    return done_something;
}

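/* Bottom half: process the ring from within the dataplane's AioContext. */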
static void xen_block_dataplane_bh(void *opaque)
{
    XenBlockDataPlane *dataplane = opaque;

    aio_context_acquire(dataplane->ctx);
    xen_block_handle_requests(dataplane);
    aio_context_release(dataplane->ctx);
}

static bool xen_block_dataplane_event(void *opaque)
{
    XenBlockDataPlane *dataplane = opaque;

    return xen_block_handle_requests(dataplane);
}

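/*
 * Create the dataplane state for a device. Ring processing runs in the
 * given IOThread's AioContext if one is supplied, otherwise in the main
 * loop context.
 */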
XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev,
                                              BlockBackend *blk,
                                              unsigned int sector_size,
                                              IOThread *iothread)
{
    XenBlockDataPlane *dataplane = g_new0(XenBlockDataPlane, 1);

    dataplane->xendev = xendev;
    dataplane->blk = blk;
    dataplane->sector_size = sector_size;

    QLIST_INIT(&dataplane->inflight);
    QLIST_INIT(&dataplane->freelist);

    if (iothread) {
        dataplane->iothread = iothread;
        object_ref(OBJECT(dataplane->iothread));
        dataplane->ctx = iothread_get_aio_context(dataplane->iothread);
    } else {
        dataplane->ctx = qemu_get_aio_context();
    }
    dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh,
                               dataplane);

    return dataplane;
}

void xen_block_dataplane_destroy(XenBlockDataPlane *dataplane)
{
    XenBlockRequest *request;

    if (!dataplane) {
        return;
    }

    while (!QLIST_EMPTY(&dataplane->freelist)) {
        request = QLIST_FIRST(&dataplane->freelist);
        QLIST_REMOVE(request, list);
        qemu_iovec_destroy(&request->v);
        qemu_vfree(request->buf);
        g_free(request);
    }

    qemu_bh_delete(dataplane->bh);
    if (dataplane->iothread) {
        object_unref(OBJECT(dataplane->iothread));
    }

    g_free(dataplane);
}

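/*
 * Quiesce the dataplane: move the BlockBackend back to the main loop
 * context, then unbind the event channel and unmap the shared ring.
 */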
void xen_block_dataplane_stop(XenBlockDataPlane *dataplane)
{
    XenDevice *xendev;

    if (!dataplane) {
        return;
    }

    aio_context_acquire(dataplane->ctx);
    /* Xen doesn't have multiple users for nodes, so this can't fail */
    blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort);
    aio_context_release(dataplane->ctx);

    xendev = dataplane->xendev;

    if (dataplane->event_channel) {
        Error *local_err = NULL;

        xen_device_unbind_event_channel(xendev, dataplane->event_channel,
                                        &local_err);
        dataplane->event_channel = NULL;

        if (local_err) {
            error_report_err(local_err);
        }
    }

    if (dataplane->sring) {
        Error *local_err = NULL;

        xen_device_unmap_grant_refs(xendev, dataplane->sring,
                                    dataplane->nr_ring_ref, &local_err);
        dataplane->sring = NULL;

        if (local_err) {
            error_report_err(local_err);
        }
    }

    g_free(dataplane->ring_ref);
    dataplane->ring_ref = NULL;
}

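/*
 * Connect the dataplane to the frontend: map the shared ring from the
 * given grant references, size max_requests for the negotiated protocol,
 * bind the event channel and move the BlockBackend into the dataplane's
 * AioContext. On failure, partial setup is torn down via
 * xen_block_dataplane_stop().
 */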
void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
                               const unsigned int ring_ref[],
                               unsigned int nr_ring_ref,
                               unsigned int event_channel,
                               unsigned int protocol,
                               Error **errp)
{
    XenDevice *xendev = dataplane->xendev;
    Error *local_err = NULL;
    unsigned int ring_size;
    unsigned int i;

    dataplane->nr_ring_ref = nr_ring_ref;
    dataplane->ring_ref = g_new(unsigned int, nr_ring_ref);

    for (i = 0; i < nr_ring_ref; i++) {
        dataplane->ring_ref[i] = ring_ref[i];
    }

    dataplane->protocol = protocol;

    ring_size = XC_PAGE_SIZE * dataplane->nr_ring_ref;
    switch (dataplane->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
    {
        dataplane->max_requests = __CONST_RING_SIZE(blkif, ring_size);
        break;
    }
    case BLKIF_PROTOCOL_X86_32:
    {
        dataplane->max_requests = __CONST_RING_SIZE(blkif_x86_32, ring_size);
        break;
    }
    case BLKIF_PROTOCOL_X86_64:
    {
        dataplane->max_requests = __CONST_RING_SIZE(blkif_x86_64, ring_size);
        break;
    }
    default:
        error_setg(errp, "unknown protocol %u", dataplane->protocol);
        return;
    }

    xen_device_set_max_grant_refs(xendev, dataplane->nr_ring_ref,
                                  &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto stop;
    }

    dataplane->sring = xen_device_map_grant_refs(xendev,
                                                 dataplane->ring_ref,
                                                 dataplane->nr_ring_ref,
                                                 PROT_READ | PROT_WRITE,
                                                 &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto stop;
    }

    switch (dataplane->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
    {
        blkif_sring_t *sring_native = dataplane->sring;

        BACK_RING_INIT(&dataplane->rings.native, sring_native, ring_size);
        break;
    }
    case BLKIF_PROTOCOL_X86_32:
    {
        blkif_x86_32_sring_t *sring_x86_32 = dataplane->sring;

        BACK_RING_INIT(&dataplane->rings.x86_32_part, sring_x86_32,
                       ring_size);
        break;
    }
    case BLKIF_PROTOCOL_X86_64:
    {
        blkif_x86_64_sring_t *sring_x86_64 = dataplane->sring;

        BACK_RING_INIT(&dataplane->rings.x86_64_part, sring_x86_64,
                       ring_size);
        break;
    }
    }

    dataplane->event_channel =
        xen_device_bind_event_channel(xendev, dataplane->ctx, event_channel,
                                      xen_block_dataplane_event, dataplane,
                                      &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto stop;
    }

    aio_context_acquire(dataplane->ctx);
    /* If other users keep the BlockBackend in the iothread, that's ok */
    blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL);
    aio_context_release(dataplane->ctx);
    return;

stop:
    xen_block_dataplane_stop(dataplane);
}