]>
Commit | Line | Data |
---|---|---|
2302c1ca MAL |
1 | /* |
2 | * QEMU Block driver for NBD | |
3 | * | |
b626b51a | 4 | * Copyright (C) 2016 Red Hat, Inc. |
2302c1ca MAL |
5 | * Copyright (C) 2008 Bull S.A.S. |
6 | * Author: Laurent Vivier <[email protected]> | |
7 | * | |
8 | * Some parts: | |
9 | * Copyright (C) 2007 Anthony Liguori <[email protected]> | |
10 | * | |
11 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
12 | * of this software and associated documentation files (the "Software"), to deal | |
13 | * in the Software without restriction, including without limitation the rights | |
14 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
15 | * copies of the Software, and to permit persons to whom the Software is | |
16 | * furnished to do so, subject to the following conditions: | |
17 | * | |
18 | * The above copyright notice and this permission notice shall be included in | |
19 | * all copies or substantial portions of the Software. | |
20 | * | |
21 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
22 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
23 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
24 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
25 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
26 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
27 | * THE SOFTWARE. | |
28 | */ | |
29 | ||
80c71a24 | 30 | #include "qemu/osdep.h" |
be41c100 | 31 | #include "qapi/error.h" |
2302c1ca | 32 | #include "nbd-client.h" |
2302c1ca MAL |
33 | |
/*
 * Convert between a request's slot index and the 64-bit handle stored in
 * the request.  The handle is the index XORed with the address passed as
 * the first argument, so the same operation maps in both directions.
 *
 * Every macro parameter is parenthesized so that an expression argument
 * (for example "base + 1") is cast as a whole rather than having the cast
 * bind only to its first operand.
 */
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)(bs)))
#define INDEX_TO_HANDLE(bs, index)  ((index) ^ ((uint64_t)(intptr_t)(bs)))
36 | ||
07b1b99c | 37 | static void nbd_recv_coroutines_wake_all(NBDClientSession *s) |
69152c09 MAL |
38 | { |
39 | int i; | |
40 | ||
41 | for (i = 0; i < MAX_NBD_REQUESTS; i++) { | |
40f4a218 SH |
42 | NBDClientRequest *req = &s->requests[i]; |
43 | ||
44 | if (req->coroutine && req->receiving) { | |
45 | aio_co_wake(req->coroutine); | |
69152c09 MAL |
46 | } |
47 | } | |
48 | } | |
49 | ||
f53a829b | 50 | static void nbd_teardown_connection(BlockDriverState *bs) |
4a41a2d6 | 51 | { |
10676b81 | 52 | NBDClientSession *client = nbd_get_client_session(bs); |
f53a829b | 53 | |
064097d9 DB |
54 | if (!client->ioc) { /* Already closed */ |
55 | return; | |
56 | } | |
57 | ||
4a41a2d6 | 58 | /* finish any pending coroutines */ |
064097d9 DB |
59 | qio_channel_shutdown(client->ioc, |
60 | QIO_CHANNEL_SHUTDOWN_BOTH, | |
61 | NULL); | |
a12a712a | 62 | BDRV_POLL_WHILE(bs, client->read_reply_co); |
4a41a2d6 | 63 | |
f53a829b | 64 | nbd_client_detach_aio_context(bs); |
064097d9 DB |
65 | object_unref(OBJECT(client->sioc)); |
66 | client->sioc = NULL; | |
67 | object_unref(OBJECT(client->ioc)); | |
68 | client->ioc = NULL; | |
4a41a2d6 SH |
69 | } |
70 | ||
ff82911c | 71 | static coroutine_fn void nbd_read_reply_entry(void *opaque) |
2302c1ca | 72 | { |
ff82911c | 73 | NBDClientSession *s = opaque; |
2302c1ca | 74 | uint64_t i; |
d0a18013 | 75 | int ret = 0; |
be41c100 | 76 | Error *local_err = NULL; |
2302c1ca | 77 | |
72b6ffc7 | 78 | while (!s->quit) { |
ff82911c | 79 | assert(s->reply.handle == 0); |
be41c100 VSO |
80 | ret = nbd_receive_reply(s->ioc, &s->reply, &local_err); |
81 | if (ret < 0) { | |
82 | error_report_err(local_err); | |
83 | } | |
a12a712a | 84 | if (ret <= 0) { |
ff82911c | 85 | break; |
2302c1ca | 86 | } |
2302c1ca | 87 | |
ff82911c PB |
88 | /* There's no need for a mutex on the receive side, because the |
89 | * handler acts as a synchronization point and ensures that only | |
90 | * one coroutine is called until the reply finishes. | |
91 | */ | |
92 | i = HANDLE_TO_INDEX(s, s->reply.handle); | |
40f4a218 SH |
93 | if (i >= MAX_NBD_REQUESTS || |
94 | !s->requests[i].coroutine || | |
95 | !s->requests[i].receiving) { | |
ff82911c PB |
96 | break; |
97 | } | |
2302c1ca | 98 | |
40f4a218 | 99 | /* We're woken up again by the request itself. Note that there |
ff82911c PB |
100 | * is no race between yielding and reentering read_reply_co. This |
101 | * is because: | |
102 | * | |
40f4a218 | 103 | * - if the request runs on the same AioContext, it is only |
ff82911c PB |
104 | * entered after we yield |
105 | * | |
40f4a218 | 106 | * - if the request runs on a different AioContext, reentering |
ff82911c PB |
107 | * read_reply_co happens through a bottom half, which can only |
108 | * run after we yield. | |
109 | */ | |
40f4a218 | 110 | aio_co_wake(s->requests[i].coroutine); |
ff82911c | 111 | qemu_coroutine_yield(); |
2302c1ca | 112 | } |
a12a712a | 113 | |
40f4a218 | 114 | s->quit = true; |
07b1b99c | 115 | nbd_recv_coroutines_wake_all(s); |
ff82911c | 116 | s->read_reply_co = NULL; |
2302c1ca MAL |
117 | } |
118 | ||
f53a829b | 119 | static int nbd_co_send_request(BlockDriverState *bs, |
ed2dd912 | 120 | NBDRequest *request, |
1e2a77a8 | 121 | QEMUIOVector *qiov) |
2302c1ca | 122 | { |
10676b81 | 123 | NBDClientSession *s = nbd_get_client_session(bs); |
030fa7f6 | 124 | int rc, i; |
2302c1ca MAL |
125 | |
126 | qemu_co_mutex_lock(&s->send_mutex); | |
6bdcc018 PB |
127 | while (s->in_flight == MAX_NBD_REQUESTS) { |
128 | qemu_co_queue_wait(&s->free_sema, &s->send_mutex); | |
129 | } | |
130 | s->in_flight++; | |
141cabe6 BW |
131 | |
132 | for (i = 0; i < MAX_NBD_REQUESTS; i++) { | |
40f4a218 | 133 | if (s->requests[i].coroutine == NULL) { |
141cabe6 BW |
134 | break; |
135 | } | |
136 | } | |
137 | ||
1c778ef7 | 138 | g_assert(qemu_in_coroutine()); |
141cabe6 | 139 | assert(i < MAX_NBD_REQUESTS); |
40f4a218 SH |
140 | |
141 | s->requests[i].coroutine = qemu_coroutine_self(); | |
142 | s->requests[i].receiving = false; | |
143 | ||
141cabe6 | 144 | request->handle = INDEX_TO_HANDLE(s, i); |
064097d9 | 145 | |
72b6ffc7 | 146 | if (s->quit) { |
3c2d5183 SH |
147 | rc = -EIO; |
148 | goto err; | |
72b6ffc7 | 149 | } |
064097d9 | 150 | if (!s->ioc) { |
3c2d5183 SH |
151 | rc = -EPIPE; |
152 | goto err; | |
064097d9 DB |
153 | } |
154 | ||
2302c1ca | 155 | if (qiov) { |
064097d9 | 156 | qio_channel_set_cork(s->ioc, true); |
1c778ef7 | 157 | rc = nbd_send_request(s->ioc, request); |
72b6ffc7 | 158 | if (rc >= 0 && !s->quit) { |
030fa7f6 EB |
159 | assert(request->len == iov_size(qiov->iov, qiov->niov)); |
160 | if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov, | |
161 | NULL) < 0) { | |
2302c1ca MAL |
162 | rc = -EIO; |
163 | } | |
164 | } | |
064097d9 | 165 | qio_channel_set_cork(s->ioc, false); |
2302c1ca | 166 | } else { |
1c778ef7 | 167 | rc = nbd_send_request(s->ioc, request); |
2302c1ca | 168 | } |
3c2d5183 SH |
169 | |
170 | err: | |
72b6ffc7 EB |
171 | if (rc < 0) { |
172 | s->quit = true; | |
3c2d5183 SH |
173 | s->requests[i].coroutine = NULL; |
174 | s->in_flight--; | |
175 | qemu_co_queue_next(&s->free_sema); | |
72b6ffc7 | 176 | } |
2302c1ca MAL |
177 | qemu_co_mutex_unlock(&s->send_mutex); |
178 | return rc; | |
179 | } | |
180 | ||
10676b81 | 181 | static void nbd_co_receive_reply(NBDClientSession *s, |
ed2dd912 EB |
182 | NBDRequest *request, |
183 | NBDReply *reply, | |
1e2a77a8 | 184 | QEMUIOVector *qiov) |
2302c1ca | 185 | { |
40f4a218 | 186 | int i = HANDLE_TO_INDEX(s, request->handle); |
2302c1ca | 187 | |
ff82911c | 188 | /* Wait until we're woken up by nbd_read_reply_entry. */ |
40f4a218 | 189 | s->requests[i].receiving = true; |
2302c1ca | 190 | qemu_coroutine_yield(); |
40f4a218 | 191 | s->requests[i].receiving = false; |
2302c1ca | 192 | *reply = s->reply; |
72b6ffc7 | 193 | if (reply->handle != request->handle || !s->ioc || s->quit) { |
2302c1ca MAL |
194 | reply->error = EIO; |
195 | } else { | |
196 | if (qiov && reply->error == 0) { | |
030fa7f6 EB |
197 | assert(request->len == iov_size(qiov->iov, qiov->niov)); |
198 | if (qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov, | |
199 | NULL) < 0) { | |
2302c1ca | 200 | reply->error = EIO; |
40f4a218 | 201 | s->quit = true; |
2302c1ca MAL |
202 | } |
203 | } | |
204 | ||
205 | /* Tell the read handler to read another header. */ | |
206 | s->reply.handle = 0; | |
207 | } | |
ff82911c | 208 | |
40f4a218 | 209 | s->requests[i].coroutine = NULL; |
ff82911c PB |
210 | |
211 | /* Kick the read_reply_co to get the next reply. */ | |
212 | if (s->read_reply_co) { | |
213 | aio_co_wake(s->read_reply_co); | |
2302c1ca | 214 | } |
6bdcc018 PB |
215 | |
216 | qemu_co_mutex_lock(&s->send_mutex); | |
217 | s->in_flight--; | |
218 | qemu_co_queue_next(&s->free_sema); | |
219 | qemu_co_mutex_unlock(&s->send_mutex); | |
2302c1ca MAL |
220 | } |
221 | ||
f35dff7e VSO |
222 | static int nbd_co_request(BlockDriverState *bs, |
223 | NBDRequest *request, | |
224 | QEMUIOVector *qiov) | |
225 | { | |
226 | NBDClientSession *client = nbd_get_client_session(bs); | |
227 | NBDReply reply; | |
228 | int ret; | |
229 | ||
230 | assert(!qiov || request->type == NBD_CMD_WRITE || | |
231 | request->type == NBD_CMD_READ); | |
232 | ret = nbd_co_send_request(bs, request, | |
233 | request->type == NBD_CMD_WRITE ? qiov : NULL); | |
234 | if (ret < 0) { | |
235 | reply.error = -ret; | |
236 | } else { | |
237 | nbd_co_receive_reply(client, request, &reply, | |
238 | request->type == NBD_CMD_READ ? qiov : NULL); | |
239 | } | |
240 | return -reply.error; | |
241 | } | |
242 | ||
70c4fb26 EB |
243 | int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, |
244 | uint64_t bytes, QEMUIOVector *qiov, int flags) | |
2302c1ca | 245 | { |
ed2dd912 | 246 | NBDRequest request = { |
70c4fb26 EB |
247 | .type = NBD_CMD_READ, |
248 | .from = offset, | |
249 | .len = bytes, | |
250 | }; | |
2302c1ca | 251 | |
70c4fb26 EB |
252 | assert(bytes <= NBD_MAX_BUFFER_SIZE); |
253 | assert(!flags); | |
2302c1ca | 254 | |
f35dff7e | 255 | return nbd_co_request(bs, &request, qiov); |
2302c1ca MAL |
256 | } |
257 | ||
70c4fb26 EB |
258 | int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, |
259 | uint64_t bytes, QEMUIOVector *qiov, int flags) | |
2302c1ca | 260 | { |
10676b81 | 261 | NBDClientSession *client = nbd_get_client_session(bs); |
ed2dd912 | 262 | NBDRequest request = { |
70c4fb26 EB |
263 | .type = NBD_CMD_WRITE, |
264 | .from = offset, | |
265 | .len = bytes, | |
266 | }; | |
2302c1ca | 267 | |
52a46505 | 268 | if (flags & BDRV_REQ_FUA) { |
004a89fc | 269 | assert(client->info.flags & NBD_FLAG_SEND_FUA); |
b626b51a | 270 | request.flags |= NBD_CMD_FLAG_FUA; |
2302c1ca MAL |
271 | } |
272 | ||
70c4fb26 | 273 | assert(bytes <= NBD_MAX_BUFFER_SIZE); |
2302c1ca | 274 | |
f35dff7e | 275 | return nbd_co_request(bs, &request, qiov); |
2302c1ca MAL |
276 | } |
277 | ||
fa778fff | 278 | int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, |
f5a5ca79 | 279 | int bytes, BdrvRequestFlags flags) |
fa778fff | 280 | { |
fa778fff EB |
281 | NBDClientSession *client = nbd_get_client_session(bs); |
282 | NBDRequest request = { | |
283 | .type = NBD_CMD_WRITE_ZEROES, | |
284 | .from = offset, | |
f5a5ca79 | 285 | .len = bytes, |
fa778fff | 286 | }; |
fa778fff | 287 | |
004a89fc | 288 | if (!(client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) { |
fa778fff EB |
289 | return -ENOTSUP; |
290 | } | |
291 | ||
292 | if (flags & BDRV_REQ_FUA) { | |
004a89fc | 293 | assert(client->info.flags & NBD_FLAG_SEND_FUA); |
fa778fff EB |
294 | request.flags |= NBD_CMD_FLAG_FUA; |
295 | } | |
296 | if (!(flags & BDRV_REQ_MAY_UNMAP)) { | |
297 | request.flags |= NBD_CMD_FLAG_NO_HOLE; | |
298 | } | |
299 | ||
f35dff7e | 300 | return nbd_co_request(bs, &request, NULL); |
fa778fff EB |
301 | } |
302 | ||
f53a829b | 303 | int nbd_client_co_flush(BlockDriverState *bs) |
2302c1ca | 304 | { |
10676b81 | 305 | NBDClientSession *client = nbd_get_client_session(bs); |
ed2dd912 | 306 | NBDRequest request = { .type = NBD_CMD_FLUSH }; |
2302c1ca | 307 | |
004a89fc | 308 | if (!(client->info.flags & NBD_FLAG_SEND_FLUSH)) { |
2302c1ca MAL |
309 | return 0; |
310 | } | |
311 | ||
2302c1ca MAL |
312 | request.from = 0; |
313 | request.len = 0; | |
314 | ||
f35dff7e | 315 | return nbd_co_request(bs, &request, NULL); |
2302c1ca MAL |
316 | } |
317 | ||
f5a5ca79 | 318 | int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) |
2302c1ca | 319 | { |
10676b81 | 320 | NBDClientSession *client = nbd_get_client_session(bs); |
ed2dd912 | 321 | NBDRequest request = { |
447e57c3 EB |
322 | .type = NBD_CMD_TRIM, |
323 | .from = offset, | |
f5a5ca79 | 324 | .len = bytes, |
447e57c3 | 325 | }; |
2302c1ca | 326 | |
004a89fc | 327 | if (!(client->info.flags & NBD_FLAG_SEND_TRIM)) { |
2302c1ca MAL |
328 | return 0; |
329 | } | |
2302c1ca | 330 | |
f35dff7e | 331 | return nbd_co_request(bs, &request, NULL); |
2302c1ca MAL |
332 | } |
333 | ||
f53a829b | 334 | void nbd_client_detach_aio_context(BlockDriverState *bs) |
69447cd8 | 335 | { |
ff82911c | 336 | NBDClientSession *client = nbd_get_client_session(bs); |
96d06835 | 337 | qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc)); |
69447cd8 SH |
338 | } |
339 | ||
f53a829b HR |
340 | void nbd_client_attach_aio_context(BlockDriverState *bs, |
341 | AioContext *new_context) | |
69447cd8 | 342 | { |
ff82911c | 343 | NBDClientSession *client = nbd_get_client_session(bs); |
96d06835 | 344 | qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc), new_context); |
ff82911c | 345 | aio_co_schedule(new_context, client->read_reply_co); |
69447cd8 SH |
346 | } |
347 | ||
f53a829b | 348 | void nbd_client_close(BlockDriverState *bs) |
2302c1ca | 349 | { |
10676b81 | 350 | NBDClientSession *client = nbd_get_client_session(bs); |
ed2dd912 | 351 | NBDRequest request = { .type = NBD_CMD_DISC }; |
2302c1ca | 352 | |
064097d9 | 353 | if (client->ioc == NULL) { |
4a41a2d6 SH |
354 | return; |
355 | } | |
356 | ||
1c778ef7 | 357 | nbd_send_request(client->ioc, &request); |
5ad283eb | 358 | |
f53a829b | 359 | nbd_teardown_connection(bs); |
2302c1ca MAL |
360 | } |
361 | ||
75822a12 DB |
362 | int nbd_client_init(BlockDriverState *bs, |
363 | QIOChannelSocket *sioc, | |
364 | const char *export, | |
365 | QCryptoTLSCreds *tlscreds, | |
366 | const char *hostname, | |
367 | Error **errp) | |
2302c1ca | 368 | { |
10676b81 | 369 | NBDClientSession *client = nbd_get_client_session(bs); |
2302c1ca MAL |
370 | int ret; |
371 | ||
372 | /* NBD handshake */ | |
e2bc625f | 373 | logout("session init %s\n", export); |
064097d9 DB |
374 | qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL); |
375 | ||
081dd1fe | 376 | client->info.request_sizes = true; |
1c778ef7 | 377 | ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export, |
75822a12 | 378 | tlscreds, hostname, |
004a89fc | 379 | &client->ioc, &client->info, errp); |
2302c1ca MAL |
380 | if (ret < 0) { |
381 | logout("Failed to negotiate with the NBD server\n"); | |
2302c1ca MAL |
382 | return ret; |
383 | } | |
004a89fc | 384 | if (client->info.flags & NBD_FLAG_SEND_FUA) { |
4df863f3 | 385 | bs->supported_write_flags = BDRV_REQ_FUA; |
169407e1 EB |
386 | bs->supported_zero_flags |= BDRV_REQ_FUA; |
387 | } | |
004a89fc | 388 | if (client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) { |
169407e1 | 389 | bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP; |
4df863f3 | 390 | } |
081dd1fe EB |
391 | if (client->info.min_block > bs->bl.request_alignment) { |
392 | bs->bl.request_alignment = client->info.min_block; | |
393 | } | |
2302c1ca MAL |
394 | |
395 | qemu_co_mutex_init(&client->send_mutex); | |
9bc9732f | 396 | qemu_co_queue_init(&client->free_sema); |
064097d9 DB |
397 | client->sioc = sioc; |
398 | object_ref(OBJECT(client->sioc)); | |
f95910fe DB |
399 | |
400 | if (!client->ioc) { | |
401 | client->ioc = QIO_CHANNEL(sioc); | |
402 | object_ref(OBJECT(client->ioc)); | |
403 | } | |
2302c1ca MAL |
404 | |
405 | /* Now that we're connected, set the socket to be non-blocking and | |
406 | * kick the reply mechanism. */ | |
064097d9 | 407 | qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL); |
ff82911c | 408 | client->read_reply_co = qemu_coroutine_create(nbd_read_reply_entry, client); |
f53a829b | 409 | nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs)); |
2302c1ca MAL |
410 | |
411 | logout("Established connection with NBD server\n"); | |
412 | return 0; | |
413 | } |