]>
Commit | Line | Data |
---|---|---|
75818250 | 1 | /* |
a16a7907 | 2 | * Copyright (C) 2016-2018 Red Hat, Inc. |
7a5ca864 FB |
3 | * Copyright (C) 2005 Anthony Liguori <[email protected]> |
4 | * | |
798bfe00 | 5 | * Network Block Device Server Side |
7a5ca864 FB |
6 | * |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; under version 2 of the License. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
8167ee88 | 17 | * along with this program; if not, see <http://www.gnu.org/licenses/>. |
75818250 | 18 | */ |
7a5ca864 | 19 | |
d38ea87a | 20 | #include "qemu/osdep.h" |
da34e65c | 21 | #include "qapi/error.h" |
9588463e | 22 | #include "trace.h" |
798bfe00 | 23 | #include "nbd-internal.h" |
ca441480 PB |
24 | |
25 | static int system_errno_to_nbd_errno(int err) | |
26 | { | |
27 | switch (err) { | |
28 | case 0: | |
29 | return NBD_SUCCESS; | |
30 | case EPERM: | |
c0301fcc | 31 | case EROFS: |
ca441480 PB |
32 | return NBD_EPERM; |
33 | case EIO: | |
34 | return NBD_EIO; | |
35 | case ENOMEM: | |
36 | return NBD_ENOMEM; | |
37 | #ifdef EDQUOT | |
38 | case EDQUOT: | |
39 | #endif | |
40 | case EFBIG: | |
41 | case ENOSPC: | |
42 | return NBD_ENOSPC; | |
bae245d1 EB |
43 | case EOVERFLOW: |
44 | return NBD_EOVERFLOW; | |
b6f5d3b5 EB |
45 | case ESHUTDOWN: |
46 | return NBD_ESHUTDOWN; | |
ca441480 PB |
47 | case EINVAL: |
48 | default: | |
49 | return NBD_EINVAL; | |
50 | } | |
51 | } | |
52 | ||
9a304d29 PB |
53 | /* Definitions for opaque data types */ |
54 | ||
315f78ab | 55 | typedef struct NBDRequestData NBDRequestData; |
9a304d29 | 56 | |
315f78ab EB |
57 | struct NBDRequestData { |
58 | QSIMPLEQ_ENTRY(NBDRequestData) entry; | |
9a304d29 PB |
59 | NBDClient *client; |
60 | uint8_t *data; | |
29b6c3b3 | 61 | bool complete; |
9a304d29 PB |
62 | }; |
63 | ||
64 | struct NBDExport { | |
2c8d9f06 | 65 | int refcount; |
0ddf08db PB |
66 | void (*close)(NBDExport *exp); |
67 | ||
aadf99a7 | 68 | BlockBackend *blk; |
ee0a19ec | 69 | char *name; |
b1a75b33 | 70 | char *description; |
9a304d29 PB |
71 | off_t dev_offset; |
72 | off_t size; | |
7423f417 | 73 | uint16_t nbdflags; |
4b9441f6 | 74 | QTAILQ_HEAD(, NBDClient) clients; |
ee0a19ec | 75 | QTAILQ_ENTRY(NBDExport) next; |
958c717d HR |
76 | |
77 | AioContext *ctx; | |
741cc431 | 78 | |
cd7fca95 | 79 | BlockBackend *eject_notifier_blk; |
741cc431 | 80 | Notifier eject_notifier; |
9a304d29 PB |
81 | }; |
82 | ||
ee0a19ec PB |
83 | static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); |
84 | ||
9a304d29 PB |
85 | struct NBDClient { |
86 | int refcount; | |
0c9390d9 | 87 | void (*close_fn)(NBDClient *client, bool negotiated); |
9a304d29 PB |
88 | |
89 | NBDExport *exp; | |
f95910fe DB |
90 | QCryptoTLSCreds *tlscreds; |
91 | char *tlsaclname; | |
1c778ef7 DB |
92 | QIOChannelSocket *sioc; /* The underlying data channel */ |
93 | QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ | |
9a304d29 PB |
94 | |
95 | Coroutine *recv_coroutine; | |
96 | ||
97 | CoMutex send_lock; | |
98 | Coroutine *send_coroutine; | |
99 | ||
4b9441f6 | 100 | QTAILQ_ENTRY(NBDClient) next; |
9a304d29 | 101 | int nb_requests; |
ff2b68aa | 102 | bool closing; |
5c54e7fa VSO |
103 | |
104 | bool structured_reply; | |
9a304d29 | 105 | |
0cfae925 VSO |
106 | uint32_t opt; /* Current option being negotiated */ |
107 | uint32_t optlen; /* remaining length of data in ioc for the option being | |
108 | negotiated now */ | |
109 | }; | |
7a5ca864 | 110 | |
ff82911c | 111 | static void nbd_client_receive_next_request(NBDClient *client); |
958c717d | 112 | |
6b8c01e7 | 113 | /* Basic flow for negotiation |
7a5ca864 FB |
114 | |
115 | Server Client | |
7a5ca864 | 116 | Negotiate |
6b8c01e7 PB |
117 | |
118 | or | |
119 | ||
120 | Server Client | |
121 | Negotiate #1 | |
122 | Option | |
123 | Negotiate #2 | |
124 | ||
125 | ---- | |
126 | ||
127 | followed by | |
128 | ||
129 | Server Client | |
7a5ca864 FB |
130 | Request |
131 | Response | |
132 | Request | |
133 | Response | |
134 | ... | |
135 | ... | |
136 | Request (type == 2) | |
6b8c01e7 | 137 | |
7a5ca864 FB |
138 | */ |
139 | ||
1d17922a VSO |
140 | static inline void set_be_option_rep(NBDOptionReply *rep, uint32_t option, |
141 | uint32_t type, uint32_t length) | |
142 | { | |
143 | stq_be_p(&rep->magic, NBD_REP_MAGIC); | |
144 | stl_be_p(&rep->option, option); | |
145 | stl_be_p(&rep->type, type); | |
146 | stl_be_p(&rep->length, length); | |
147 | } | |
148 | ||
526e5c65 EB |
149 | /* Send a reply header, including length, but no payload. |
150 | * Return -errno on error, 0 on success. */ | |
0cfae925 VSO |
151 | static int nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type, |
152 | uint32_t len, Error **errp) | |
6b8c01e7 | 153 | { |
1d17922a | 154 | NBDOptionReply rep; |
6b8c01e7 | 155 | |
1d17922a | 156 | trace_nbd_negotiate_send_rep_len(client->opt, nbd_opt_lookup(client->opt), |
3736cc5b | 157 | type, nbd_rep_lookup(type), len); |
f95910fe | 158 | |
f37708f6 | 159 | assert(len < NBD_MAX_BUFFER_SIZE); |
2fd2c840 | 160 | |
1d17922a VSO |
161 | set_be_option_rep(&rep, client->opt, type, len); |
162 | return nbd_write(client->ioc, &rep, sizeof(rep), errp); | |
f5076b5a | 163 | } |
6b8c01e7 | 164 | |
526e5c65 EB |
165 | /* Send a reply header with default 0 length. |
166 | * Return -errno on error, 0 on success. */ | |
0cfae925 | 167 | static int nbd_negotiate_send_rep(NBDClient *client, uint32_t type, |
2fd2c840 | 168 | Error **errp) |
526e5c65 | 169 | { |
0cfae925 | 170 | return nbd_negotiate_send_rep_len(client, type, 0, errp); |
526e5c65 EB |
171 | } |
172 | ||
36683283 EB |
173 | /* Send an error reply. |
174 | * Return -errno on error, 0 on success. */ | |
41f5dfaf EB |
175 | static int GCC_FMT_ATTR(4, 0) |
176 | nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type, | |
177 | Error **errp, const char *fmt, va_list va) | |
36683283 | 178 | { |
36683283 EB |
179 | char *msg; |
180 | int ret; | |
181 | size_t len; | |
182 | ||
36683283 | 183 | msg = g_strdup_vprintf(fmt, va); |
36683283 EB |
184 | len = strlen(msg); |
185 | assert(len < 4096); | |
9588463e | 186 | trace_nbd_negotiate_send_rep_err(msg); |
0cfae925 | 187 | ret = nbd_negotiate_send_rep_len(client, type, len, errp); |
36683283 EB |
188 | if (ret < 0) { |
189 | goto out; | |
190 | } | |
0cfae925 | 191 | if (nbd_write(client->ioc, msg, len, errp) < 0) { |
2fd2c840 | 192 | error_prepend(errp, "write failed (error message): "); |
36683283 EB |
193 | ret = -EIO; |
194 | } else { | |
195 | ret = 0; | |
196 | } | |
2fd2c840 | 197 | |
36683283 EB |
198 | out: |
199 | g_free(msg); | |
200 | return ret; | |
201 | } | |
202 | ||
41f5dfaf EB |
203 | /* Send an error reply. |
204 | * Return -errno on error, 0 on success. */ | |
205 | static int GCC_FMT_ATTR(4, 5) | |
206 | nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type, | |
207 | Error **errp, const char *fmt, ...) | |
208 | { | |
209 | va_list va; | |
210 | int ret; | |
211 | ||
212 | va_start(va, fmt); | |
213 | ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va); | |
214 | va_end(va); | |
215 | return ret; | |
216 | } | |
217 | ||
894e0280 EB |
218 | /* Drop remainder of the current option, and send a reply with the |
219 | * given error type and message. Return -errno on read or write | |
220 | * failure; or 0 if connection is still live. */ | |
221 | static int GCC_FMT_ATTR(4, 5) | |
222 | nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp, | |
223 | const char *fmt, ...) | |
224 | { | |
225 | int ret = nbd_drop(client->ioc, client->optlen, errp); | |
226 | va_list va; | |
227 | ||
228 | client->optlen = 0; | |
229 | if (!ret) { | |
230 | va_start(va, fmt); | |
231 | ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va); | |
232 | va_end(va); | |
233 | } | |
234 | return ret; | |
235 | } | |
236 | ||
237 | /* Read size bytes from the unparsed payload of the current option. | |
238 | * Return -errno on I/O error, 0 if option was completely handled by | |
239 | * sending a reply about inconsistent lengths, or 1 on success. */ | |
240 | static int nbd_opt_read(NBDClient *client, void *buffer, size_t size, | |
241 | Error **errp) | |
242 | { | |
243 | if (size > client->optlen) { | |
244 | return nbd_opt_drop(client, NBD_REP_ERR_INVALID, errp, | |
245 | "Inconsistent lengths in option %s", | |
246 | nbd_opt_lookup(client->opt)); | |
247 | } | |
248 | client->optlen -= size; | |
249 | return qio_channel_read_all(client->ioc, buffer, size, errp) < 0 ? -EIO : 1; | |
250 | } | |
251 | ||
526e5c65 EB |
252 | /* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload. |
253 | * Return -errno on error, 0 on success. */ | |
0cfae925 | 254 | static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp, |
2fd2c840 | 255 | Error **errp) |
32d7d2e0 | 256 | { |
b1a75b33 | 257 | size_t name_len, desc_len; |
526e5c65 | 258 | uint32_t len; |
b1a75b33 EB |
259 | const char *name = exp->name ? exp->name : ""; |
260 | const char *desc = exp->description ? exp->description : ""; | |
0cfae925 | 261 | QIOChannel *ioc = client->ioc; |
2e5c9ad6 | 262 | int ret; |
32d7d2e0 | 263 | |
9588463e | 264 | trace_nbd_negotiate_send_rep_list(name, desc); |
b1a75b33 EB |
265 | name_len = strlen(name); |
266 | desc_len = strlen(desc); | |
526e5c65 | 267 | len = name_len + desc_len + sizeof(len); |
0cfae925 | 268 | ret = nbd_negotiate_send_rep_len(client, NBD_REP_SERVER, len, errp); |
2e5c9ad6 VSO |
269 | if (ret < 0) { |
270 | return ret; | |
32d7d2e0 | 271 | } |
526e5c65 | 272 | |
32d7d2e0 | 273 | len = cpu_to_be32(name_len); |
2fd2c840 VSO |
274 | if (nbd_write(ioc, &len, sizeof(len), errp) < 0) { |
275 | error_prepend(errp, "write failed (name length): "); | |
b1a75b33 EB |
276 | return -EINVAL; |
277 | } | |
2fd2c840 VSO |
278 | |
279 | if (nbd_write(ioc, name, name_len, errp) < 0) { | |
280 | error_prepend(errp, "write failed (name buffer): "); | |
32d7d2e0 HB |
281 | return -EINVAL; |
282 | } | |
2fd2c840 VSO |
283 | |
284 | if (nbd_write(ioc, desc, desc_len, errp) < 0) { | |
285 | error_prepend(errp, "write failed (description buffer): "); | |
32d7d2e0 HB |
286 | return -EINVAL; |
287 | } | |
2fd2c840 | 288 | |
32d7d2e0 HB |
289 | return 0; |
290 | } | |
291 | ||
526e5c65 EB |
292 | /* Process the NBD_OPT_LIST command, with a potential series of replies. |
293 | * Return -errno on error, 0 on success. */ | |
e68c35cf | 294 | static int nbd_negotiate_handle_list(NBDClient *client, Error **errp) |
32d7d2e0 | 295 | { |
32d7d2e0 | 296 | NBDExport *exp; |
0cfae925 | 297 | assert(client->opt == NBD_OPT_LIST); |
32d7d2e0 | 298 | |
32d7d2e0 HB |
299 | /* For each export, send a NBD_REP_SERVER reply. */ |
300 | QTAILQ_FOREACH(exp, &exports, next) { | |
0cfae925 | 301 | if (nbd_negotiate_send_rep_list(client, exp, errp)) { |
32d7d2e0 HB |
302 | return -EINVAL; |
303 | } | |
304 | } | |
305 | /* Finish with a NBD_REP_ACK. */ | |
0cfae925 | 306 | return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); |
32d7d2e0 HB |
307 | } |
308 | ||
f37708f6 EB |
309 | /* Send a reply to NBD_OPT_EXPORT_NAME. |
310 | * Return -errno on error, 0 on success. */ | |
0cfae925 | 311 | static int nbd_negotiate_handle_export_name(NBDClient *client, |
23e099c3 | 312 | uint16_t myflags, bool no_zeroes, |
2fd2c840 | 313 | Error **errp) |
f5076b5a | 314 | { |
943cec86 | 315 | char name[NBD_MAX_NAME_SIZE + 1]; |
5f66d060 | 316 | char buf[NBD_REPLY_EXPORT_NAME_SIZE] = ""; |
23e099c3 EB |
317 | size_t len; |
318 | int ret; | |
6b8c01e7 | 319 | |
f5076b5a HB |
320 | /* Client sends: |
321 | [20 .. xx] export name (length bytes) | |
5f66d060 EB |
322 | Server replies: |
323 | [ 0 .. 7] size | |
324 | [ 8 .. 9] export flags | |
325 | [10 .. 133] reserved (0) [unless no_zeroes] | |
f5076b5a | 326 | */ |
9588463e | 327 | trace_nbd_negotiate_handle_export_name(); |
0cfae925 | 328 | if (client->optlen >= sizeof(name)) { |
2fd2c840 | 329 | error_setg(errp, "Bad length received"); |
d9faeed8 | 330 | return -EINVAL; |
6b8c01e7 | 331 | } |
0cfae925 | 332 | if (nbd_read(client->ioc, name, client->optlen, errp) < 0) { |
2fd2c840 | 333 | error_prepend(errp, "read failed: "); |
32f158a6 | 334 | return -EIO; |
6b8c01e7 | 335 | } |
0cfae925 VSO |
336 | name[client->optlen] = '\0'; |
337 | client->optlen = 0; | |
6b8c01e7 | 338 | |
9588463e | 339 | trace_nbd_negotiate_handle_export_name_request(name); |
9344e5f5 | 340 | |
6b8c01e7 PB |
341 | client->exp = nbd_export_find(name); |
342 | if (!client->exp) { | |
2fd2c840 | 343 | error_setg(errp, "export not found"); |
d9faeed8 | 344 | return -EINVAL; |
6b8c01e7 PB |
345 | } |
346 | ||
23e099c3 EB |
347 | trace_nbd_negotiate_new_style_size_flags(client->exp->size, |
348 | client->exp->nbdflags | myflags); | |
349 | stq_be_p(buf, client->exp->size); | |
350 | stw_be_p(buf + 8, client->exp->nbdflags | myflags); | |
351 | len = no_zeroes ? 10 : sizeof(buf); | |
352 | ret = nbd_write(client->ioc, buf, len, errp); | |
353 | if (ret < 0) { | |
354 | error_prepend(errp, "write failed: "); | |
355 | return ret; | |
356 | } | |
357 | ||
6b8c01e7 PB |
358 | QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); |
359 | nbd_export_get(client->exp); | |
d9faeed8 VSO |
360 | |
361 | return 0; | |
6b8c01e7 PB |
362 | } |
363 | ||
f37708f6 EB |
364 | /* Send a single NBD_REP_INFO, with a buffer @buf of @length bytes. |
365 | * The buffer does NOT include the info type prefix. | |
366 | * Return -errno on error, 0 if ready to send more. */ | |
0cfae925 | 367 | static int nbd_negotiate_send_info(NBDClient *client, |
f37708f6 EB |
368 | uint16_t info, uint32_t length, void *buf, |
369 | Error **errp) | |
370 | { | |
371 | int rc; | |
372 | ||
373 | trace_nbd_negotiate_send_info(info, nbd_info_lookup(info), length); | |
0cfae925 | 374 | rc = nbd_negotiate_send_rep_len(client, NBD_REP_INFO, |
f37708f6 EB |
375 | sizeof(info) + length, errp); |
376 | if (rc < 0) { | |
377 | return rc; | |
378 | } | |
379 | cpu_to_be16s(&info); | |
380 | if (nbd_write(client->ioc, &info, sizeof(info), errp) < 0) { | |
381 | return -EIO; | |
382 | } | |
383 | if (nbd_write(client->ioc, buf, length, errp) < 0) { | |
384 | return -EIO; | |
385 | } | |
386 | return 0; | |
387 | } | |
388 | ||
a16a7907 EB |
389 | /* nbd_reject_length: Handle any unexpected payload. |
390 | * @fatal requests that we quit talking to the client, even if we are able | |
391 | * to successfully send an error reply. | |
392 | * Return: | |
393 | * -errno transmission error occurred or @fatal was requested, errp is set | |
394 | * 0 error message successfully sent to client, errp is not set | |
395 | */ | |
0cfae925 | 396 | static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp) |
a16a7907 EB |
397 | { |
398 | int ret; | |
399 | ||
0cfae925 | 400 | assert(client->optlen); |
894e0280 EB |
401 | ret = nbd_opt_drop(client, NBD_REP_ERR_INVALID, errp, |
402 | "option '%s' has unexpected length", | |
403 | nbd_opt_lookup(client->opt)); | |
a16a7907 | 404 | if (fatal && !ret) { |
894e0280 | 405 | error_setg(errp, "option '%s' has unexpected length", |
0cfae925 | 406 | nbd_opt_lookup(client->opt)); |
a16a7907 EB |
407 | return -EINVAL; |
408 | } | |
409 | return ret; | |
410 | } | |
411 | ||
f37708f6 EB |
412 | /* Handle NBD_OPT_INFO and NBD_OPT_GO. |
413 | * Return -errno on error, 0 if ready for next option, and 1 to move | |
414 | * into transmission phase. */ | |
0cfae925 | 415 | static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags, |
f37708f6 EB |
416 | Error **errp) |
417 | { | |
418 | int rc; | |
419 | char name[NBD_MAX_NAME_SIZE + 1]; | |
420 | NBDExport *exp; | |
421 | uint16_t requests; | |
422 | uint16_t request; | |
423 | uint32_t namelen; | |
424 | bool sendname = false; | |
0c1d50bd EB |
425 | bool blocksize = false; |
426 | uint32_t sizes[3]; | |
f37708f6 | 427 | char buf[sizeof(uint64_t) + sizeof(uint16_t)]; |
f37708f6 EB |
428 | |
429 | /* Client sends: | |
430 | 4 bytes: L, name length (can be 0) | |
431 | L bytes: export name | |
432 | 2 bytes: N, number of requests (can be 0) | |
433 | N * 2 bytes: N requests | |
434 | */ | |
894e0280 EB |
435 | rc = nbd_opt_read(client, &namelen, sizeof(namelen), errp); |
436 | if (rc <= 0) { | |
437 | return rc; | |
f37708f6 EB |
438 | } |
439 | be32_to_cpus(&namelen); | |
51ae4f84 | 440 | if (namelen >= sizeof(name)) { |
894e0280 EB |
441 | return nbd_opt_drop(client, NBD_REP_ERR_INVALID, errp, |
442 | "name too long for qemu"); | |
51ae4f84 | 443 | } |
894e0280 EB |
444 | rc = nbd_opt_read(client, name, namelen, errp); |
445 | if (rc <= 0) { | |
446 | return rc; | |
f37708f6 EB |
447 | } |
448 | name[namelen] = '\0'; | |
f37708f6 EB |
449 | trace_nbd_negotiate_handle_export_name_request(name); |
450 | ||
894e0280 EB |
451 | rc = nbd_opt_read(client, &requests, sizeof(requests), errp); |
452 | if (rc <= 0) { | |
453 | return rc; | |
f37708f6 EB |
454 | } |
455 | be16_to_cpus(&requests); | |
f37708f6 | 456 | trace_nbd_negotiate_handle_info_requests(requests); |
f37708f6 | 457 | while (requests--) { |
894e0280 EB |
458 | rc = nbd_opt_read(client, &request, sizeof(request), errp); |
459 | if (rc <= 0) { | |
460 | return rc; | |
f37708f6 EB |
461 | } |
462 | be16_to_cpus(&request); | |
f37708f6 EB |
463 | trace_nbd_negotiate_handle_info_request(request, |
464 | nbd_info_lookup(request)); | |
0c1d50bd EB |
465 | /* We care about NBD_INFO_NAME and NBD_INFO_BLOCK_SIZE; |
466 | * everything else is either a request we don't know or | |
467 | * something we send regardless of request */ | |
468 | switch (request) { | |
469 | case NBD_INFO_NAME: | |
f37708f6 | 470 | sendname = true; |
0c1d50bd EB |
471 | break; |
472 | case NBD_INFO_BLOCK_SIZE: | |
473 | blocksize = true; | |
474 | break; | |
f37708f6 EB |
475 | } |
476 | } | |
894e0280 EB |
477 | if (client->optlen) { |
478 | return nbd_reject_length(client, false, errp); | |
479 | } | |
f37708f6 EB |
480 | |
481 | exp = nbd_export_find(name); | |
482 | if (!exp) { | |
0cfae925 VSO |
483 | return nbd_negotiate_send_rep_err(client, NBD_REP_ERR_UNKNOWN, |
484 | errp, "export '%s' not present", | |
f37708f6 EB |
485 | name); |
486 | } | |
487 | ||
488 | /* Don't bother sending NBD_INFO_NAME unless client requested it */ | |
489 | if (sendname) { | |
0cfae925 | 490 | rc = nbd_negotiate_send_info(client, NBD_INFO_NAME, namelen, name, |
f37708f6 EB |
491 | errp); |
492 | if (rc < 0) { | |
493 | return rc; | |
494 | } | |
495 | } | |
496 | ||
497 | /* Send NBD_INFO_DESCRIPTION only if available, regardless of | |
498 | * client request */ | |
499 | if (exp->description) { | |
500 | size_t len = strlen(exp->description); | |
501 | ||
0cfae925 | 502 | rc = nbd_negotiate_send_info(client, NBD_INFO_DESCRIPTION, |
f37708f6 EB |
503 | len, exp->description, errp); |
504 | if (rc < 0) { | |
505 | return rc; | |
506 | } | |
507 | } | |
508 | ||
0c1d50bd EB |
509 | /* Send NBD_INFO_BLOCK_SIZE always, but tweak the minimum size |
510 | * according to whether the client requested it, and according to | |
511 | * whether this is OPT_INFO or OPT_GO. */ | |
512 | /* minimum - 1 for back-compat, or 512 if client is new enough. | |
513 | * TODO: consult blk_bs(blk)->bl.request_alignment? */ | |
0cfae925 VSO |
514 | sizes[0] = |
515 | (client->opt == NBD_OPT_INFO || blocksize) ? BDRV_SECTOR_SIZE : 1; | |
0c1d50bd EB |
516 | /* preferred - Hard-code to 4096 for now. |
517 | * TODO: is blk_bs(blk)->bl.opt_transfer appropriate? */ | |
518 | sizes[1] = 4096; | |
519 | /* maximum - At most 32M, but smaller as appropriate. */ | |
520 | sizes[2] = MIN(blk_get_max_transfer(exp->blk), NBD_MAX_BUFFER_SIZE); | |
521 | trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]); | |
522 | cpu_to_be32s(&sizes[0]); | |
523 | cpu_to_be32s(&sizes[1]); | |
524 | cpu_to_be32s(&sizes[2]); | |
0cfae925 | 525 | rc = nbd_negotiate_send_info(client, NBD_INFO_BLOCK_SIZE, |
0c1d50bd EB |
526 | sizeof(sizes), sizes, errp); |
527 | if (rc < 0) { | |
528 | return rc; | |
529 | } | |
530 | ||
f37708f6 EB |
531 | /* Send NBD_INFO_EXPORT always */ |
532 | trace_nbd_negotiate_new_style_size_flags(exp->size, | |
533 | exp->nbdflags | myflags); | |
534 | stq_be_p(buf, exp->size); | |
535 | stw_be_p(buf + 8, exp->nbdflags | myflags); | |
0cfae925 | 536 | rc = nbd_negotiate_send_info(client, NBD_INFO_EXPORT, |
f37708f6 EB |
537 | sizeof(buf), buf, errp); |
538 | if (rc < 0) { | |
539 | return rc; | |
540 | } | |
541 | ||
0c1d50bd EB |
542 | /* If the client is just asking for NBD_OPT_INFO, but forgot to |
543 | * request block sizes, return an error. | |
544 | * TODO: consult blk_bs(blk)->request_align, and only error if it | |
545 | * is not 1? */ | |
0cfae925 VSO |
546 | if (client->opt == NBD_OPT_INFO && !blocksize) { |
547 | return nbd_negotiate_send_rep_err(client, | |
548 | NBD_REP_ERR_BLOCK_SIZE_REQD, | |
0c1d50bd EB |
549 | errp, |
550 | "request NBD_INFO_BLOCK_SIZE to " | |
551 | "use this export"); | |
552 | } | |
553 | ||
f37708f6 | 554 | /* Final reply */ |
0cfae925 | 555 | rc = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); |
f37708f6 EB |
556 | if (rc < 0) { |
557 | return rc; | |
558 | } | |
559 | ||
0cfae925 | 560 | if (client->opt == NBD_OPT_GO) { |
f37708f6 EB |
561 | client->exp = exp; |
562 | QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); | |
563 | nbd_export_get(client->exp); | |
564 | rc = 1; | |
565 | } | |
566 | return rc; | |
f37708f6 EB |
567 | } |
568 | ||
569 | ||
36683283 EB |
570 | /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the |
571 | * new channel for all further (now-encrypted) communication. */ | |
f95910fe | 572 | static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, |
2fd2c840 | 573 | Error **errp) |
f95910fe DB |
574 | { |
575 | QIOChannel *ioc; | |
576 | QIOChannelTLS *tioc; | |
577 | struct NBDTLSHandshakeData data = { 0 }; | |
578 | ||
0cfae925 VSO |
579 | assert(client->opt == NBD_OPT_STARTTLS); |
580 | ||
9588463e | 581 | trace_nbd_negotiate_handle_starttls(); |
f95910fe | 582 | ioc = client->ioc; |
f95910fe | 583 | |
0cfae925 | 584 | if (nbd_negotiate_send_rep(client, NBD_REP_ACK, errp) < 0) { |
63d5ef86 EB |
585 | return NULL; |
586 | } | |
f95910fe DB |
587 | |
588 | tioc = qio_channel_tls_new_server(ioc, | |
589 | client->tlscreds, | |
590 | client->tlsaclname, | |
2fd2c840 | 591 | errp); |
f95910fe DB |
592 | if (!tioc) { |
593 | return NULL; | |
594 | } | |
595 | ||
0d73f725 | 596 | qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls"); |
9588463e | 597 | trace_nbd_negotiate_handle_starttls_handshake(); |
f95910fe DB |
598 | data.loop = g_main_loop_new(g_main_context_default(), FALSE); |
599 | qio_channel_tls_handshake(tioc, | |
600 | nbd_tls_handshake, | |
601 | &data, | |
602 | NULL); | |
603 | ||
604 | if (!data.complete) { | |
605 | g_main_loop_run(data.loop); | |
606 | } | |
607 | g_main_loop_unref(data.loop); | |
608 | if (data.error) { | |
609 | object_unref(OBJECT(tioc)); | |
2fd2c840 | 610 | error_propagate(errp, data.error); |
f95910fe DB |
611 | return NULL; |
612 | } | |
613 | ||
614 | return QIO_CHANNEL(tioc); | |
615 | } | |
616 | ||
1e120ffe | 617 | /* nbd_negotiate_options |
f37708f6 EB |
618 | * Process all NBD_OPT_* client option commands, during fixed newstyle |
619 | * negotiation. | |
1e120ffe | 620 | * Return: |
2fd2c840 VSO |
621 | * -errno on error, errp is set |
622 | * 0 on successful negotiation, errp is not set | |
623 | * 1 if client sent NBD_OPT_ABORT, i.e. on valid disconnect, | |
624 | * errp is not set | |
1e120ffe | 625 | */ |
23e099c3 EB |
626 | static int nbd_negotiate_options(NBDClient *client, uint16_t myflags, |
627 | Error **errp) | |
f5076b5a | 628 | { |
9c122ada | 629 | uint32_t flags; |
26afa868 | 630 | bool fixedNewstyle = false; |
23e099c3 | 631 | bool no_zeroes = false; |
9c122ada HR |
632 | |
633 | /* Client sends: | |
634 | [ 0 .. 3] client flags | |
635 | ||
f37708f6 | 636 | Then we loop until NBD_OPT_EXPORT_NAME or NBD_OPT_GO: |
9c122ada HR |
637 | [ 0 .. 7] NBD_OPTS_MAGIC |
638 | [ 8 .. 11] NBD option | |
639 | [12 .. 15] Data length | |
640 | ... Rest of request | |
641 | ||
642 | [ 0 .. 7] NBD_OPTS_MAGIC | |
643 | [ 8 .. 11] Second NBD option | |
644 | [12 .. 15] Data length | |
645 | ... Rest of request | |
646 | */ | |
647 | ||
2fd2c840 VSO |
648 | if (nbd_read(client->ioc, &flags, sizeof(flags), errp) < 0) { |
649 | error_prepend(errp, "read failed: "); | |
9c122ada HR |
650 | return -EIO; |
651 | } | |
9c122ada | 652 | be32_to_cpus(&flags); |
621c4f4e | 653 | trace_nbd_negotiate_options_flags(flags); |
26afa868 | 654 | if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) { |
26afa868 DB |
655 | fixedNewstyle = true; |
656 | flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE; | |
657 | } | |
c203c59a | 658 | if (flags & NBD_FLAG_C_NO_ZEROES) { |
23e099c3 | 659 | no_zeroes = true; |
c203c59a EB |
660 | flags &= ~NBD_FLAG_C_NO_ZEROES; |
661 | } | |
26afa868 | 662 | if (flags != 0) { |
2fd2c840 | 663 | error_setg(errp, "Unknown client flags 0x%" PRIx32 " received", flags); |
621c4f4e | 664 | return -EINVAL; |
9c122ada HR |
665 | } |
666 | ||
f5076b5a | 667 | while (1) { |
9c122ada | 668 | int ret; |
7f9039cd | 669 | uint32_t option, length; |
f5076b5a HB |
670 | uint64_t magic; |
671 | ||
2fd2c840 VSO |
672 | if (nbd_read(client->ioc, &magic, sizeof(magic), errp) < 0) { |
673 | error_prepend(errp, "read failed: "); | |
f5076b5a HB |
674 | return -EINVAL; |
675 | } | |
9588463e VSO |
676 | magic = be64_to_cpu(magic); |
677 | trace_nbd_negotiate_options_check_magic(magic); | |
678 | if (magic != NBD_OPTS_MAGIC) { | |
2fd2c840 | 679 | error_setg(errp, "Bad magic received"); |
f5076b5a HB |
680 | return -EINVAL; |
681 | } | |
682 | ||
7f9039cd VSO |
683 | if (nbd_read(client->ioc, &option, |
684 | sizeof(option), errp) < 0) { | |
2fd2c840 | 685 | error_prepend(errp, "read failed: "); |
f5076b5a HB |
686 | return -EINVAL; |
687 | } | |
7f9039cd | 688 | option = be32_to_cpu(option); |
0cfae925 | 689 | client->opt = option; |
f5076b5a | 690 | |
2fd2c840 VSO |
691 | if (nbd_read(client->ioc, &length, sizeof(length), errp) < 0) { |
692 | error_prepend(errp, "read failed: "); | |
f5076b5a HB |
693 | return -EINVAL; |
694 | } | |
695 | length = be32_to_cpu(length); | |
894e0280 | 696 | assert(!client->optlen); |
0cfae925 | 697 | client->optlen = length; |
f5076b5a | 698 | |
fdad35ef EB |
699 | if (length > NBD_MAX_BUFFER_SIZE) { |
700 | error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)", | |
701 | length, NBD_MAX_BUFFER_SIZE); | |
702 | return -EINVAL; | |
703 | } | |
704 | ||
3736cc5b EB |
705 | trace_nbd_negotiate_options_check_option(option, |
706 | nbd_opt_lookup(option)); | |
f95910fe DB |
707 | if (client->tlscreds && |
708 | client->ioc == (QIOChannel *)client->sioc) { | |
709 | QIOChannel *tioc; | |
710 | if (!fixedNewstyle) { | |
7f9039cd | 711 | error_setg(errp, "Unsupported option 0x%" PRIx32, option); |
f95910fe DB |
712 | return -EINVAL; |
713 | } | |
7f9039cd | 714 | switch (option) { |
f95910fe | 715 | case NBD_OPT_STARTTLS: |
e68c35cf EB |
716 | if (length) { |
717 | /* Unconditionally drop the connection if the client | |
718 | * can't start a TLS negotiation correctly */ | |
0cfae925 | 719 | return nbd_reject_length(client, true, errp); |
e68c35cf EB |
720 | } |
721 | tioc = nbd_negotiate_handle_starttls(client, errp); | |
f95910fe DB |
722 | if (!tioc) { |
723 | return -EIO; | |
724 | } | |
8cbee49e | 725 | ret = 0; |
f95910fe DB |
726 | object_unref(OBJECT(client->ioc)); |
727 | client->ioc = QIO_CHANNEL(tioc); | |
728 | break; | |
729 | ||
d1129a8a EB |
730 | case NBD_OPT_EXPORT_NAME: |
731 | /* No way to return an error to client, so drop connection */ | |
2fd2c840 | 732 | error_setg(errp, "Option 0x%x not permitted before TLS", |
7f9039cd | 733 | option); |
d1129a8a EB |
734 | return -EINVAL; |
735 | ||
f95910fe | 736 | default: |
894e0280 EB |
737 | ret = nbd_opt_drop(client, NBD_REP_ERR_TLS_REQD, errp, |
738 | "Option 0x%" PRIx32 | |
739 | "not permitted before TLS", option); | |
37ec36f6 EB |
740 | /* Let the client keep trying, unless they asked to |
741 | * quit. In this mode, we've already sent an error, so | |
742 | * we can't ack the abort. */ | |
7f9039cd | 743 | if (option == NBD_OPT_ABORT) { |
1e120ffe | 744 | return 1; |
b6f5d3b5 | 745 | } |
d1129a8a | 746 | break; |
f95910fe DB |
747 | } |
748 | } else if (fixedNewstyle) { | |
7f9039cd | 749 | switch (option) { |
26afa868 | 750 | case NBD_OPT_LIST: |
e68c35cf | 751 | if (length) { |
0cfae925 | 752 | ret = nbd_reject_length(client, false, errp); |
e68c35cf EB |
753 | } else { |
754 | ret = nbd_negotiate_handle_list(client, errp); | |
755 | } | |
26afa868 DB |
756 | break; |
757 | ||
758 | case NBD_OPT_ABORT: | |
b6f5d3b5 EB |
759 | /* NBD spec says we must try to reply before |
760 | * disconnecting, but that we must also tolerate | |
761 | * guests that don't wait for our reply. */ | |
0cfae925 | 762 | nbd_negotiate_send_rep(client, NBD_REP_ACK, NULL); |
1e120ffe | 763 | return 1; |
26afa868 DB |
764 | |
765 | case NBD_OPT_EXPORT_NAME: | |
0cfae925 | 766 | return nbd_negotiate_handle_export_name(client, |
23e099c3 EB |
767 | myflags, no_zeroes, |
768 | errp); | |
26afa868 | 769 | |
f37708f6 EB |
770 | case NBD_OPT_INFO: |
771 | case NBD_OPT_GO: | |
0cfae925 | 772 | ret = nbd_negotiate_handle_info(client, myflags, errp); |
f37708f6 EB |
773 | if (ret == 1) { |
774 | assert(option == NBD_OPT_GO); | |
775 | return 0; | |
776 | } | |
f37708f6 EB |
777 | break; |
778 | ||
f95910fe | 779 | case NBD_OPT_STARTTLS: |
e68c35cf | 780 | if (length) { |
0cfae925 | 781 | ret = nbd_reject_length(client, false, errp); |
e68c35cf | 782 | } else if (client->tlscreds) { |
0cfae925 VSO |
783 | ret = nbd_negotiate_send_rep_err(client, |
784 | NBD_REP_ERR_INVALID, errp, | |
36683283 | 785 | "TLS already enabled"); |
f95910fe | 786 | } else { |
0cfae925 VSO |
787 | ret = nbd_negotiate_send_rep_err(client, |
788 | NBD_REP_ERR_POLICY, errp, | |
36683283 | 789 | "TLS not configured"); |
63d5ef86 | 790 | } |
d1129a8a | 791 | break; |
5c54e7fa VSO |
792 | |
793 | case NBD_OPT_STRUCTURED_REPLY: | |
794 | if (length) { | |
0cfae925 | 795 | ret = nbd_reject_length(client, false, errp); |
5c54e7fa VSO |
796 | } else if (client->structured_reply) { |
797 | ret = nbd_negotiate_send_rep_err( | |
0cfae925 | 798 | client, NBD_REP_ERR_INVALID, errp, |
5c54e7fa VSO |
799 | "structured reply already negotiated"); |
800 | } else { | |
0cfae925 | 801 | ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); |
5c54e7fa VSO |
802 | client->structured_reply = true; |
803 | myflags |= NBD_FLAG_SEND_DF; | |
804 | } | |
805 | break; | |
806 | ||
26afa868 | 807 | default: |
894e0280 EB |
808 | ret = nbd_opt_drop(client, NBD_REP_ERR_UNSUP, errp, |
809 | "Unsupported option 0x%" PRIx32 " (%s)", | |
810 | option, nbd_opt_lookup(option)); | |
156f6a10 | 811 | break; |
26afa868 DB |
812 | } |
813 | } else { | |
814 | /* | |
815 | * If broken new-style we should drop the connection | |
816 | * for anything except NBD_OPT_EXPORT_NAME | |
817 | */ | |
7f9039cd | 818 | switch (option) { |
26afa868 | 819 | case NBD_OPT_EXPORT_NAME: |
0cfae925 | 820 | return nbd_negotiate_handle_export_name(client, |
23e099c3 EB |
821 | myflags, no_zeroes, |
822 | errp); | |
26afa868 DB |
823 | |
824 | default: | |
3736cc5b EB |
825 | error_setg(errp, "Unsupported option 0x%" PRIx32 " (%s)", |
826 | option, nbd_opt_lookup(option)); | |
26afa868 | 827 | return -EINVAL; |
32d7d2e0 | 828 | } |
f5076b5a | 829 | } |
8cbee49e EB |
830 | if (ret < 0) { |
831 | return ret; | |
832 | } | |
f5076b5a HB |
833 | } |
834 | } | |
835 | ||
1e120ffe VSO |
836 | /* nbd_negotiate |
837 | * Return: | |
2fd2c840 VSO |
838 | * -errno on error, errp is set |
839 | * 0 on successful negotiation, errp is not set | |
840 | * 1 if client sent NBD_OPT_ABORT, i.e. on valid disconnect, | |
841 | * errp is not set | |
1e120ffe | 842 | */ |
2fd2c840 | 843 | static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp) |
7a5ca864 | 844 | { |
5f66d060 | 845 | char buf[NBD_OLDSTYLE_NEGOTIATE_SIZE] = ""; |
2e5c9ad6 | 846 | int ret; |
7423f417 | 847 | const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | |
1f4d6d18 EB |
848 | NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | |
849 | NBD_FLAG_SEND_WRITE_ZEROES); | |
f95910fe | 850 | bool oldStyle; |
b2e3d87f | 851 | |
5f66d060 | 852 | /* Old style negotiation header, no room for options |
6b8c01e7 PB |
853 | [ 0 .. 7] passwd ("NBDMAGIC") |
854 | [ 8 .. 15] magic (NBD_CLIENT_MAGIC) | |
b2e3d87f | 855 | [16 .. 23] size |
5f66d060 | 856 | [24 .. 27] export flags (zero-extended) |
6b8c01e7 PB |
857 | [28 .. 151] reserved (0) |
858 | ||
5f66d060 | 859 | New style negotiation header, client can send options |
6b8c01e7 PB |
860 | [ 0 .. 7] passwd ("NBDMAGIC") |
861 | [ 8 .. 15] magic (NBD_OPTS_MAGIC) | |
862 | [16 .. 17] server flags (0) | |
f37708f6 | 863 | ....options sent, ending in NBD_OPT_EXPORT_NAME or NBD_OPT_GO.... |
b2e3d87f NT |
864 | */ |
865 | ||
1c778ef7 | 866 | qio_channel_set_blocking(client->ioc, false, NULL); |
185b4338 | 867 | |
9588463e | 868 | trace_nbd_negotiate_begin(); |
b2e3d87f | 869 | memcpy(buf, "NBDMAGIC", 8); |
f95910fe DB |
870 | |
871 | oldStyle = client->exp != NULL && !client->tlscreds; | |
872 | if (oldStyle) { | |
9588463e VSO |
873 | trace_nbd_negotiate_old_style(client->exp->size, |
874 | client->exp->nbdflags | myflags); | |
667ad26f JS |
875 | stq_be_p(buf + 8, NBD_CLIENT_MAGIC); |
876 | stq_be_p(buf + 16, client->exp->size); | |
5f66d060 | 877 | stl_be_p(buf + 24, client->exp->nbdflags | myflags); |
b2e3d87f | 878 | |
2fd2c840 VSO |
879 | if (nbd_write(client->ioc, buf, sizeof(buf), errp) < 0) { |
880 | error_prepend(errp, "write failed: "); | |
d9faeed8 | 881 | return -EINVAL; |
6b8c01e7 PB |
882 | } |
883 | } else { | |
76ff081d VSO |
884 | stq_be_p(buf + 8, NBD_OPTS_MAGIC); |
885 | stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES); | |
886 | ||
2fd2c840 VSO |
887 | if (nbd_write(client->ioc, buf, 18, errp) < 0) { |
888 | error_prepend(errp, "write failed: "); | |
d9faeed8 | 889 | return -EINVAL; |
6b8c01e7 | 890 | } |
23e099c3 | 891 | ret = nbd_negotiate_options(client, myflags, errp); |
2e5c9ad6 | 892 | if (ret != 0) { |
2fd2c840 VSO |
893 | if (ret < 0) { |
894 | error_prepend(errp, "option negotiation failed: "); | |
895 | } | |
2e5c9ad6 | 896 | return ret; |
6b8c01e7 | 897 | } |
b2e3d87f NT |
898 | } |
899 | ||
0cfae925 | 900 | assert(!client->optlen); |
9588463e | 901 | trace_nbd_negotiate_success(); |
d9faeed8 VSO |
902 | |
903 | return 0; | |
7a5ca864 FB |
904 | } |
905 | ||
2fd2c840 VSO |
906 | static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request, |
907 | Error **errp) | |
75818250 | 908 | { |
fa26c26b | 909 | uint8_t buf[NBD_REQUEST_SIZE]; |
b2e3d87f | 910 | uint32_t magic; |
a0dc63a6 | 911 | int ret; |
b2e3d87f | 912 | |
2fd2c840 | 913 | ret = nbd_read(ioc, buf, sizeof(buf), errp); |
185b4338 PB |
914 | if (ret < 0) { |
915 | return ret; | |
916 | } | |
917 | ||
b2e3d87f NT |
918 | /* Request |
919 | [ 0 .. 3] magic (NBD_REQUEST_MAGIC) | |
b626b51a EB |
920 | [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...) |
921 | [ 6 .. 7] type (NBD_CMD_READ, ...) | |
b2e3d87f NT |
922 | [ 8 .. 15] handle |
923 | [16 .. 23] from | |
924 | [24 .. 27] len | |
925 | */ | |
926 | ||
773dce3c | 927 | magic = ldl_be_p(buf); |
b626b51a EB |
928 | request->flags = lduw_be_p(buf + 4); |
929 | request->type = lduw_be_p(buf + 6); | |
773dce3c PM |
930 | request->handle = ldq_be_p(buf + 8); |
931 | request->from = ldq_be_p(buf + 16); | |
932 | request->len = ldl_be_p(buf + 24); | |
b2e3d87f | 933 | |
9588463e VSO |
934 | trace_nbd_receive_request(magic, request->flags, request->type, |
935 | request->from, request->len); | |
b2e3d87f NT |
936 | |
937 | if (magic != NBD_REQUEST_MAGIC) { | |
2fd2c840 | 938 | error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic); |
185b4338 | 939 | return -EINVAL; |
b2e3d87f NT |
940 | } |
941 | return 0; | |
75818250 TS |
942 | } |
943 | ||
41996e38 PB |
944 | #define MAX_NBD_REQUESTS 16 |
945 | ||
ce33967a | 946 | void nbd_client_get(NBDClient *client) |
1743b515 PB |
947 | { |
948 | client->refcount++; | |
949 | } | |
950 | ||
ce33967a | 951 | void nbd_client_put(NBDClient *client) |
1743b515 PB |
952 | { |
953 | if (--client->refcount == 0) { | |
ff2b68aa | 954 | /* The last reference should be dropped by client->close, |
f53a829b | 955 | * which is called by client_close. |
ff2b68aa PB |
956 | */ |
957 | assert(client->closing); | |
958 | ||
ff82911c | 959 | qio_channel_detach_aio_context(client->ioc); |
1c778ef7 DB |
960 | object_unref(OBJECT(client->sioc)); |
961 | object_unref(OBJECT(client->ioc)); | |
f95910fe DB |
962 | if (client->tlscreds) { |
963 | object_unref(OBJECT(client->tlscreds)); | |
964 | } | |
965 | g_free(client->tlsaclname); | |
6b8c01e7 PB |
966 | if (client->exp) { |
967 | QTAILQ_REMOVE(&client->exp->clients, client, next); | |
968 | nbd_export_put(client->exp); | |
969 | } | |
1743b515 PB |
970 | g_free(client); |
971 | } | |
972 | } | |
973 | ||
0c9390d9 | 974 | static void client_close(NBDClient *client, bool negotiated) |
1743b515 | 975 | { |
ff2b68aa PB |
976 | if (client->closing) { |
977 | return; | |
978 | } | |
979 | ||
980 | client->closing = true; | |
981 | ||
982 | /* Force requests to finish. They will drop their own references, | |
983 | * then we'll close the socket and free the NBDClient. | |
984 | */ | |
1c778ef7 DB |
985 | qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, |
986 | NULL); | |
ff2b68aa PB |
987 | |
988 | /* Also tell the client, so that they release their reference. */ | |
0c9390d9 EB |
989 | if (client->close_fn) { |
990 | client->close_fn(client, negotiated); | |
1743b515 | 991 | } |
1743b515 PB |
992 | } |
993 | ||
315f78ab | 994 | static NBDRequestData *nbd_request_get(NBDClient *client) |
d9a73806 | 995 | { |
315f78ab | 996 | NBDRequestData *req; |
72deddc5 | 997 | |
41996e38 PB |
998 | assert(client->nb_requests <= MAX_NBD_REQUESTS - 1); |
999 | client->nb_requests++; | |
1000 | ||
315f78ab | 1001 | req = g_new0(NBDRequestData, 1); |
72deddc5 PB |
1002 | nbd_client_get(client); |
1003 | req->client = client; | |
d9a73806 PB |
1004 | return req; |
1005 | } | |
1006 | ||
315f78ab | 1007 | static void nbd_request_put(NBDRequestData *req) |
d9a73806 | 1008 | { |
72deddc5 | 1009 | NBDClient *client = req->client; |
e1adb27a | 1010 | |
2d821488 SH |
1011 | if (req->data) { |
1012 | qemu_vfree(req->data); | |
1013 | } | |
1729404c | 1014 | g_free(req); |
e1adb27a | 1015 | |
958c717d | 1016 | client->nb_requests--; |
ff82911c PB |
1017 | nbd_client_receive_next_request(client); |
1018 | ||
72deddc5 | 1019 | nbd_client_put(client); |
d9a73806 PB |
1020 | } |
1021 | ||
aadf99a7 | 1022 | static void blk_aio_attached(AioContext *ctx, void *opaque) |
f2149281 HR |
1023 | { |
1024 | NBDExport *exp = opaque; | |
1025 | NBDClient *client; | |
1026 | ||
9588463e | 1027 | trace_nbd_blk_aio_attached(exp->name, ctx); |
f2149281 HR |
1028 | |
1029 | exp->ctx = ctx; | |
1030 | ||
1031 | QTAILQ_FOREACH(client, &exp->clients, next) { | |
ff82911c PB |
1032 | qio_channel_attach_aio_context(client->ioc, ctx); |
1033 | if (client->recv_coroutine) { | |
1034 | aio_co_schedule(ctx, client->recv_coroutine); | |
1035 | } | |
1036 | if (client->send_coroutine) { | |
1037 | aio_co_schedule(ctx, client->send_coroutine); | |
1038 | } | |
f2149281 HR |
1039 | } |
1040 | } | |
1041 | ||
aadf99a7 | 1042 | static void blk_aio_detach(void *opaque) |
f2149281 HR |
1043 | { |
1044 | NBDExport *exp = opaque; | |
1045 | NBDClient *client; | |
1046 | ||
9588463e | 1047 | trace_nbd_blk_aio_detach(exp->name, exp->ctx); |
f2149281 HR |
1048 | |
1049 | QTAILQ_FOREACH(client, &exp->clients, next) { | |
ff82911c | 1050 | qio_channel_detach_aio_context(client->ioc); |
f2149281 HR |
1051 | } |
1052 | ||
1053 | exp->ctx = NULL; | |
1054 | } | |
1055 | ||
741cc431 HR |
1056 | static void nbd_eject_notifier(Notifier *n, void *data) |
1057 | { | |
1058 | NBDExport *exp = container_of(n, NBDExport, eject_notifier); | |
1059 | nbd_export_close(exp); | |
1060 | } | |
1061 | ||
cd7fca95 | 1062 | NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size, |
7423f417 | 1063 | uint16_t nbdflags, void (*close)(NBDExport *), |
cd7fca95 | 1064 | bool writethrough, BlockBackend *on_eject_blk, |
98f44bbe | 1065 | Error **errp) |
af49bbbe | 1066 | { |
3dff24f2 | 1067 | AioContext *ctx; |
cd7fca95 | 1068 | BlockBackend *blk; |
e8d3eb74 | 1069 | NBDExport *exp = g_new0(NBDExport, 1); |
8a7ce4f9 | 1070 | uint64_t perm; |
d7086422 | 1071 | int ret; |
cd7fca95 | 1072 | |
3dff24f2 KW |
1073 | /* |
1074 | * NBD exports are used for non-shared storage migration. Make sure | |
1075 | * that BDRV_O_INACTIVE is cleared and the image is ready for write | |
1076 | * access since the export could be available before migration handover. | |
1077 | */ | |
1078 | ctx = bdrv_get_aio_context(bs); | |
1079 | aio_context_acquire(ctx); | |
1080 | bdrv_invalidate_cache(bs, NULL); | |
1081 | aio_context_release(ctx); | |
1082 | ||
8a7ce4f9 KW |
1083 | /* Don't allow resize while the NBD server is running, otherwise we don't |
1084 | * care what happens with the node. */ | |
1085 | perm = BLK_PERM_CONSISTENT_READ; | |
1086 | if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) { | |
1087 | perm |= BLK_PERM_WRITE; | |
1088 | } | |
1089 | blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | | |
1090 | BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD); | |
d7086422 KW |
1091 | ret = blk_insert_bs(blk, bs, errp); |
1092 | if (ret < 0) { | |
1093 | goto fail; | |
1094 | } | |
cd7fca95 KW |
1095 | blk_set_enable_write_cache(blk, !writethrough); |
1096 | ||
2c8d9f06 | 1097 | exp->refcount = 1; |
4b9441f6 | 1098 | QTAILQ_INIT(&exp->clients); |
aadf99a7 | 1099 | exp->blk = blk; |
af49bbbe PB |
1100 | exp->dev_offset = dev_offset; |
1101 | exp->nbdflags = nbdflags; | |
98f44bbe HR |
1102 | exp->size = size < 0 ? blk_getlength(blk) : size; |
1103 | if (exp->size < 0) { | |
1104 | error_setg_errno(errp, -exp->size, | |
1105 | "Failed to determine the NBD export's length"); | |
1106 | goto fail; | |
1107 | } | |
1108 | exp->size -= exp->size % BDRV_SECTOR_SIZE; | |
1109 | ||
0ddf08db | 1110 | exp->close = close; |
aadf99a7 | 1111 | exp->ctx = blk_get_aio_context(blk); |
aadf99a7 | 1112 | blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); |
741cc431 | 1113 | |
cd7fca95 KW |
1114 | if (on_eject_blk) { |
1115 | blk_ref(on_eject_blk); | |
1116 | exp->eject_notifier_blk = on_eject_blk; | |
1117 | exp->eject_notifier.notify = nbd_eject_notifier; | |
1118 | blk_add_remove_bs_notifier(on_eject_blk, &exp->eject_notifier); | |
1119 | } | |
af49bbbe | 1120 | return exp; |
98f44bbe HR |
1121 | |
1122 | fail: | |
cd7fca95 | 1123 | blk_unref(blk); |
98f44bbe HR |
1124 | g_free(exp); |
1125 | return NULL; | |
af49bbbe PB |
1126 | } |
1127 | ||
ee0a19ec PB |
1128 | NBDExport *nbd_export_find(const char *name) |
1129 | { | |
1130 | NBDExport *exp; | |
1131 | QTAILQ_FOREACH(exp, &exports, next) { | |
1132 | if (strcmp(name, exp->name) == 0) { | |
1133 | return exp; | |
1134 | } | |
1135 | } | |
1136 | ||
1137 | return NULL; | |
1138 | } | |
1139 | ||
1140 | void nbd_export_set_name(NBDExport *exp, const char *name) | |
1141 | { | |
1142 | if (exp->name == name) { | |
1143 | return; | |
1144 | } | |
1145 | ||
1146 | nbd_export_get(exp); | |
1147 | if (exp->name != NULL) { | |
1148 | g_free(exp->name); | |
1149 | exp->name = NULL; | |
1150 | QTAILQ_REMOVE(&exports, exp, next); | |
1151 | nbd_export_put(exp); | |
1152 | } | |
1153 | if (name != NULL) { | |
1154 | nbd_export_get(exp); | |
1155 | exp->name = g_strdup(name); | |
1156 | QTAILQ_INSERT_TAIL(&exports, exp, next); | |
1157 | } | |
1158 | nbd_export_put(exp); | |
1159 | } | |
1160 | ||
b1a75b33 EB |
1161 | void nbd_export_set_description(NBDExport *exp, const char *description) |
1162 | { | |
1163 | g_free(exp->description); | |
1164 | exp->description = g_strdup(description); | |
1165 | } | |
1166 | ||
af49bbbe PB |
1167 | void nbd_export_close(NBDExport *exp) |
1168 | { | |
4b9441f6 | 1169 | NBDClient *client, *next; |
2c8d9f06 | 1170 | |
4b9441f6 PB |
1171 | nbd_export_get(exp); |
1172 | QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) { | |
0c9390d9 | 1173 | client_close(client, true); |
4b9441f6 | 1174 | } |
125afda8 | 1175 | nbd_export_set_name(exp, NULL); |
b1a75b33 | 1176 | nbd_export_set_description(exp, NULL); |
4b9441f6 | 1177 | nbd_export_put(exp); |
2c8d9f06 PB |
1178 | } |
1179 | ||
a3b0dc75 VSO |
1180 | void nbd_export_remove(NBDExport *exp, NbdServerRemoveMode mode, Error **errp) |
1181 | { | |
1182 | if (mode == NBD_SERVER_REMOVE_MODE_HARD || QTAILQ_EMPTY(&exp->clients)) { | |
1183 | nbd_export_close(exp); | |
1184 | return; | |
1185 | } | |
1186 | ||
1187 | assert(mode == NBD_SERVER_REMOVE_MODE_SAFE); | |
1188 | ||
1189 | error_setg(errp, "export '%s' still in use", exp->name); | |
1190 | error_append_hint(errp, "Use mode='hard' to force client disconnect\n"); | |
1191 | } | |
1192 | ||
2c8d9f06 PB |
1193 | void nbd_export_get(NBDExport *exp) |
1194 | { | |
1195 | assert(exp->refcount > 0); | |
1196 | exp->refcount++; | |
1197 | } | |
1198 | ||
1199 | void nbd_export_put(NBDExport *exp) | |
1200 | { | |
1201 | assert(exp->refcount > 0); | |
1202 | if (exp->refcount == 1) { | |
1203 | nbd_export_close(exp); | |
d9a73806 PB |
1204 | } |
1205 | ||
9156245e VSO |
1206 | /* nbd_export_close() may theoretically reduce refcount to 0. It may happen |
1207 | * if someone calls nbd_export_put() on named export not through | |
1208 | * nbd_export_set_name() when refcount is 1. So, let's assert that | |
1209 | * it is > 0. | |
1210 | */ | |
1211 | assert(exp->refcount > 0); | |
2c8d9f06 | 1212 | if (--exp->refcount == 0) { |
ee0a19ec | 1213 | assert(exp->name == NULL); |
b1a75b33 | 1214 | assert(exp->description == NULL); |
ee0a19ec | 1215 | |
0ddf08db PB |
1216 | if (exp->close) { |
1217 | exp->close(exp); | |
1218 | } | |
1219 | ||
d6268348 | 1220 | if (exp->blk) { |
cd7fca95 KW |
1221 | if (exp->eject_notifier_blk) { |
1222 | notifier_remove(&exp->eject_notifier); | |
1223 | blk_unref(exp->eject_notifier_blk); | |
1224 | } | |
d6268348 WC |
1225 | blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, |
1226 | blk_aio_detach, exp); | |
1227 | blk_unref(exp->blk); | |
1228 | exp->blk = NULL; | |
1229 | } | |
1230 | ||
2c8d9f06 PB |
1231 | g_free(exp); |
1232 | } | |
af49bbbe PB |
1233 | } |
1234 | ||
e140177d | 1235 | BlockBackend *nbd_export_get_blockdev(NBDExport *exp) |
125afda8 | 1236 | { |
aadf99a7 | 1237 | return exp->blk; |
125afda8 PB |
1238 | } |
1239 | ||
ee0a19ec PB |
1240 | void nbd_export_close_all(void) |
1241 | { | |
1242 | NBDExport *exp, *next; | |
1243 | ||
1244 | QTAILQ_FOREACH_SAFE(exp, &exports, next, next) { | |
1245 | nbd_export_close(exp); | |
ee0a19ec PB |
1246 | } |
1247 | } | |
1248 | ||
de79bfc3 VSO |
1249 | static int coroutine_fn nbd_co_send_iov(NBDClient *client, struct iovec *iov, |
1250 | unsigned niov, Error **errp) | |
1251 | { | |
1252 | int ret; | |
1253 | ||
1254 | g_assert(qemu_in_coroutine()); | |
1255 | qemu_co_mutex_lock(&client->send_lock); | |
1256 | client->send_coroutine = qemu_coroutine_self(); | |
1257 | ||
1258 | ret = qio_channel_writev_all(client->ioc, iov, niov, errp) < 0 ? -EIO : 0; | |
1259 | ||
1260 | client->send_coroutine = NULL; | |
1261 | qemu_co_mutex_unlock(&client->send_lock); | |
1262 | ||
1263 | return ret; | |
1264 | } | |
1265 | ||
caad5384 VSO |
1266 | static inline void set_be_simple_reply(NBDSimpleReply *reply, uint64_t error, |
1267 | uint64_t handle) | |
1268 | { | |
1269 | stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC); | |
1270 | stl_be_p(&reply->error, error); | |
1271 | stq_be_p(&reply->handle, handle); | |
1272 | } | |
1273 | ||
978df1b6 | 1274 | static int nbd_co_send_simple_reply(NBDClient *client, |
14cea41d VSO |
1275 | uint64_t handle, |
1276 | uint32_t error, | |
978df1b6 VSO |
1277 | void *data, |
1278 | size_t len, | |
1279 | Error **errp) | |
22045592 | 1280 | { |
de79bfc3 | 1281 | NBDSimpleReply reply; |
14cea41d | 1282 | int nbd_err = system_errno_to_nbd_errno(error); |
de79bfc3 VSO |
1283 | struct iovec iov[] = { |
1284 | {.iov_base = &reply, .iov_len = sizeof(reply)}, | |
1285 | {.iov_base = data, .iov_len = len} | |
1286 | }; | |
6fb2b972 | 1287 | |
e7a78d0e EB |
1288 | trace_nbd_co_send_simple_reply(handle, nbd_err, nbd_err_lookup(nbd_err), |
1289 | len); | |
de79bfc3 | 1290 | set_be_simple_reply(&reply, nbd_err, handle); |
262db388 | 1291 | |
de79bfc3 | 1292 | return nbd_co_send_iov(client, iov, len ? 2 : 1, errp); |
22045592 PB |
1293 | } |
1294 | ||
5c54e7fa VSO |
1295 | static inline void set_be_chunk(NBDStructuredReplyChunk *chunk, uint16_t flags, |
1296 | uint16_t type, uint64_t handle, uint32_t length) | |
1297 | { | |
1298 | stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC); | |
1299 | stw_be_p(&chunk->flags, flags); | |
1300 | stw_be_p(&chunk->type, type); | |
1301 | stq_be_p(&chunk->handle, handle); | |
1302 | stl_be_p(&chunk->length, length); | |
1303 | } | |
1304 | ||
ef8c887e EB |
1305 | static int coroutine_fn nbd_co_send_structured_done(NBDClient *client, |
1306 | uint64_t handle, | |
1307 | Error **errp) | |
1308 | { | |
1309 | NBDStructuredReplyChunk chunk; | |
1310 | struct iovec iov[] = { | |
1311 | {.iov_base = &chunk, .iov_len = sizeof(chunk)}, | |
1312 | }; | |
1313 | ||
1314 | trace_nbd_co_send_structured_done(handle); | |
1315 | set_be_chunk(&chunk, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_NONE, handle, 0); | |
1316 | ||
1317 | return nbd_co_send_iov(client, iov, 1, errp); | |
1318 | } | |
1319 | ||
5c54e7fa VSO |
1320 | static int coroutine_fn nbd_co_send_structured_read(NBDClient *client, |
1321 | uint64_t handle, | |
1322 | uint64_t offset, | |
1323 | void *data, | |
1324 | size_t size, | |
418638d3 | 1325 | bool final, |
5c54e7fa VSO |
1326 | Error **errp) |
1327 | { | |
efdc0c10 | 1328 | NBDStructuredReadData chunk; |
5c54e7fa VSO |
1329 | struct iovec iov[] = { |
1330 | {.iov_base = &chunk, .iov_len = sizeof(chunk)}, | |
1331 | {.iov_base = data, .iov_len = size} | |
1332 | }; | |
1333 | ||
ef8c887e | 1334 | assert(size); |
5c54e7fa | 1335 | trace_nbd_co_send_structured_read(handle, offset, data, size); |
418638d3 EB |
1336 | set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0, |
1337 | NBD_REPLY_TYPE_OFFSET_DATA, handle, | |
1338 | sizeof(chunk) - sizeof(chunk.h) + size); | |
5c54e7fa VSO |
1339 | stq_be_p(&chunk.offset, offset); |
1340 | ||
1341 | return nbd_co_send_iov(client, iov, 2, errp); | |
1342 | } | |
1343 | ||
418638d3 EB |
1344 | static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client, |
1345 | uint64_t handle, | |
1346 | uint64_t offset, | |
1347 | uint8_t *data, | |
1348 | size_t size, | |
1349 | Error **errp) | |
1350 | { | |
1351 | int ret = 0; | |
1352 | NBDExport *exp = client->exp; | |
1353 | size_t progress = 0; | |
1354 | ||
1355 | while (progress < size) { | |
1356 | int64_t pnum; | |
1357 | int status = bdrv_block_status_above(blk_bs(exp->blk), NULL, | |
1358 | offset + progress, | |
1359 | size - progress, &pnum, NULL, | |
1360 | NULL); | |
e2de3256 | 1361 | bool final; |
418638d3 EB |
1362 | |
1363 | if (status < 0) { | |
1364 | error_setg_errno(errp, -status, "unable to check for holes"); | |
1365 | return status; | |
1366 | } | |
1367 | assert(pnum && pnum <= size - progress); | |
e2de3256 | 1368 | final = progress + pnum == size; |
418638d3 EB |
1369 | if (status & BDRV_BLOCK_ZERO) { |
1370 | NBDStructuredReadHole chunk; | |
1371 | struct iovec iov[] = { | |
1372 | {.iov_base = &chunk, .iov_len = sizeof(chunk)}, | |
1373 | }; | |
1374 | ||
1375 | trace_nbd_co_send_structured_read_hole(handle, offset + progress, | |
1376 | pnum); | |
e2de3256 EB |
1377 | set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0, |
1378 | NBD_REPLY_TYPE_OFFSET_HOLE, | |
418638d3 EB |
1379 | handle, sizeof(chunk) - sizeof(chunk.h)); |
1380 | stq_be_p(&chunk.offset, offset + progress); | |
1381 | stl_be_p(&chunk.length, pnum); | |
1382 | ret = nbd_co_send_iov(client, iov, 1, errp); | |
1383 | } else { | |
1384 | ret = blk_pread(exp->blk, offset + progress + exp->dev_offset, | |
1385 | data + progress, pnum); | |
1386 | if (ret < 0) { | |
1387 | error_setg_errno(errp, -ret, "reading from file failed"); | |
1388 | break; | |
1389 | } | |
1390 | ret = nbd_co_send_structured_read(client, handle, offset + progress, | |
e2de3256 | 1391 | data + progress, pnum, final, |
418638d3 EB |
1392 | errp); |
1393 | } | |
1394 | ||
1395 | if (ret < 0) { | |
1396 | break; | |
1397 | } | |
1398 | progress += pnum; | |
1399 | } | |
418638d3 EB |
1400 | return ret; |
1401 | } | |
1402 | ||
5c54e7fa VSO |
1403 | static int coroutine_fn nbd_co_send_structured_error(NBDClient *client, |
1404 | uint64_t handle, | |
1405 | uint32_t error, | |
a57f6dea | 1406 | const char *msg, |
5c54e7fa VSO |
1407 | Error **errp) |
1408 | { | |
1409 | NBDStructuredError chunk; | |
1410 | int nbd_err = system_errno_to_nbd_errno(error); | |
1411 | struct iovec iov[] = { | |
1412 | {.iov_base = &chunk, .iov_len = sizeof(chunk)}, | |
a57f6dea | 1413 | {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0}, |
5c54e7fa VSO |
1414 | }; |
1415 | ||
1416 | assert(nbd_err); | |
1417 | trace_nbd_co_send_structured_error(handle, nbd_err, | |
a57f6dea | 1418 | nbd_err_lookup(nbd_err), msg ? msg : ""); |
5c54e7fa | 1419 | set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle, |
a57f6dea | 1420 | sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len); |
5c54e7fa | 1421 | stl_be_p(&chunk.error, nbd_err); |
a57f6dea | 1422 | stw_be_p(&chunk.message_length, iov[1].iov_len); |
5c54e7fa | 1423 | |
a57f6dea | 1424 | return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp); |
5c54e7fa VSO |
1425 | } |
1426 | ||
2a6e128b VSO |
1427 | /* nbd_co_receive_request |
1428 | * Collect a client request. Return 0 if request looks valid, -EIO to drop | |
1429 | * connection right away, and any other negative value to report an error to | |
1430 | * the client (although the caller may still need to disconnect after reporting | |
1431 | * the error). | |
1432 | */ | |
2fd2c840 VSO |
1433 | static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request, |
1434 | Error **errp) | |
a030b347 | 1435 | { |
72deddc5 | 1436 | NBDClient *client = req->client; |
5c54e7fa | 1437 | int valid_flags; |
a030b347 | 1438 | |
1c778ef7 | 1439 | g_assert(qemu_in_coroutine()); |
ff82911c | 1440 | assert(client->recv_coroutine == qemu_coroutine_self()); |
2fd2c840 | 1441 | if (nbd_receive_request(client->ioc, request, errp) < 0) { |
ee898b87 | 1442 | return -EIO; |
a030b347 PB |
1443 | } |
1444 | ||
3736cc5b EB |
1445 | trace_nbd_co_receive_request_decode_type(request->handle, request->type, |
1446 | nbd_cmd_lookup(request->type)); | |
29b6c3b3 | 1447 | |
b626b51a | 1448 | if (request->type != NBD_CMD_WRITE) { |
29b6c3b3 EB |
1449 | /* No payload, we are ready to read the next request. */ |
1450 | req->complete = true; | |
1451 | } | |
1452 | ||
b626b51a | 1453 | if (request->type == NBD_CMD_DISC) { |
29b6c3b3 EB |
1454 | /* Special case: we're going to disconnect without a reply, |
1455 | * whether or not flags, from, or len are bogus */ | |
ee898b87 | 1456 | return -EIO; |
29b6c3b3 EB |
1457 | } |
1458 | ||
b626b51a | 1459 | if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) { |
eb38c3b6 | 1460 | if (request->len > NBD_MAX_BUFFER_SIZE) { |
2fd2c840 VSO |
1461 | error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)", |
1462 | request->len, NBD_MAX_BUFFER_SIZE); | |
ee898b87 | 1463 | return -EINVAL; |
eb38c3b6 PB |
1464 | } |
1465 | ||
f1c17521 PB |
1466 | req->data = blk_try_blockalign(client->exp->blk, request->len); |
1467 | if (req->data == NULL) { | |
2fd2c840 | 1468 | error_setg(errp, "No memory"); |
ee898b87 | 1469 | return -ENOMEM; |
f1c17521 | 1470 | } |
2d821488 | 1471 | } |
b626b51a | 1472 | if (request->type == NBD_CMD_WRITE) { |
2fd2c840 VSO |
1473 | if (nbd_read(client->ioc, req->data, request->len, errp) < 0) { |
1474 | error_prepend(errp, "reading from socket failed: "); | |
ee898b87 | 1475 | return -EIO; |
a030b347 | 1476 | } |
29b6c3b3 | 1477 | req->complete = true; |
6fb2b972 | 1478 | |
9588463e VSO |
1479 | trace_nbd_co_receive_request_payload_received(request->handle, |
1480 | request->len); | |
a030b347 | 1481 | } |
29b6c3b3 | 1482 | |
fed5f8f8 EB |
1483 | /* Sanity checks. */ |
1484 | if (client->exp->nbdflags & NBD_FLAG_READ_ONLY && | |
1485 | (request->type == NBD_CMD_WRITE || | |
1486 | request->type == NBD_CMD_WRITE_ZEROES || | |
1487 | request->type == NBD_CMD_TRIM)) { | |
1488 | error_setg(errp, "Export is read-only"); | |
1489 | return -EROFS; | |
1490 | } | |
1491 | if (request->from > client->exp->size || | |
1492 | request->from + request->len > client->exp->size) { | |
2fd2c840 VSO |
1493 | error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32 |
1494 | ", Size: %" PRIu64, request->from, request->len, | |
1495 | (uint64_t)client->exp->size); | |
fed5f8f8 EB |
1496 | return (request->type == NBD_CMD_WRITE || |
1497 | request->type == NBD_CMD_WRITE_ZEROES) ? -ENOSPC : -EINVAL; | |
29b6c3b3 | 1498 | } |
5c54e7fa VSO |
1499 | valid_flags = NBD_CMD_FLAG_FUA; |
1500 | if (request->type == NBD_CMD_READ && client->structured_reply) { | |
1501 | valid_flags |= NBD_CMD_FLAG_DF; | |
1502 | } else if (request->type == NBD_CMD_WRITE_ZEROES) { | |
1503 | valid_flags |= NBD_CMD_FLAG_NO_HOLE; | |
ab7c548e | 1504 | } |
5c54e7fa VSO |
1505 | if (request->flags & ~valid_flags) { |
1506 | error_setg(errp, "unsupported flags for command %s (got 0x%x)", | |
1507 | nbd_cmd_lookup(request->type), request->flags); | |
ee898b87 | 1508 | return -EINVAL; |
1f4d6d18 | 1509 | } |
29b6c3b3 | 1510 | |
ee898b87 | 1511 | return 0; |
a030b347 PB |
1512 | } |
1513 | ||
ff82911c PB |
1514 | /* Owns a reference to the NBDClient passed as opaque. */ |
1515 | static coroutine_fn void nbd_trip(void *opaque) | |
75818250 | 1516 | { |
262db388 | 1517 | NBDClient *client = opaque; |
1743b515 | 1518 | NBDExport *exp = client->exp; |
315f78ab | 1519 | NBDRequestData *req; |
ff82911c | 1520 | NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ |
a0dc63a6 | 1521 | int ret; |
a0c30369 | 1522 | int flags; |
8c372a02 | 1523 | int reply_data_len = 0; |
2fd2c840 | 1524 | Error *local_err = NULL; |
a57f6dea | 1525 | char *msg = NULL; |
b2e3d87f | 1526 | |
9588463e | 1527 | trace_nbd_trip(); |
ff2b68aa | 1528 | if (client->closing) { |
ff82911c | 1529 | nbd_client_put(client); |
ff2b68aa PB |
1530 | return; |
1531 | } | |
b2e3d87f | 1532 | |
ff2b68aa | 1533 | req = nbd_request_get(client); |
2fd2c840 | 1534 | ret = nbd_co_receive_request(req, &request, &local_err); |
ee898b87 VSO |
1535 | client->recv_coroutine = NULL; |
1536 | nbd_client_receive_next_request(client); | |
a030b347 | 1537 | if (ret == -EIO) { |
8c372a02 | 1538 | goto disconnect; |
a030b347 | 1539 | } |
b2e3d87f | 1540 | |
a030b347 | 1541 | if (ret < 0) { |
8c372a02 | 1542 | goto reply; |
b2e3d87f | 1543 | } |
b2e3d87f | 1544 | |
d6268348 WC |
1545 | if (client->closing) { |
1546 | /* | |
1547 | * The client may be closed when we are blocked in | |
1548 | * nbd_co_receive_request() | |
1549 | */ | |
1550 | goto done; | |
1551 | } | |
1552 | ||
b626b51a | 1553 | switch (request.type) { |
b2e3d87f | 1554 | case NBD_CMD_READ: |
b626b51a EB |
1555 | /* XXX: NBD Protocol only documents use of FUA with WRITE */ |
1556 | if (request.flags & NBD_CMD_FLAG_FUA) { | |
aadf99a7 | 1557 | ret = blk_co_flush(exp->blk); |
e25ceb76 | 1558 | if (ret < 0) { |
2fd2c840 | 1559 | error_setg_errno(&local_err, -ret, "flush failed"); |
8c372a02 | 1560 | break; |
e25ceb76 PB |
1561 | } |
1562 | } | |
1563 | ||
e2de3256 EB |
1564 | if (client->structured_reply && !(request.flags & NBD_CMD_FLAG_DF) && |
1565 | request.len) { | |
418638d3 EB |
1566 | ret = nbd_co_send_sparse_read(req->client, request.handle, |
1567 | request.from, req->data, request.len, | |
1568 | &local_err); | |
1569 | if (ret < 0) { | |
1570 | goto reply; | |
1571 | } | |
1572 | goto done; | |
1573 | } | |
1574 | ||
df7b97ff EB |
1575 | ret = blk_pread(exp->blk, request.from + exp->dev_offset, |
1576 | req->data, request.len); | |
adcf6302 | 1577 | if (ret < 0) { |
2fd2c840 | 1578 | error_setg_errno(&local_err, -ret, "reading from file failed"); |
8c372a02 | 1579 | break; |
b2e3d87f | 1580 | } |
b2e3d87f | 1581 | |
8c372a02 | 1582 | reply_data_len = request.len; |
8c372a02 | 1583 | |
b2e3d87f NT |
1584 | break; |
1585 | case NBD_CMD_WRITE: | |
a0c30369 | 1586 | flags = 0; |
b626b51a | 1587 | if (request.flags & NBD_CMD_FLAG_FUA) { |
a0c30369 EB |
1588 | flags |= BDRV_REQ_FUA; |
1589 | } | |
df7b97ff | 1590 | ret = blk_pwrite(exp->blk, request.from + exp->dev_offset, |
a0c30369 | 1591 | req->data, request.len, flags); |
fae69416 | 1592 | if (ret < 0) { |
2fd2c840 | 1593 | error_setg_errno(&local_err, -ret, "writing to file failed"); |
fae69416 | 1594 | } |
b2e3d87f | 1595 | |
1f4d6d18 | 1596 | break; |
1f4d6d18 | 1597 | case NBD_CMD_WRITE_ZEROES: |
1f4d6d18 EB |
1598 | flags = 0; |
1599 | if (request.flags & NBD_CMD_FLAG_FUA) { | |
1600 | flags |= BDRV_REQ_FUA; | |
1601 | } | |
1602 | if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) { | |
1603 | flags |= BDRV_REQ_MAY_UNMAP; | |
1604 | } | |
1605 | ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset, | |
1606 | request.len, flags); | |
1607 | if (ret < 0) { | |
2fd2c840 | 1608 | error_setg_errno(&local_err, -ret, "writing to file failed"); |
1f4d6d18 EB |
1609 | } |
1610 | ||
b2e3d87f NT |
1611 | break; |
1612 | case NBD_CMD_DISC: | |
29b6c3b3 EB |
1613 | /* unreachable, thanks to special case in nbd_co_receive_request() */ |
1614 | abort(); | |
1615 | ||
1486d04a | 1616 | case NBD_CMD_FLUSH: |
aadf99a7 | 1617 | ret = blk_co_flush(exp->blk); |
1486d04a | 1618 | if (ret < 0) { |
2fd2c840 | 1619 | error_setg_errno(&local_err, -ret, "flush failed"); |
1486d04a | 1620 | } |
8c372a02 | 1621 | |
7a706633 PB |
1622 | break; |
1623 | case NBD_CMD_TRIM: | |
1c6c4bb7 EB |
1624 | ret = blk_co_pdiscard(exp->blk, request.from + exp->dev_offset, |
1625 | request.len); | |
1626 | if (ret < 0) { | |
2fd2c840 | 1627 | error_setg_errno(&local_err, -ret, "discard failed"); |
7a706633 | 1628 | } |
8c372a02 | 1629 | |
1486d04a | 1630 | break; |
b2e3d87f | 1631 | default: |
2fd2c840 VSO |
1632 | error_setg(&local_err, "invalid request type (%" PRIu32 ") received", |
1633 | request.type); | |
14cea41d | 1634 | ret = -EINVAL; |
8c372a02 VSO |
1635 | } |
1636 | ||
1637 | reply: | |
2fd2c840 | 1638 | if (local_err) { |
14cea41d VSO |
1639 | /* If we get here, local_err was not a fatal error, and should be sent |
1640 | * to the client. */ | |
a57f6dea EB |
1641 | assert(ret < 0); |
1642 | msg = g_strdup(error_get_pretty(local_err)); | |
2fd2c840 VSO |
1643 | error_report_err(local_err); |
1644 | local_err = NULL; | |
1645 | } | |
1646 | ||
a57f6dea EB |
1647 | if (client->structured_reply && |
1648 | (ret < 0 || request.type == NBD_CMD_READ)) { | |
5c54e7fa VSO |
1649 | if (ret < 0) { |
1650 | ret = nbd_co_send_structured_error(req->client, request.handle, | |
a57f6dea | 1651 | -ret, msg, &local_err); |
ef8c887e | 1652 | } else if (reply_data_len) { |
5c54e7fa VSO |
1653 | ret = nbd_co_send_structured_read(req->client, request.handle, |
1654 | request.from, req->data, | |
418638d3 EB |
1655 | reply_data_len, true, |
1656 | &local_err); | |
ef8c887e EB |
1657 | } else { |
1658 | ret = nbd_co_send_structured_done(req->client, request.handle, | |
1659 | &local_err); | |
5c54e7fa VSO |
1660 | } |
1661 | } else { | |
1662 | ret = nbd_co_send_simple_reply(req->client, request.handle, | |
1663 | ret < 0 ? -ret : 0, | |
1664 | req->data, reply_data_len, &local_err); | |
1665 | } | |
a57f6dea | 1666 | g_free(msg); |
5c54e7fa | 1667 | if (ret < 0) { |
c7b97282 | 1668 | error_prepend(&local_err, "Failed to send reply: "); |
2fd2c840 VSO |
1669 | goto disconnect; |
1670 | } | |
1671 | ||
8c372a02 VSO |
1672 | /* We must disconnect after NBD_CMD_WRITE if we did not |
1673 | * read the payload. | |
1674 | */ | |
2fd2c840 VSO |
1675 | if (!req->complete) { |
1676 | error_setg(&local_err, "Request handling failed in intermediate state"); | |
8c372a02 | 1677 | goto disconnect; |
b2e3d87f NT |
1678 | } |
1679 | ||
7fe7b68b | 1680 | done: |
262db388 | 1681 | nbd_request_put(req); |
ff82911c | 1682 | nbd_client_put(client); |
262db388 PB |
1683 | return; |
1684 | ||
8c372a02 | 1685 | disconnect: |
2fd2c840 VSO |
1686 | if (local_err) { |
1687 | error_reportf_err(local_err, "Disconnect client, due to: "); | |
1688 | } | |
72deddc5 | 1689 | nbd_request_put(req); |
0c9390d9 | 1690 | client_close(client, true); |
ff82911c | 1691 | nbd_client_put(client); |
7a5ca864 | 1692 | } |
af49bbbe | 1693 | |
ff82911c | 1694 | static void nbd_client_receive_next_request(NBDClient *client) |
958c717d | 1695 | { |
ff82911c PB |
1696 | if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS) { |
1697 | nbd_client_get(client); | |
1698 | client->recv_coroutine = qemu_coroutine_create(nbd_trip, client); | |
1699 | aio_co_schedule(client->exp->ctx, client->recv_coroutine); | |
958c717d HR |
1700 | } |
1701 | } | |
1702 | ||
1a6245a5 FZ |
1703 | static coroutine_fn void nbd_co_client_start(void *opaque) |
1704 | { | |
c84087f2 | 1705 | NBDClient *client = opaque; |
1a6245a5 | 1706 | NBDExport *exp = client->exp; |
2fd2c840 | 1707 | Error *local_err = NULL; |
1a6245a5 FZ |
1708 | |
1709 | if (exp) { | |
1710 | nbd_export_get(exp); | |
df8ad9f1 | 1711 | QTAILQ_INSERT_TAIL(&exp->clients, client, next); |
1a6245a5 | 1712 | } |
df8ad9f1 EB |
1713 | qemu_co_mutex_init(&client->send_lock); |
1714 | ||
2fd2c840 VSO |
1715 | if (nbd_negotiate(client, &local_err)) { |
1716 | if (local_err) { | |
1717 | error_report_err(local_err); | |
1718 | } | |
0c9390d9 | 1719 | client_close(client, false); |
c84087f2 | 1720 | return; |
1a6245a5 | 1721 | } |
ff82911c PB |
1722 | |
1723 | nbd_client_receive_next_request(client); | |
1a6245a5 FZ |
1724 | } |
1725 | ||
0c9390d9 EB |
1726 | /* |
1727 | * Create a new client listener on the given export @exp, using the | |
1728 | * given channel @sioc. Begin servicing it in a coroutine. When the | |
1729 | * connection closes, call @close_fn with an indication of whether the | |
1730 | * client completed negotiation. | |
1731 | */ | |
1c778ef7 DB |
1732 | void nbd_client_new(NBDExport *exp, |
1733 | QIOChannelSocket *sioc, | |
f95910fe DB |
1734 | QCryptoTLSCreds *tlscreds, |
1735 | const char *tlsaclname, | |
0c9390d9 | 1736 | void (*close_fn)(NBDClient *, bool)) |
af49bbbe | 1737 | { |
1743b515 | 1738 | NBDClient *client; |
c84087f2 | 1739 | Coroutine *co; |
1a6245a5 | 1740 | |
e8d3eb74 | 1741 | client = g_new0(NBDClient, 1); |
1743b515 PB |
1742 | client->refcount = 1; |
1743 | client->exp = exp; | |
f95910fe DB |
1744 | client->tlscreds = tlscreds; |
1745 | if (tlscreds) { | |
1746 | object_ref(OBJECT(client->tlscreds)); | |
1747 | } | |
1748 | client->tlsaclname = g_strdup(tlsaclname); | |
1c778ef7 DB |
1749 | client->sioc = sioc; |
1750 | object_ref(OBJECT(client->sioc)); | |
1751 | client->ioc = QIO_CHANNEL(sioc); | |
1752 | object_ref(OBJECT(client->ioc)); | |
0c9390d9 | 1753 | client->close_fn = close_fn; |
2c8d9f06 | 1754 | |
c84087f2 VSO |
1755 | co = qemu_coroutine_create(nbd_co_client_start, client); |
1756 | qemu_coroutine_enter(co); | |
af49bbbe | 1757 | } |