2 * QEMU Block driver for native access to files on NFS shares
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "qemu/osdep.h"
28 #include "qemu-common.h"
29 #include "qemu/config-file.h"
30 #include "qemu/error-report.h"
31 #include "qapi/error.h"
32 #include "block/block_int.h"
36 #include "qemu/cutils.h"
37 #include "sysemu/sysemu.h"
38 #include "qapi/qmp/qdict.h"
39 #include "qapi/qmp/qstring.h"
40 #include "qapi-visit.h"
41 #include "qapi/qobject-input-visitor.h"
42 #include "qapi/qobject-output-visitor.h"
43 #include <nfsc/libnfs.h>
/* Upper bound for the "readahead-size" option; nfs_client_open() truncates
 * larger requests to this value and emits a warning. */
46 #define QEMU_NFS_MAX_READAHEAD_SIZE 1048576
/* Upper bound for the "page-cache-size" option, expressed in units of
 * NFS_BLKSIZE (the truncation warning reports it as "pages"). */
47 #define QEMU_NFS_MAX_PAGECACHE_SIZE (8388608 / NFS_BLKSIZE)
/* Highest level ever passed to nfs_set_debug(); larger requests are clamped. */
48 #define QEMU_NFS_MAX_DEBUG_LEVEL 2
/*
 * Per-node driver state; the block layer allocates it as bs->opaque
 * (bdrv_nfs.instance_size is sizeof(NFSClient)).
 * NOTE(review): other members used in this file (fh, events, mutex, path,
 * server, st_blocks, has_zero_init, cache_used) are declared on lines that
 * are not visible in this excerpt.
 */
50 typedef struct NFSClient {
/* libnfs connection handle passed to every nfs_*() call in this file */
51 struct nfs_context *context;
/* AioContext on which the fd handlers for the libnfs socket are registered */
55 AioContext *aio_context;
/* Numeric runtime options as parsed in nfs_client_open(); they are read back
 * by nfs_refresh_filename() */
61 int64_t uid, gid, tcp_syncnt, readahead, pagecache, debug;
/*
 * Book-keeping for one in-flight libnfs request.
 * NOTE(review): the member declarations are not visible in this excerpt; the
 * rest of the file accesses co, client, bs, iov, st, ret and complete.
 */
64 typedef struct NFSRPC {
/*
 * Translate an "nfs://" URI into flat driver options stored in @options.
 *
 * Writes "server.host", "server.type" (always "inet") and "path", then maps
 * each query parameter onto its runtime option name:
 *   uid -> user, gid -> group, tcp-syncnt -> tcp-syn-count,
 *   readahead -> readahead-size, pagecache -> page-cache-size, debug -> debug
 * Every problem is reported through @errp.
 * NOTE(review): the return statements are outside this excerpt; the caller
 * nfs_file_create() stores the result in its status variable.
 */
74 static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
77 QueryParams *qp = NULL;
80 uri = uri_parse(filename);
82 error_setg(errp, "Invalid URI specified");
/* g_strcmp0() also copes with a NULL scheme */
85 if (g_strcmp0(uri->scheme, "nfs") != 0) {
86 error_setg(errp, "URI scheme must be 'nfs'");
91 error_setg(errp, "missing hostname in URI");
96 error_setg(errp, "missing file path in URI");
100 qp = query_params_parse(uri->query);
102 error_setg(errp, "could not parse query parameters");
106 qdict_put_str(options, "server.host", uri->server);
107 qdict_put_str(options, "server.type", "inet");
108 qdict_put_str(options, "path", uri->path);
/* Validate and translate the query parameters one by one */
110 for (i = 0; i < qp->n; i++) {
111 unsigned long long val;
112 if (!qp->p[i].value) {
113 error_setg(errp, "Value for NFS parameter expected: %s",
/* The value is only checked with parse_uint_full(); what gets stored below is
 * the original string, not the parsed number */
117 if (parse_uint_full(qp->p[i].value, &val, 0)) {
118 error_setg(errp, "Illegal value for NFS parameter: %s",
122 if (!strcmp(qp->p[i].name, "uid")) {
123 qdict_put_str(options, "user", qp->p[i].value);
124 } else if (!strcmp(qp->p[i].name, "gid")) {
125 qdict_put_str(options, "group", qp->p[i].value);
126 } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
127 qdict_put_str(options, "tcp-syn-count", qp->p[i].value);
128 } else if (!strcmp(qp->p[i].name, "readahead")) {
129 qdict_put_str(options, "readahead-size", qp->p[i].value);
130 } else if (!strcmp(qp->p[i].name, "pagecache")) {
131 qdict_put_str(options, "page-cache-size", qp->p[i].value);
132 } else if (!strcmp(qp->p[i].name, "debug")) {
133 qdict_put_str(options, "debug", qp->p[i].value);
135 error_setg(errp, "Unknown NFS parameter name: %s",
/* Release the parsed query parameters */
143 query_params_free(qp);
/*
 * Scan @options for any key that a filename (URI) would also provide and
 * report it through @errp ("Option %s cannot be used with a filename").
 * NOTE(review): the return statements are not visible in this excerpt;
 * nfs_parse_filename() tests the result as "conflict found".
 */
151 static bool nfs_has_filename_options_conflict(QDict *options, Error **errp)
153 const QDictEntry *qe;
155 for (qe = qdict_first(options); qe; qe = qdict_next(options, qe)) {
/* Exact-match keys; "server.host"/"server.type" as written by nfs_parse_uri()
 * are covered by the "server." prefix test at the end of the condition */
156 if (!strcmp(qe->key, "host") ||
157 !strcmp(qe->key, "path") ||
158 !strcmp(qe->key, "user") ||
159 !strcmp(qe->key, "group") ||
160 !strcmp(qe->key, "tcp-syn-count") ||
161 !strcmp(qe->key, "readahead-size") ||
162 !strcmp(qe->key, "page-cache-size") ||
163 !strcmp(qe->key, "debug") ||
164 strstart(qe->key, "server.", NULL))
166 error_setg(errp, "Option %s cannot be used with a filename",
/*
 * bdrv_parse_filename callback (see bdrv_nfs): check @options for keys that
 * clash with a filename, then fill @options from the URI in @filename.
 * The int result of nfs_parse_uri() is not used here; failures travel via errp.
 */
175 static void nfs_parse_filename(const char *filename, QDict *options,
178 if (nfs_has_filename_options_conflict(options, errp)) {
182 nfs_parse_uri(filename, options, errp);
185 static void nfs_process_read(void *arg);
186 static void nfs_process_write(void *arg);
/*
 * Re-register the fd handlers of the libnfs socket when the set of events
 * libnfs is waiting for (nfs_which_events()) differs from client->events.
 * A read handler is installed only while POLLIN is wanted and a write handler
 * only while POLLOUT is wanted.
 */
188 /* Called with QemuMutex held. */
189 static void nfs_set_events(NFSClient *client)
191 int ev = nfs_which_events(client->context);
192 if (ev != client->events) {
193 aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
195 (ev & POLLIN) ? nfs_process_read : NULL,
196 (ev & POLLOUT) ? nfs_process_write : NULL,
203 static void nfs_process_read(void *arg)
205 NFSClient *client = arg;
207 qemu_mutex_lock(&client->mutex);
208 nfs_service(client->context, POLLIN);
209 nfs_set_events(client);
210 qemu_mutex_unlock(&client->mutex);
213 static void nfs_process_write(void *arg)
215 NFSClient *client = arg;
217 qemu_mutex_lock(&client->mutex);
218 nfs_service(client->context, POLLOUT);
219 nfs_set_events(client);
220 qemu_mutex_unlock(&client->mutex);
/*
 * Prepare @task for a request issued from the currently running coroutine.
 * NOTE(review): only two initialisers are visible in this excerpt; any
 * further members are set on lines that are not shown.
 */
223 static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
/* coroutine to wake once the request has completed */
226 .co = qemu_coroutine_self(),
/* the node's NFSClient */
228 .client = bs->opaque,
/*
 * Bottom half scheduled by nfs_co_generic_cb(); @opaque is the NFSRPC of the
 * finished request.  Wakes the coroutine recorded in task->co.
 * NOTE(review): the waiters loop on task->complete; the statement that sets
 * it is not visible in this excerpt.
 */
232 static void nfs_co_generic_bh_cb(void *opaque)
234 NFSRPC *task = opaque;
237 aio_co_wake(task->co);
/*
 * libnfs completion callback shared by the pread, pwrite and fsync requests;
 * @private_data is the NFSRPC passed when the request was submitted.
 * NOTE(review): the return-type line and the statement storing @ret into the
 * task are not visible in this excerpt.
 */
240 /* Called (via nfs_service) with QemuMutex held. */
242 nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
245 NFSRPC *task = private_data;
/* Reads only (task->iov set): copy the payload into the caller's iovec, but
 * only when it fits */
248 if (task->ret > 0 && task->iov) {
249 if (task->ret <= task->iov->size) {
250 qemu_iovec_from_buf(task->iov, 0, data, task->ret);
256 error_report("NFS Error: %s", nfs_get_error(nfs));
/* Defer the coroutine wake-up to a bottom half in the client's AioContext */
258 aio_bh_schedule_oneshot(task->client->aio_context,
259 nfs_co_generic_bh_cb, task);
/*
 * bdrv_co_preadv callback: read @bytes at @offset into @iov.
 *
 * The request is submitted with nfs_pread_async() under client->mutex, the
 * fd handlers are refreshed, and the coroutine yields until the task is
 * marked complete.  A short read is padded with zeroes up to iov->size.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
262 static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
263 uint64_t bytes, QEMUIOVector *iov,
266 NFSClient *client = bs->opaque;
269 nfs_co_init_task(bs, &task);
272 qemu_mutex_lock(&client->mutex);
273 if (nfs_pread_async(client->context, client->fh,
274 offset, bytes, nfs_co_generic_cb, &task) != 0) {
/* submission failed: drop the lock before bailing out */
275 qemu_mutex_unlock(&client->mutex);
279 nfs_set_events(client);
280 qemu_mutex_unlock(&client->mutex);
/* re-check after every wake-up; only a completed task ends the wait */
281 while (!task.complete) {
282 qemu_coroutine_yield();
289 /* zero pad short reads */
290 if (task.ret < iov->size) {
291 qemu_iovec_memset(iov, task.ret, 0, iov->size - task.ret);
/*
 * bdrv_co_pwritev callback: write @bytes from @iov at @offset.
 *
 * libnfs takes one contiguous buffer, so an iovec with more than one element
 * is first flattened into a temporary allocation; a single-element iovec is
 * written straight from its base pointer.  After completion any result other
 * than @bytes is a failure: a negative result is returned unchanged and a
 * short write is turned into -EIO.
 * NOTE(review): the code that releases the temporary buffer and the success
 * return are not visible in this excerpt.
 */
297 static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
298 uint64_t bytes, QEMUIOVector *iov,
301 NFSClient *client = bs->opaque;
/* presumably records whether buf was allocated here — confirm in full file */
304 bool my_buffer = false;
306 nfs_co_init_task(bs, &task);
308 if (iov->niov != 1) {
/* g_try_malloc() may return NULL; a NULL result only counts as failure when
 * bytes is non-zero */
309 buf = g_try_malloc(bytes);
310 if (bytes && buf == NULL) {
313 qemu_iovec_to_buf(iov, 0, buf, bytes);
316 buf = iov->iov[0].iov_base;
319 qemu_mutex_lock(&client->mutex);
320 if (nfs_pwrite_async(client->context, client->fh,
322 nfs_co_generic_cb, &task) != 0) {
323 qemu_mutex_unlock(&client->mutex);
330 nfs_set_events(client);
331 qemu_mutex_unlock(&client->mutex);
332 while (!task.complete) {
333 qemu_coroutine_yield();
340 if (task.ret != bytes) {
341 return task.ret < 0 ? task.ret : -EIO;
/*
 * bdrv_co_flush_to_disk callback: submit nfs_fsync_async() for the open file
 * under client->mutex, refresh the fd handlers and yield until the task is
 * marked complete.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
347 static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
349 NFSClient *client = bs->opaque;
352 nfs_co_init_task(bs, &task);
354 qemu_mutex_lock(&client->mutex);
355 if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
/* submission failed: drop the lock before bailing out */
357 qemu_mutex_unlock(&client->mutex);
361 nfs_set_events(client);
362 qemu_mutex_unlock(&client->mutex);
363 while (!task.complete) {
364 qemu_coroutine_yield();
/*
 * Runtime options understood by nfs_client_open(), which absorbs them from
 * the options QDict.
 * NOTE(review): several ".name" lines are not visible in this excerpt;
 * nfs_client_open() reads "path", "user", "group", "tcp-syn-count",
 * "readahead-size", "page-cache-size" and "debug".
 */
370 static QemuOptsList runtime_opts = {
372 .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
376 .type = QEMU_OPT_STRING,
377 .help = "Path of the image on the host",
381 .type = QEMU_OPT_NUMBER,
382 .help = "UID value to use when talking to the server",
386 .type = QEMU_OPT_NUMBER,
387 .help = "GID value to use when talking to the server",
390 .name = "tcp-syn-count",
391 .type = QEMU_OPT_NUMBER,
392 .help = "Number of SYNs to send during the session establish",
395 .name = "readahead-size",
396 .type = QEMU_OPT_NUMBER,
397 .help = "Set the readahead size in bytes",
/* NOTE(review): the help text says bytes, but nfs_client_open() clamps this
 * value against a limit expressed in NFS_BLKSIZE units and warns in "pages" */
400 .name = "page-cache-size",
401 .type = QEMU_OPT_NUMBER,
402 .help = "Set the pagecache size in bytes",
406 .type = QEMU_OPT_NUMBER,
407 .help = "Set the NFS debug level (max 2)",
409 { /* end of list */ }
/*
 * bdrv_detach_aio_context callback: remove the fd handlers of the libnfs
 * socket from the AioContext the client is currently bound to (all callback
 * arguments are NULL).
 * NOTE(review): the end of the function is not visible in this excerpt.
 */
413 static void nfs_detach_aio_context(BlockDriverState *bs)
415 NFSClient *client = bs->opaque;
417 aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
418 false, NULL, NULL, NULL, NULL);
422 static void nfs_attach_aio_context(BlockDriverState *bs,
423 AioContext *new_context)
425 NFSClient *client = bs->opaque;
427 client->aio_context = new_context;
428 nfs_set_events(client);
/*
 * Release everything nfs_client_open() set up: the open file handle, the fd
 * handlers, the libnfs context, the path string, the mutex and the NFSServer
 * description.  Also used by nfs_client_open() itself on failure.
 * NOTE(review): a few lines inside the context branch are not visible here.
 */
431 static void nfs_client_close(NFSClient *client)
433 if (client->context) {
435 nfs_close(client->context, client->fh);
/* unregister the socket before the context (and with it the fd) goes away */
438 aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
439 false, NULL, NULL, NULL, NULL);
440 nfs_destroy_context(client->context);
441 client->context = NULL;
/* NOTE(review): path is freed but, unlike context and server, it is not reset
 * to NULL on the visible lines — confirm nothing reads it afterwards */
443 g_free(client->path);
444 qemu_mutex_destroy(&client->mutex);
445 qapi_free_NFSServer(client->server);
446 client->server = NULL;
449 static void nfs_file_close(BlockDriverState *bs)
451 NFSClient *client = bs->opaque;
452 nfs_client_close(client);
/*
 * Build an NFSServer object from the "server.*" keys of @options.
 *
 * The keys are moved into a sub-dictionary, crumpled into a nested QObject
 * and run through a QObject input visitor.  A missing address or a visitor
 * failure is reported through @errp.
 * NOTE(review): the cleanup and return lines are only partly visible;
 * nfs_client_open() treats a NULL result as failure.
 */
455 static NFSServer *nfs_config(QDict *options, Error **errp)
457 NFSServer *server = NULL;
459 QObject *crumpled_addr = NULL;
461 Error *local_error = NULL;
/* removes the "server." entries from @options and stores them in addr */
463 qdict_extract_subqdict(options, &addr, "server.");
464 if (!qdict_size(addr)) {
465 error_setg(errp, "NFS server address missing");
469 crumpled_addr = qdict_crumple(addr, errp);
470 if (!crumpled_addr) {
475 * Caution: this works only because all scalar members of
476 * NFSServer are QString in @crumpled_addr. The visitor expects
477 * @crumpled_addr to be typed according to the QAPI schema. It
478 * is when @options come from -blockdev or blockdev_add. But when
479 * they come from -drive, they're all QString.
481 iv = qobject_input_visitor_new(crumpled_addr);
482 visit_type_NFSServer(iv, NULL, &server, &local_error);
484 error_propagate(errp, local_error);
490 qobject_decref(crumpled_addr);
/*
 * Connect @client to the NFS server described by @options and open or create
 * the target file.
 *
 * @flags      POSIX open flags; O_CREAT selects nfs_creat() (mode 0600),
 *             nfs_open() is used to open the file with @flags.
 * @open_flags BDRV_O_* flags; BDRV_O_NOCACHE forbids the readahead and
 *             page-cache options.
 *
 * The path option is split at its last '/': the component from that slash on
 * is the file name handed to nfs_creat()/nfs_open(), and client->path is what
 * gets mounted.  After a successful fstat the function records st_blocks and
 * has_zero_init and computes the file size in BDRV_SECTOR_SIZE units, rounded
 * up.  Errors are reported through @errp.
 * NOTE(review): many control-flow lines (error tests, goto/return, cleanup of
 * opts and file) are not visible in this excerpt.
 */
496 static int64_t nfs_client_open(NFSClient *client, QDict *options,
497 int flags, int open_flags, Error **errp)
500 QemuOpts *opts = NULL;
501 Error *local_err = NULL;
503 char *file = NULL, *strp = NULL;
/* the mutex is destroyed again in nfs_client_close() */
505 qemu_mutex_init(&client->mutex);
506 opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
507 qemu_opts_absorb_qdict(opts, options, &local_err);
509 error_propagate(errp, local_err);
514 client->path = g_strdup(qemu_opt_get(opts, "path"));
517 error_setg(errp, "No path was specified");
/* locate the last path separator; file keeps the text from it onwards */
521 strp = strrchr(client->path, '/');
523 error_setg(errp, "Invalid URL specified");
526 file = g_strdup(strp);
529 /* Pop the config into our state object, Exit if invalid */
530 client->server = nfs_config(options, errp);
531 if (!client->server) {
536 client->context = nfs_init_context();
537 if (client->context == NULL) {
538 error_setg(errp, "Failed to init NFS context");
/* Optional credentials and TCP tuning: applied only when the option is set */
542 if (qemu_opt_get(opts, "user")) {
543 client->uid = qemu_opt_get_number(opts, "user", 0);
544 nfs_set_uid(client->context, client->uid);
547 if (qemu_opt_get(opts, "group")) {
548 client->gid = qemu_opt_get_number(opts, "group", 0);
549 nfs_set_gid(client->context, client->gid);
552 if (qemu_opt_get(opts, "tcp-syn-count")) {
553 client->tcp_syncnt = qemu_opt_get_number(opts, "tcp-syn-count", 0);
554 nfs_set_tcp_syncnt(client->context, client->tcp_syncnt);
/* Readahead is available only when libnfs advertises the feature */
557 #ifdef LIBNFS_FEATURE_READAHEAD
558 if (qemu_opt_get(opts, "readahead-size")) {
559 if (open_flags & BDRV_O_NOCACHE) {
560 error_setg(errp, "Cannot enable NFS readahead "
561 "if cache.direct = on");
564 client->readahead = qemu_opt_get_number(opts, "readahead-size", 0);
565 if (client->readahead > QEMU_NFS_MAX_READAHEAD_SIZE) {
566 warn_report("Truncating NFS readahead size to %d",
567 QEMU_NFS_MAX_READAHEAD_SIZE);
568 client->readahead = QEMU_NFS_MAX_READAHEAD_SIZE;
570 nfs_set_readahead(client->context, client->readahead);
571 #ifdef LIBNFS_FEATURE_PAGECACHE
572 nfs_set_pagecache_ttl(client->context, 0);
/* remembered so nfs_reopen_prepare() can refuse a switch to BDRV_O_NOCACHE */
574 client->cache_used = true;
578 #ifdef LIBNFS_FEATURE_PAGECACHE
579 if (qemu_opt_get(opts, "page-cache-size")) {
580 if (open_flags & BDRV_O_NOCACHE) {
581 error_setg(errp, "Cannot enable NFS pagecache "
582 "if cache.direct = on");
585 client->pagecache = qemu_opt_get_number(opts, "page-cache-size", 0);
586 if (client->pagecache > QEMU_NFS_MAX_PAGECACHE_SIZE) {
587 warn_report("Truncating NFS pagecache size to %d pages",
588 QEMU_NFS_MAX_PAGECACHE_SIZE);
589 client->pagecache = QEMU_NFS_MAX_PAGECACHE_SIZE;
591 nfs_set_pagecache(client->context, client->pagecache);
592 nfs_set_pagecache_ttl(client->context, 0);
593 client->cache_used = true;
597 #ifdef LIBNFS_FEATURE_DEBUG
598 if (qemu_opt_get(opts, "debug")) {
599 client->debug = qemu_opt_get_number(opts, "debug", 0);
600 /* limit the maximum debug level to avoid potential flooding
601 * of our log files. */
602 if (client->debug > QEMU_NFS_MAX_DEBUG_LEVEL) {
603 warn_report("Limiting NFS debug level to %d",
604 QEMU_NFS_MAX_DEBUG_LEVEL);
605 client->debug = QEMU_NFS_MAX_DEBUG_LEVEL;
607 nfs_set_debug(client->context, client->debug);
/* Mount the export (synchronous libnfs call) */
611 ret = nfs_mount(client->context, client->server->host, client->path);
613 error_setg(errp, "Failed to mount nfs share: %s",
614 nfs_get_error(client->context));
618 if (flags & O_CREAT) {
619 ret = nfs_creat(client->context, file, 0600, &client->fh);
621 error_setg(errp, "Failed to create file: %s",
622 nfs_get_error(client->context));
626 ret = nfs_open(client->context, file, flags, &client->fh);
628 error_setg(errp, "Failed to open file : %s",
629 nfs_get_error(client->context));
634 ret = nfs_fstat(client->context, client->fh, &st);
636 error_setg(errp, "Failed to fstat file: %s",
637 nfs_get_error(client->context));
/* size in sectors, rounded up; nfs_file_open() stores it as total_sectors */
641 ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
642 client->st_blocks = st.st_blocks;
643 client->has_zero_init = S_ISREG(st.st_mode);
/* undo everything set up so far (presumably the failure path — the label is
 * not visible in this excerpt) */
648 nfs_client_close(client);
/*
 * bdrv_file_open callback: bind the client to the node's AioContext, open the
 * NFS file read-write or read-only depending on BDRV_O_RDWR, and store the
 * result of nfs_client_open() as the node's sector count.
 * NOTE(review): the error check and return lines are not visible here.
 */
655 static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
657 NFSClient *client = bs->opaque;
660 client->aio_context = bdrv_get_aio_context(bs);
662 ret = nfs_client_open(client, options,
663 (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
664 bs->open_flags, errp);
669 bs->total_sectors = ret;
/*
 * Options accepted when creating an image (bdrv_nfs.create_opts).  The only
 * entry is BLOCK_OPT_SIZE, the virtual disk size, which nfs_file_create()
 * reads.
 */
674 static QemuOptsList nfs_create_opts = {
675 .name = "nfs-create-opts",
676 .head = QTAILQ_HEAD_INITIALIZER(nfs_create_opts.head),
679 .name = BLOCK_OPT_SIZE,
680 .type = QEMU_OPT_SIZE,
681 .help = "Virtual disk size"
683 { /* end of list */ }
/*
 * bdrv_create callback: create the file named by @url and size it.
 *
 * A temporary, heap-allocated NFSClient is used: the URL is parsed into a
 * fresh options dictionary, the file is opened with O_CREAT, truncated to the
 * requested size and the client is closed again.
 * NOTE(review): the error checks and the release of options and client are
 * not visible in this excerpt.
 */
687 static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
690 int64_t total_size = 0;
691 NFSClient *client = g_new0(NFSClient, 1);
692 QDict *options = NULL;
/* no BlockDriverState exists yet, so use the context returned by
 * qemu_get_aio_context() */
694 client->aio_context = qemu_get_aio_context();
696 /* Read out options */
697 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
700 options = qdict_new();
701 ret = nfs_parse_uri(url, options, errp);
706 ret = nfs_client_open(client, options, O_CREAT, 0, errp);
/* synchronous libnfs call; its result becomes this function's status */
710 ret = nfs_ftruncate(client->context, client->fh, total_size);
711 nfs_client_close(client);
718 static int nfs_has_zero_init(BlockDriverState *bs)
720 NFSClient *client = bs->opaque;
721 return client->has_zero_init;
/*
 * libnfs completion callback for the fstat issued by
 * nfs_get_allocated_file_size(); @private_data is that request's NFSRPC.
 * On success the struct stat in @data is copied to task->st.  The task is
 * then marked complete and the waiting BlockDriverState is woken.
 * NOTE(review): the return-type line and the statement storing @ret into the
 * task are not visible in this excerpt.
 */
724 /* Called (via nfs_service) with QemuMutex held. */
726 nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
729 NFSRPC *task = private_data;
731 if (task->ret == 0) {
732 memcpy(task->st, data, sizeof(struct stat));
735 error_report("NFS Error: %s", nfs_get_error(nfs));
738 /* Set task->complete before reading bs->wakeup. */
739 atomic_mb_set(&task->complete, 1);
740 bdrv_wakeup(task->bs);
/*
 * bdrv_get_allocated_file_size callback.
 *
 * For a read-only node opened without BDRV_O_NOCACHE the cached st_blocks
 * value is used.  Otherwise an asynchronous fstat is issued and polled to
 * completion.  Returns st_blocks * 512, or the negative task result on error.
 * NOTE(review): no qemu_mutex_lock() is visible around nfs_fstat_async() and
 * nfs_set_events(), although nfs_set_events() is documented as "Called with
 * QemuMutex held" — confirm against the full file.
 */
743 static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
745 NFSClient *client = bs->opaque;
749 if (bdrv_is_read_only(bs) &&
750 !(bs->open_flags & BDRV_O_NOCACHE)) {
751 return client->st_blocks * 512;
756 if (nfs_fstat_async(client->context, client->fh, nfs_get_allocated_file_size_cb,
761 nfs_set_events(client);
/* wait until the callback has set task.complete */
762 BDRV_POLL_WHILE(bs, !task.complete);
764 return (task.ret < 0 ? task.ret : st.st_blocks * 512);
/*
 * bdrv_truncate callback: resize the file to @offset bytes with the
 * synchronous nfs_ftruncate().  Only PREALLOC_MODE_OFF is supported; any
 * other mode is rejected through @errp.  A truncate failure is reported with
 * the errno value -ret.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
767 static int nfs_file_truncate(BlockDriverState *bs, int64_t offset,
768 PreallocMode prealloc, Error **errp)
770 NFSClient *client = bs->opaque;
773 if (prealloc != PREALLOC_MODE_OFF) {
774 error_setg(errp, "Unsupported preallocation mode '%s'",
775 PreallocMode_str(prealloc));
779 ret = nfs_ftruncate(client->context, client->fh, offset);
781 error_setg_errno(errp, -ret, "Failed to truncate file");
/*
 * bdrv_reopen_prepare callback: validate the new flags in @state.
 * Rejects read-only -> read-write transitions and rejects BDRV_O_NOCACHE when
 * libnfs readahead/pagecache is in use.  For a read-only reopen the cached
 * st_blocks value is refreshed with a synchronous fstat.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
788 /* Note that this will not re-establish a connection with the NFS server
789 * - it is effectively a NOP. */
790 static int nfs_reopen_prepare(BDRVReopenState *state,
791 BlockReopenQueue *queue, Error **errp)
793 NFSClient *client = state->bs->opaque;
797 if (state->flags & BDRV_O_RDWR && bdrv_is_read_only(state->bs)) {
798 error_setg(errp, "Cannot open a read-only mount as read-write");
/* cache_used is set by nfs_client_open() when readahead or pagecache is on */
802 if ((state->flags & BDRV_O_NOCACHE) && client->cache_used) {
803 error_setg(errp, "Cannot disable cache if libnfs readahead or"
804 " pagecache is enabled");
808 /* Update cache for read-only reopens */
809 if (!(state->flags & BDRV_O_RDWR)) {
810 ret = nfs_fstat(client->context, client->fh, &st);
812 error_setg(errp, "Failed to fstat file: %s",
813 nfs_get_error(client->context));
816 client->st_blocks = st.st_blocks;
822 static void nfs_refresh_filename(BlockDriverState *bs, QDict *options)
824 NFSClient *client = bs->opaque;
825 QDict *opts = qdict_new();
826 QObject *server_qdict;
829 qdict_put_str(opts, "driver", "nfs");
831 if (client->uid && !client->gid) {
832 snprintf(bs->exact_filename, sizeof(bs->exact_filename),
833 "nfs://%s%s?uid=%" PRId64, client->server->host, client->path,
835 } else if (!client->uid && client->gid) {
836 snprintf(bs->exact_filename, sizeof(bs->exact_filename),
837 "nfs://%s%s?gid=%" PRId64, client->server->host, client->path,
839 } else if (client->uid && client->gid) {
840 snprintf(bs->exact_filename, sizeof(bs->exact_filename),
841 "nfs://%s%s?uid=%" PRId64 "&gid=%" PRId64,
842 client->server->host, client->path, client->uid, client->gid);
844 snprintf(bs->exact_filename, sizeof(bs->exact_filename),
845 "nfs://%s%s", client->server->host, client->path);
848 ov = qobject_output_visitor_new(&server_qdict);
849 visit_type_NFSServer(ov, NULL, &client->server, &error_abort);
850 visit_complete(ov, &server_qdict);
851 qdict_put_obj(opts, "server", server_qdict);
852 qdict_put_str(opts, "path", client->path);
855 qdict_put_int(opts, "user", client->uid);
858 qdict_put_int(opts, "group", client->gid);
860 if (client->tcp_syncnt) {
861 qdict_put_int(opts, "tcp-syn-cnt", client->tcp_syncnt);
863 if (client->readahead) {
864 qdict_put_int(opts, "readahead-size", client->readahead);
866 if (client->pagecache) {
867 qdict_put_int(opts, "page-cache-size", client->pagecache);
870 qdict_put_int(opts, "debug", client->debug);
875 bs->full_open_options = opts;
/*
 * bdrv_invalidate_cache callback, compiled only when libnfs defines
 * LIBNFS_FEATURE_PAGECACHE: invalidates the libnfs page cache of the open
 * file handle.
 * NOTE(review): the rest of the signature and the closing #endif are not
 * visible in this excerpt.
 */
878 #ifdef LIBNFS_FEATURE_PAGECACHE
879 static void nfs_invalidate_cache(BlockDriverState *bs,
882 NFSClient *client = bs->opaque;
883 nfs_pagecache_invalidate(client->context, client->fh);
/*
 * Driver table for the "nfs" protocol.  The block layer allocates
 * instance_size bytes (one NFSClient) per node as bs->opaque and dispatches
 * to the callbacks listed here.
 */
887 static BlockDriver bdrv_nfs = {
888 .format_name = "nfs",
889 .protocol_name = "nfs",
891 .instance_size = sizeof(NFSClient),
892 .bdrv_parse_filename = nfs_parse_filename,
893 .create_opts = &nfs_create_opts,
895 .bdrv_has_zero_init = nfs_has_zero_init,
896 .bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
897 .bdrv_truncate = nfs_file_truncate,
899 .bdrv_file_open = nfs_file_open,
900 .bdrv_close = nfs_file_close,
901 .bdrv_create = nfs_file_create,
902 .bdrv_reopen_prepare = nfs_reopen_prepare,
904 .bdrv_co_preadv = nfs_co_preadv,
905 .bdrv_co_pwritev = nfs_co_pwritev,
906 .bdrv_co_flush_to_disk = nfs_co_flush,
908 .bdrv_detach_aio_context = nfs_detach_aio_context,
909 .bdrv_attach_aio_context = nfs_attach_aio_context,
910 .bdrv_refresh_filename = nfs_refresh_filename,
/* cache invalidation exists only when libnfs provides a page cache */
912 #ifdef LIBNFS_FEATURE_PAGECACHE
913 .bdrv_invalidate_cache = nfs_invalidate_cache,
917 static void nfs_block_init(void)
919 bdrv_register(&bdrv_nfs);
922 block_init(nfs_block_init);