2 * QEMU Block driver for native access to files on NFS shares
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "qemu/osdep.h"
28 #include "qemu/config-file.h"
29 #include "qemu/error-report.h"
30 #include "qapi/error.h"
31 #include "block/block_int.h"
32 #include "block/qdict.h"
35 #include "qemu/main-loop.h"
36 #include "qemu/module.h"
37 #include "qemu/option.h"
39 #include "qemu/cutils.h"
40 #include "sysemu/sysemu.h"
41 #include "sysemu/replay.h"
42 #include "qapi/qapi-visit-block-core.h"
43 #include "qapi/qmp/qdict.h"
44 #include "qapi/qmp/qstring.h"
45 #include "qapi/qobject-input-visitor.h"
46 #include "qapi/qobject-output-visitor.h"
47 #include <nfsc/libnfs.h>
/*
 * Upper limits applied to user-supplied tuning options in nfs_client_open();
 * larger values are clamped there and a warning is printed.
 */
/* Largest value handed to nfs_set_readahead() (1048576 = 2^20). */
50 #define QEMU_NFS_MAX_READAHEAD_SIZE 1048576
/* Largest page cache size, counted in pages: 8388608 (2^23) / NFS_BLKSIZE. */
51 #define QEMU_NFS_MAX_PAGECACHE_SIZE (8388608 / NFS_BLKSIZE)
/* Highest libnfs debug level accepted, so log files are not flooded. */
52 #define QEMU_NFS_MAX_DEBUG_LEVEL 2
/*
 * Per-node state of the NFS block driver; it is the BlockDriverState's
 * opaque data (bdrv_nfs sets instance_size to sizeof(NFSClient)).
 * NOTE(review): only some members are visible in this listing.  Other code
 * in this file also accesses fh, events, mutex, path, server, st_blocks,
 * has_zero_init and cache_used -- confirm against the full definition.
 */
54 typedef struct NFSClient {
/* libnfs context handle passed to the nfs_*() library calls */
55 struct nfs_context *context;
/* AioContext in which the fd handlers for the libnfs socket are registered */
59 AioContext *aio_context;
/* Option values copied from BlockdevOptionsNfs (clamped where limits apply) */
65 int64_t uid, gid, tcp_syncnt, readahead, pagecache, debug;
/*
 * State of one in-flight asynchronous libnfs request.
 * NOTE(review): the member list is not visible in this listing; code in this
 * file accesses the members co, client, ret, iov, complete, st and bs.
 */
68 typedef struct NFSRPC {
/*
 * Parse the URI in @filename and store its components in @options.
 *
 * The scheme must be "nfs".  The server name is stored as "server.host"
 * (with "server.type" fixed to "inet") and the URI path as "path".  Every
 * query parameter must have a value that parse_uint_full() accepts; known
 * parameter names are translated to the matching runtime option keys and
 * the value is stored as a string.  Problems are reported through @errp.
 *
 * NOTE(review): this listing omits several original lines (declarations,
 * the conditions guarding some error messages, error exits and the return
 * statement), so the exact return values cannot be confirmed from here.
 */
78 static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
81 QueryParams *qp = NULL;
84 uri = uri_parse(filename);
86 error_setg(errp, "Invalid URI specified");
89 if (g_strcmp0(uri->scheme, "nfs") != 0) {
90 error_setg(errp, "URI scheme must be 'nfs'");
95 error_setg(errp, "missing hostname in URI");
100 error_setg(errp, "missing file path in URI");
104 qp = query_params_parse(uri->query);
106 error_setg(errp, "could not parse query parameters");
/* Server and path come straight from the URI */
110 qdict_put_str(options, "server.host", uri->server);
111 qdict_put_str(options, "server.type", "inet");
112 qdict_put_str(options, "path", uri->path);
/* Map URI query parameter names onto block driver option names */
114 for (i = 0; i < qp->n; i++) {
115 unsigned long long val;
/* A parameter needs a value, and that value must parse as an unsigned int */
116 if (!qp->p[i].value) {
117 error_setg(errp, "Value for NFS parameter expected: %s",
121 if (parse_uint_full(qp->p[i].value, &val, 0)) {
122 error_setg(errp, "Illegal value for NFS parameter: %s",
126 if (!strcmp(qp->p[i].name, "uid")) {
127 qdict_put_str(options, "user", qp->p[i].value);
128 } else if (!strcmp(qp->p[i].name, "gid")) {
129 qdict_put_str(options, "group", qp->p[i].value);
130 } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
131 qdict_put_str(options, "tcp-syn-count", qp->p[i].value);
132 } else if (!strcmp(qp->p[i].name, "readahead")) {
133 qdict_put_str(options, "readahead-size", qp->p[i].value);
134 } else if (!strcmp(qp->p[i].name, "pagecache")) {
135 qdict_put_str(options, "page-cache-size", qp->p[i].value);
136 } else if (!strcmp(qp->p[i].name, "debug")) {
137 qdict_put_str(options, "debug", qp->p[i].value);
139 error_setg(errp, "Unknown NFS parameter name: %s",
/* Release the parsed query parameter list */
147 query_params_free(qp);
/*
 * Scan @options for keys that a filename would also define: "host", "path",
 * "user", "group", "tcp-syn-count", "readahead-size", "page-cache-size",
 * "debug", or any key starting with "server.".  A matching key is reported
 * through @errp.
 *
 * NOTE(review): the return statements are missing from this listing.  The
 * caller tests the result in an if, so presumably true means that a
 * conflicting key was found -- confirm.
 */
155 static bool nfs_has_filename_options_conflict(QDict *options, Error **errp)
157 const QDictEntry *qe;
159 for (qe = qdict_first(options); qe; qe = qdict_next(options, qe)) {
160 if (!strcmp(qe->key, "host") ||
161 !strcmp(qe->key, "path") ||
162 !strcmp(qe->key, "user") ||
163 !strcmp(qe->key, "group") ||
164 !strcmp(qe->key, "tcp-syn-count") ||
165 !strcmp(qe->key, "readahead-size") ||
166 !strcmp(qe->key, "page-cache-size") ||
167 !strcmp(qe->key, "debug") ||
168 strstart(qe->key, "server.", NULL))
170 error_setg(errp, "Option %s cannot be used with a filename",
/*
 * Installed as .bdrv_parse_filename in bdrv_nfs: checks @options for keys
 * that clash with a filename, then lets nfs_parse_uri() translate @filename
 * into entries of @options.
 * NOTE(review): the body of the conflict branch is missing from this
 * listing; presumably it returns before the URI is parsed -- confirm.
 */
179 static void nfs_parse_filename(const char *filename, QDict *options,
182 if (nfs_has_filename_options_conflict(options, errp)) {
186 nfs_parse_uri(filename, options, errp);
/* Forward declarations: nfs_set_events() refers to both fd handlers. */
189 static void nfs_process_read(void *arg);
190 static void nfs_process_write(void *arg);
/*
 * Bring the registered fd handlers in line with the events libnfs currently
 * asks for (nfs_which_events()).  Nothing happens when they already equal
 * client->events.  Otherwise the handlers are re-registered: the read
 * handler only when POLLIN is wanted, the write handler only when POLLOUT
 * is wanted.
 * NOTE(review): some arguments of the aio_set_fd_handler() call and the end
 * of the if body are missing from this listing; presumably client->events
 * is updated there -- confirm.
 */
192 /* Called with QemuMutex held. */
193 static void nfs_set_events(NFSClient *client)
195 int ev = nfs_which_events(client->context);
196 if (ev != client->events) {
197 aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
199 (ev & POLLIN) ? nfs_process_read : NULL,
200 (ev & POLLOUT) ? nfs_process_write : NULL,
/*
 * fd handler installed by nfs_set_events() for POLLIN.  @arg is the
 * NFSClient.  With the client mutex held, libnfs services the POLLIN event
 * and the fd handlers are then re-synchronised with what libnfs wants next.
 */
207 static void nfs_process_read(void *arg)
209 NFSClient *client = arg;
211 qemu_mutex_lock(&client->mutex);
212 nfs_service(client->context, POLLIN);
213 nfs_set_events(client);
214 qemu_mutex_unlock(&client->mutex);
/*
 * fd handler installed by nfs_set_events() for POLLOUT.  @arg is the
 * NFSClient.  With the client mutex held, libnfs services the POLLOUT event
 * and the fd handlers are then re-synchronised with what libnfs wants next.
 */
217 static void nfs_process_write(void *arg)
219 NFSClient *client = arg;
221 qemu_mutex_lock(&client->mutex);
222 nfs_service(client->context, POLLOUT);
223 nfs_set_events(client);
224 qemu_mutex_unlock(&client->mutex);
/*
 * Prepare @task for a request issued from the calling coroutine.  The
 * visible initialisers record the current coroutine (so it can be woken
 * later) and the NFSClient taken from bs->opaque.
 * NOTE(review): the enclosing assignment and any further initialisers are
 * missing from this listing.
 */
227 static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
230 .co = qemu_coroutine_self(),
232 .client = bs->opaque,
/*
 * Bottom half scheduled by nfs_co_generic_cb(); @opaque is the NFSRPC whose
 * coroutine is resumed with aio_co_wake().
 * NOTE(review): at least one line between the declaration and the wake-up
 * is missing from this listing.  The request coroutines loop on
 * task.complete, so presumably that flag is set here -- confirm.
 */
236 static void nfs_co_generic_bh_cb(void *opaque)
238 NFSRPC *task = opaque;
241 aio_co_wake(task->co);
/*
 * Completion callback passed to the asynchronous libnfs calls in this file;
 * @private_data is the NFSRPC of the request.
 *
 * For a positive result with an I/O vector attached, the returned @data is
 * copied into the vector as long as it fits (task->ret <= vector size).
 * Afterwards a one-shot bottom half is scheduled in the client's AioContext
 * to wake the waiting coroutine.
 *
 * NOTE(review): the assignment of task->ret, the branch for an oversized
 * result and the condition guarding error_report() are missing from this
 * listing.
 */
244 /* Called (via nfs_service) with QemuMutex held. */
246 nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
249 NFSRPC *task = private_data;
252 if (task->ret > 0 && task->iov) {
253 if (task->ret <= task->iov->size) {
254 qemu_iovec_from_buf(task->iov, 0, data, task->ret);
260 error_report("NFS Error: %s", nfs_get_error(nfs));
262 replay_bh_schedule_oneshot_event(task->client->aio_context,
263 nfs_co_generic_bh_cb, task);
/*
 * Installed as .bdrv_co_preadv: read @bytes starting at @offset into @iov.
 *
 * The request is submitted with nfs_pread_async() while the client mutex is
 * held; the coroutine then yields until the task is marked complete.  When
 * the result is shorter than the vector, the remainder is filled with zeroes.
 *
 * NOTE(review): error handling and the return statements are missing from
 * this listing.
 */
266 static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
267 uint64_t bytes, QEMUIOVector *iov,
270 NFSClient *client = bs->opaque;
273 nfs_co_init_task(bs, &task);
276 WITH_QEMU_LOCK_GUARD(&client->mutex) {
277 if (nfs_pread_async(client->context, client->fh,
278 offset, bytes, nfs_co_generic_cb, &task) != 0) {
/* Re-sync the fd handlers with what libnfs wants after queuing the request */
282 nfs_set_events(client);
/* Sleep until the completion path flags the task as complete */
284 while (!task.complete) {
285 qemu_coroutine_yield();
292 /* zero pad short reads */
293 if (task.ret < iov->size) {
294 qemu_iovec_memset(iov, task.ret, 0, iov->size - task.ret);
/*
 * Installed as .bdrv_co_pwritev: write @bytes from @iov starting at @offset.
 *
 * A vector with exactly one element is written directly from that element's
 * buffer.  Any other vector is first gathered into a temporary allocation
 * obtained with g_try_malloc().  The request is submitted with
 * nfs_pwrite_async() under the client mutex and the coroutine yields until
 * completion.  A result different from @bytes is an error: a negative result
 * is returned unchanged, a short write is turned into -EIO.
 *
 * NOTE(review): the allocation-failure and submit-failure branches, the
 * handling of my_buffer (presumably it marks the temporary buffer for
 * freeing) and the success return are missing from this listing.
 */
300 static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
301 uint64_t bytes, QEMUIOVector *iov,
304 NFSClient *client = bs->opaque;
307 bool my_buffer = false;
309 nfs_co_init_task(bs, &task);
/* Multi-element (or empty) vectors are flattened into one bounce buffer */
311 if (iov->niov != 1) {
312 buf = g_try_malloc(bytes);
313 if (bytes && buf == NULL) {
316 qemu_iovec_to_buf(iov, 0, buf, bytes);
319 buf = iov->iov[0].iov_base;
322 WITH_QEMU_LOCK_GUARD(&client->mutex) {
323 if (nfs_pwrite_async(client->context, client->fh,
325 nfs_co_generic_cb, &task) != 0) {
/* Re-sync the fd handlers with what libnfs wants after queuing the request */
332 nfs_set_events(client);
/* Sleep until the completion path flags the task as complete */
334 while (!task.complete) {
335 qemu_coroutine_yield();
/* Anything but a full write is reported as an error */
342 if (task.ret != bytes) {
343 return task.ret < 0 ? task.ret : -EIO;
/*
 * Installed as .bdrv_co_flush_to_disk: submits nfs_fsync_async() for the
 * open file handle under the client mutex, then yields until the task is
 * marked complete.
 * NOTE(review): the remaining call arguments, the failure branch and the
 * return statement are missing from this listing.
 */
349 static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
351 NFSClient *client = bs->opaque;
354 nfs_co_init_task(bs, &task);
356 WITH_QEMU_LOCK_GUARD(&client->mutex) {
357 if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
/* Re-sync the fd handlers with what libnfs wants after queuing the request */
362 nfs_set_events(client);
364 while (!task.complete) {
365 qemu_coroutine_yield();
/*
 * Installed as .bdrv_detach_aio_context: re-registers the libnfs socket in
 * the current AioContext with every callback set to NULL.
 * NOTE(review): a line following the call is missing from this listing;
 * presumably it resets client->events so that a later nfs_set_events()
 * registers the handlers again -- confirm.
 */
371 static void nfs_detach_aio_context(BlockDriverState *bs)
373 NFSClient *client = bs->opaque;
375 aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
376 false, NULL, NULL, NULL, NULL);
/*
 * Installed as .bdrv_attach_aio_context: remembers @new_context as the
 * client's AioContext and lets nfs_set_events() register the fd handlers
 * libnfs currently needs in it.
 */
380 static void nfs_attach_aio_context(BlockDriverState *bs,
381 AioContext *new_context)
383 NFSClient *client = bs->opaque;
385 client->aio_context = new_context;
386 nfs_set_events(client);
/*
 * Tear down @client.
 *
 * When a libnfs context exists: the fd handlers are cleared under the client
 * mutex, the file handle is closed, the export is unmounted if libnfs offers
 * nfs_umount() (LIBNFS_FEATURE_UMOUNT), and the context is destroyed and set
 * to NULL.  The path string, the mutex and the NFSServer object are released
 * as well.
 *
 * NOTE(review): several lines (closing braces, the #endif and possibly a
 * guard around nfs_close()) are missing from this listing, so the exact
 * nesting of the statements cannot be confirmed from here.
 */
389 static void nfs_client_close(NFSClient *client)
391 if (client->context) {
/* Stop event delivery before the socket goes away */
392 qemu_mutex_lock(&client->mutex);
393 aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
394 false, NULL, NULL, NULL, NULL);
395 qemu_mutex_unlock(&client->mutex);
397 nfs_close(client->context, client->fh);
400 #ifdef LIBNFS_FEATURE_UMOUNT
401 nfs_umount(client->context);
403 nfs_destroy_context(client->context);
404 client->context = NULL;
406 g_free(client->path);
407 qemu_mutex_destroy(&client->mutex);
408 qapi_free_NFSServer(client->server);
/* Avoid leaving a dangling pointer to the freed server object */
409 client->server = NULL;
/*
 * Installed as .bdrv_close: releases the NFSClient stored in bs->opaque via
 * nfs_client_close().
 */
412 static void nfs_file_close(BlockDriverState *bs)
414 NFSClient *client = bs->opaque;
415 nfs_client_close(client);
/*
 * Set up @client according to @opts: create the libnfs context, apply the
 * optional tuning parameters, mount the export, create (O_CREAT in @flags)
 * or open the file, and stat it.
 *
 * @flags is passed to nfs_open(); @open_flags carries BDRV_O_* flags and is
 * used to refuse readahead and page cache when BDRV_O_NOCACHE is set.
 * Errors are reported through @errp.
 *
 * On the visible success path, ret becomes the file size in units of
 * BDRV_SECTOR_SIZE, rounded up, and st_blocks / has_zero_init are cached in
 * @client.  The client is closed again via nfs_client_close() near the end
 * of the function.
 *
 * NOTE(review): this listing omits many original lines (result checks,
 * gotos, labels, #endif lines, closing braces and the return statement), so
 * the precise error flow and return values cannot be confirmed from here.
 */
418 static int64_t nfs_client_open(NFSClient *client, BlockdevOptionsNfs *opts,
419 int flags, int open_flags, Error **errp)
421 int64_t ret = -EINVAL;
423 char *file = NULL, *strp = NULL;
425 qemu_mutex_init(&client->mutex);
427 client->path = g_strdup(opts->path);
/*
 * Locate the last '/' in the path; the text from there on is kept in
 * "file" and later passed to nfs_creat()/nfs_open().
 * NOTE(review): a line after the g_strdup() is missing here; presumably it
 * cuts client->path at that '/' so only the directory is mounted -- confirm.
 */
429 strp = strrchr(client->path, '/');
431 error_setg(errp, "Invalid URL specified");
434 file = g_strdup(strp);
437 /* Steal the NFSServer object from opts; set the original pointer to NULL
438 * to avoid use after free and double free. */
439 client->server = opts->server;
442 client->context = nfs_init_context();
443 if (client->context == NULL) {
444 error_setg(errp, "Failed to init NFS context");
/* Optional credentials and TCP tuning, applied only when given */
448 if (opts->has_user) {
449 client->uid = opts->user;
450 nfs_set_uid(client->context, client->uid);
453 if (opts->has_group) {
454 client->gid = opts->group;
455 nfs_set_gid(client->context, client->gid);
458 if (opts->has_tcp_syn_count) {
459 client->tcp_syncnt = opts->tcp_syn_count;
460 nfs_set_tcp_syncnt(client->context, client->tcp_syncnt);
/* Readahead: refused with cache.direct=on, clamped to the QEMU maximum */
463 #ifdef LIBNFS_FEATURE_READAHEAD
464 if (opts->has_readahead_size) {
465 if (open_flags & BDRV_O_NOCACHE) {
466 error_setg(errp, "Cannot enable NFS readahead "
467 "if cache.direct = on");
470 client->readahead = opts->readahead_size;
471 if (client->readahead > QEMU_NFS_MAX_READAHEAD_SIZE) {
472 warn_report("Truncating NFS readahead size to %d",
473 QEMU_NFS_MAX_READAHEAD_SIZE);
474 client->readahead = QEMU_NFS_MAX_READAHEAD_SIZE;
476 nfs_set_readahead(client->context, client->readahead);
477 #ifdef LIBNFS_FEATURE_PAGECACHE
478 nfs_set_pagecache_ttl(client->context, 0);
/* Remembered so that nfs_reopen_prepare() can refuse BDRV_O_NOCACHE later */
480 client->cache_used = true;
/* Page cache: refused with cache.direct=on, clamped to the QEMU maximum */
484 #ifdef LIBNFS_FEATURE_PAGECACHE
485 if (opts->has_page_cache_size) {
486 if (open_flags & BDRV_O_NOCACHE) {
487 error_setg(errp, "Cannot enable NFS pagecache "
488 "if cache.direct = on");
491 client->pagecache = opts->page_cache_size;
492 if (client->pagecache > QEMU_NFS_MAX_PAGECACHE_SIZE) {
493 warn_report("Truncating NFS pagecache size to %d pages",
494 QEMU_NFS_MAX_PAGECACHE_SIZE);
495 client->pagecache = QEMU_NFS_MAX_PAGECACHE_SIZE;
497 nfs_set_pagecache(client->context, client->pagecache);
498 nfs_set_pagecache_ttl(client->context, 0);
499 client->cache_used = true;
503 #ifdef LIBNFS_FEATURE_DEBUG
504 if (opts->has_debug) {
505 client->debug = opts->debug;
506 /* limit the maximum debug level to avoid potential flooding
507 * of our log files. */
508 if (client->debug > QEMU_NFS_MAX_DEBUG_LEVEL) {
509 warn_report("Limiting NFS debug level to %d",
510 QEMU_NFS_MAX_DEBUG_LEVEL);
511 client->debug = QEMU_NFS_MAX_DEBUG_LEVEL;
513 nfs_set_debug(client->context, client->debug);
/* Mount the export on the configured server */
517 ret = nfs_mount(client->context, client->server->host, client->path);
519 error_setg(errp, "Failed to mount nfs share: %s",
520 nfs_get_error(client->context));
/* Create the file with mode 0600 when asked to, otherwise open it */
524 if (flags & O_CREAT) {
525 ret = nfs_creat(client->context, file, 0600, &client->fh);
527 error_setg(errp, "Failed to create file: %s",
528 nfs_get_error(client->context));
532 ret = nfs_open(client->context, file, flags, &client->fh);
534 error_setg(errp, "Failed to open file : %s",
535 nfs_get_error(client->context));
540 ret = nfs_fstat(client->context, client->fh, &st);
542 error_setg(errp, "Failed to fstat file: %s",
543 nfs_get_error(client->context));
/* Result: size in sectors (rounded up); cache allocation and file type info */
547 ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
548 client->st_blocks = st.st_blocks;
549 client->has_zero_init = S_ISREG(st.st_mode);
/* NOTE(review): presumably reached only via a failure label -- confirm */
554 nfs_client_close(client);
/*
 * Convert the runtime options in @options into a BlockdevOptionsNfs object
 * by running the QAPI visitor over an input visitor created with
 * qobject_input_visitor_new_flat_confused().  A visitor error is propagated
 * to @errp.  Afterwards every entry is removed from @options.
 *
 * NOTE(review): the early exits and the return statement are missing from
 * this listing; callers use the returned pointer and check it for NULL.
 */
560 static BlockdevOptionsNfs *nfs_options_qdict_to_qapi(QDict *options,
563 BlockdevOptionsNfs *opts = NULL;
566 Error *local_err = NULL;
568 v = qobject_input_visitor_new_flat_confused(options, errp);
573 visit_type_BlockdevOptionsNfs(v, NULL, &opts, &local_err);
577 error_propagate(errp, local_err);
581 /* Remove the processed options from the QDict (the visitor processes
582 * _all_ options in the QDict) */
583 while ((e = qdict_first(options))) {
584 qdict_del(options, e->key);
/*
 * Variant of nfs_client_open() that takes the options as a QDict: converts
 * @options with nfs_options_qdict_to_qapi(), opens the client with the
 * result and frees the converted options object afterwards.
 * NOTE(review): the check of the conversion result and the return statement
 * are missing from this listing.
 */
590 static int64_t nfs_client_open_qdict(NFSClient *client, QDict *options,
591 int flags, int open_flags, Error **errp)
593 BlockdevOptionsNfs *opts;
596 opts = nfs_options_qdict_to_qapi(options, errp);
602 ret = nfs_client_open(client, opts, flags, open_flags, errp);
604 qapi_free_BlockdevOptionsNfs(opts);
/*
 * Installed as .bdrv_file_open: binds the client to the node's AioContext
 * and opens the file described by @options.  BDRV_O_RDWR in @flags selects
 * O_RDWR, otherwise the file is opened O_RDONLY.  The value returned by
 * nfs_client_open_qdict() is stored as the node's total sector count.
 * NOTE(review): the error check on ret and the return statement are missing
 * from this listing.
 */
608 static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
610 NFSClient *client = bs->opaque;
613 client->aio_context = bdrv_get_aio_context(bs);
615 ret = nfs_client_open_qdict(client, options,
616 (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
617 bs->open_flags, errp);
622 bs->total_sectors = ret;
/*
 * Creation options understood by this driver (referenced by .create_opts in
 * bdrv_nfs).  The only visible entry is the size option, BLOCK_OPT_SIZE.
 * NOTE(review): the lines opening the descriptor array and closing the
 * initialiser are missing from this listing.
 */
626 static QemuOptsList nfs_create_opts = {
627 .name = "nfs-create-opts",
628 .head = QTAILQ_HEAD_INITIALIZER(nfs_create_opts.head),
631 .name = BLOCK_OPT_SIZE,
632 .type = QEMU_OPT_SIZE,
633 .help = "Virtual disk size"
635 { /* end of list */ }
/*
 * Installed as .bdrv_co_create: creates the file named by the NFS create
 * options.  A zero-initialised temporary NFSClient is allocated, bound to
 * the AioContext returned by qemu_get_aio_context(), and opened with
 * O_CREAT.  The file is then resized to opts->size with nfs_ftruncate() and
 * the client is closed.
 * NOTE(review): the error check after nfs_client_open(), the release of the
 * temporary client and the return statement are missing from this listing.
 */
639 static int nfs_file_co_create(BlockdevCreateOptions *options, Error **errp)
641 BlockdevCreateOptionsNfs *opts = &options->u.nfs;
642 NFSClient *client = g_new0(NFSClient, 1);
/* The union member read above is only valid for the NFS driver */
645 assert(options->driver == BLOCKDEV_DRIVER_NFS);
647 client->aio_context = qemu_get_aio_context();
649 ret = nfs_client_open(client, opts->location, O_CREAT, 0, errp);
653 ret = nfs_ftruncate(client->context, client->fh, opts->size);
654 nfs_client_close(client);
/*
 * Installed as .bdrv_co_create_opts: builds a BlockdevCreateOptions object
 * for the NFS driver and hands it to nfs_file_co_create().  The size is
 * taken from BLOCK_OPT_SIZE (default 0) and rounded up.  The location is
 * obtained by parsing the URL with nfs_parse_uri() into a temporary QDict
 * and converting that with nfs_options_qdict_to_qapi().  The QDict and the
 * create options are released at the end.
 * NOTE(review): the remaining parameters, the rounding granularity, the
 * error checks and the return statement are missing from this listing.
 */
661 static int coroutine_fn nfs_file_co_create_opts(BlockDriver *drv,
666 BlockdevCreateOptions *create_options;
667 BlockdevCreateOptionsNfs *nfs_opts;
671 create_options = g_new0(BlockdevCreateOptions, 1);
672 create_options->driver = BLOCKDEV_DRIVER_NFS;
673 nfs_opts = &create_options->u.nfs;
675 /* Read out options */
676 nfs_opts->size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
/* Translate the URL into the structured location expected by co_create */
679 options = qdict_new();
680 ret = nfs_parse_uri(url, options, errp);
685 nfs_opts->location = nfs_options_qdict_to_qapi(options, errp);
686 if (nfs_opts->location == NULL) {
691 ret = nfs_file_co_create(create_options, errp);
698 qobject_unref(options);
699 qapi_free_BlockdevCreateOptions(create_options);
/*
 * Installed as .bdrv_has_zero_init and .bdrv_has_zero_init_truncate:
 * returns the flag cached by nfs_client_open(), which is S_ISREG() of the
 * file's mode at open time.
 */
703 static int nfs_has_zero_init(BlockDriverState *bs)
705 NFSClient *client = bs->opaque;
706 return client->has_zero_init;
/*
 * Completion callback for the nfs_fstat_async() request issued by
 * nfs_get_allocated_file_size(); @private_data is the NFSRPC.
 * On success (task->ret == 0) the stat result in @data is copied to
 * task->st.  The task is then flagged complete and bdrv_wakeup() is called
 * on task->bs.
 * NOTE(review): the assignment of task->ret and the condition guarding
 * error_report() are missing from this listing.
 */
709 /* Called (via nfs_service) with QemuMutex held. */
711 nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
714 NFSRPC *task = private_data;
716 if (task->ret == 0) {
717 memcpy(task->st, data, sizeof(struct stat));
720 error_report("NFS Error: %s", nfs_get_error(nfs));
723 /* Set task->complete before reading bs->wakeup. */
724 atomic_mb_set(&task->complete, 1);
725 bdrv_wakeup(task->bs);
/*
 * Installed as .bdrv_get_allocated_file_size: returns st_blocks * 512 for
 * the file, or a negative task result on failure.
 * For a read-only node opened without BDRV_O_NOCACHE the cached st_blocks
 * value is used.  Otherwise a fresh nfs_fstat_async() is issued and the
 * function polls with BDRV_POLL_WHILE() until the callback completes.
 * NOTE(review): the task/stat set-up, locking and the submit-failure branch
 * are missing from this listing.
 */
728 static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
730 NFSClient *client = bs->opaque;
/* Fast path: use the value cached at open or read-only reopen */
734 if (bdrv_is_read_only(bs) &&
735 !(bs->open_flags & BDRV_O_NOCACHE)) {
736 return client->st_blocks * 512;
741 if (nfs_fstat_async(client->context, client->fh, nfs_get_allocated_file_size_cb,
746 nfs_set_events(client);
747 BDRV_POLL_WHILE(bs, !task.complete);
749 return (task.ret < 0 ? task.ret : st.st_blocks * 512);
/*
 * Installed as .bdrv_co_truncate: resizes the file to @offset with
 * nfs_ftruncate().  Only PREALLOC_MODE_OFF is supported; any other mode is
 * rejected with an error.  A truncate failure is reported through @errp
 * with the errno derived from the return value.
 * NOTE(review): the last parameter, the result check and the return
 * statements are missing from this listing.
 */
752 static int coroutine_fn
753 nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
754 PreallocMode prealloc, BdrvRequestFlags flags,
757 NFSClient *client = bs->opaque;
760 if (prealloc != PREALLOC_MODE_OFF) {
761 error_setg(errp, "Unsupported preallocation mode '%s'",
762 PreallocMode_str(prealloc));
766 ret = nfs_ftruncate(client->context, client->fh, offset);
768 error_setg_errno(errp, -ret, "Failed to truncate file");
/*
 * Installed as .bdrv_reopen_prepare: validates the new flags in @state.
 * Two transitions are refused: read-only to read-write, and enabling
 * BDRV_O_NOCACHE while libnfs readahead or page cache is in use.  For a
 * reopen without BDRV_O_RDWR the cached st_blocks value is refreshed with
 * nfs_fstat().
 * NOTE(review): the local declarations, the result check after nfs_fstat()
 * and the return statements are missing from this listing.
 */
775 /* Note that this will not re-establish a connection with the NFS server
776 * - it is effectively a NOP. */
777 static int nfs_reopen_prepare(BDRVReopenState *state,
778 BlockReopenQueue *queue, Error **errp)
780 NFSClient *client = state->bs->opaque;
784 if (state->flags & BDRV_O_RDWR && bdrv_is_read_only(state->bs)) {
785 error_setg(errp, "Cannot open a read-only mount as read-write");
789 if ((state->flags & BDRV_O_NOCACHE) && client->cache_used) {
790 error_setg(errp, "Cannot disable cache if libnfs readahead or"
791 " pagecache is enabled");
795 /* Update cache for read-only reopens */
796 if (!(state->flags & BDRV_O_RDWR)) {
797 ret = nfs_fstat(client->context, client->fh, &st);
799 error_setg(errp, "Failed to fstat file: %s",
800 nfs_get_error(client->context));
803 client->st_blocks = st.st_blocks;
/*
 * Installed as .bdrv_refresh_filename: rebuilds bs->exact_filename as
 * "nfs://" followed by the server host and the path.  A "?uid=",
 * "?gid=" or "?uid=...&gid=..." query is appended depending on which of
 * client->uid and client->gid are non-zero.
 * NOTE(review): the final argument lines of the first two snprintf() calls
 * and the last else line are missing from this listing.
 */
809 static void nfs_refresh_filename(BlockDriverState *bs)
811 NFSClient *client = bs->opaque;
813 if (client->uid && !client->gid) {
814 snprintf(bs->exact_filename, sizeof(bs->exact_filename),
815 "nfs://%s%s?uid=%" PRId64, client->server->host, client->path,
817 } else if (!client->uid && client->gid) {
818 snprintf(bs->exact_filename, sizeof(bs->exact_filename),
819 "nfs://%s%s?gid=%" PRId64, client->server->host, client->path,
821 } else if (client->uid && client->gid) {
822 snprintf(bs->exact_filename, sizeof(bs->exact_filename),
823 "nfs://%s%s?uid=%" PRId64 "&gid=%" PRId64,
824 client->server->host, client->path, client->uid, client->gid);
826 snprintf(bs->exact_filename, sizeof(bs->exact_filename),
827 "nfs://%s%s", client->server->host, client->path);
/*
 * Installed as .bdrv_dirname: returns a newly allocated string of the form
 * "nfs://HOST PATH/" (host and path joined without a separator, trailing
 * slash appended).  When a uid or gid is set, the filename is refreshed and
 * an error is reported through @errp instead.
 * NOTE(review): the last argument of error_setg() and the return inside the
 * error branch are missing from this listing.
 */
831 static char *nfs_dirname(BlockDriverState *bs, Error **errp)
833 NFSClient *client = bs->opaque;
835 if (client->uid || client->gid) {
836 bdrv_refresh_filename(bs);
837 error_setg(errp, "Cannot generate a base directory for NFS node '%s'",
842 return g_strdup_printf("nfs://%s%s/", client->server->host, client->path);
/*
 * Installed as .bdrv_co_invalidate_cache and compiled only when libnfs
 * provides a page cache (LIBNFS_FEATURE_PAGECACHE): invalidates the libnfs
 * page cache for the open file handle.
 * NOTE(review): the second parameter line and the closing #endif are
 * missing from this listing.
 */
845 #ifdef LIBNFS_FEATURE_PAGECACHE
846 static void coroutine_fn nfs_co_invalidate_cache(BlockDriverState *bs,
849 NFSClient *client = bs->opaque;
850 nfs_pagecache_invalidate(client->context, client->fh);
/*
 * Option-name list referenced by .strong_runtime_opts in bdrv_nfs.
 * NOTE(review): the array contents are missing from this listing.
 */
854 static const char *nfs_strong_runtime_opts[] = {
/*
 * BlockDriver description for the "nfs" protocol: wires the functions of
 * this file into the block layer callbacks.  The per-node state is an
 * NFSClient (instance_size).
 * NOTE(review): the closing lines of the initialiser (including the #endif
 * for the page cache entry) are missing from this listing.
 */
863 static BlockDriver bdrv_nfs = {
864 .format_name = "nfs",
865 .protocol_name = "nfs",
867 .instance_size = sizeof(NFSClient),
868 .bdrv_parse_filename = nfs_parse_filename,
869 .create_opts = &nfs_create_opts,
/* Both zero-init queries share the same implementation */
871 .bdrv_has_zero_init = nfs_has_zero_init,
872 .bdrv_has_zero_init_truncate = nfs_has_zero_init,
873 .bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
874 .bdrv_co_truncate = nfs_file_co_truncate,
/* Open, close, create and reopen */
876 .bdrv_file_open = nfs_file_open,
877 .bdrv_close = nfs_file_close,
878 .bdrv_co_create = nfs_file_co_create,
879 .bdrv_co_create_opts = nfs_file_co_create_opts,
880 .bdrv_reopen_prepare = nfs_reopen_prepare,
/* Coroutine-based I/O */
882 .bdrv_co_preadv = nfs_co_preadv,
883 .bdrv_co_pwritev = nfs_co_pwritev,
884 .bdrv_co_flush_to_disk = nfs_co_flush,
/* AioContext switching and filename handling */
886 .bdrv_detach_aio_context = nfs_detach_aio_context,
887 .bdrv_attach_aio_context = nfs_attach_aio_context,
888 .bdrv_refresh_filename = nfs_refresh_filename,
889 .bdrv_dirname = nfs_dirname,
891 .strong_runtime_opts = nfs_strong_runtime_opts,
/* Only available when libnfs has a page cache */
893 #ifdef LIBNFS_FEATURE_PAGECACHE
894 .bdrv_co_invalidate_cache = nfs_co_invalidate_cache,
/*
 * Module initialisation: registers the NFS driver with the block layer.
 * The function is passed to block_init() below.
 */
898 static void nfs_block_init(void)
900 bdrv_register(&bdrv_nfs);
903 block_init(nfs_block_init);