2 * QEMU Block driver for native access to files on NFS shares
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "qemu/osdep.h"
28 #include "qemu/config-file.h"
29 #include "qemu/error-report.h"
30 #include "qapi/error.h"
31 #include "block/block_int.h"
32 #include "block/qdict.h"
35 #include "qemu/main-loop.h"
36 #include "qemu/module.h"
37 #include "qemu/option.h"
39 #include "qemu/cutils.h"
40 #include "sysemu/sysemu.h"
41 #include "sysemu/replay.h"
42 #include "qapi/qapi-visit-block-core.h"
43 #include "qapi/qmp/qdict.h"
44 #include "qapi/qmp/qstring.h"
45 #include "qapi/qobject-input-visitor.h"
46 #include "qapi/qobject-output-visitor.h"
47 #include <nfsc/libnfs.h>
/*
 * Upper bound for the readahead size: nfs_client_open() truncates larger
 * requests to this value and emits a warning.
 */
50 #define QEMU_NFS_MAX_READAHEAD_SIZE 1048576
/*
 * Upper bound for the page cache size (8388608 / NFS_BLKSIZE). The warning
 * printed when a request is truncated reports this limit in pages.
 */
51 #define QEMU_NFS_MAX_PAGECACHE_SIZE (8388608 / NFS_BLKSIZE)
/* Highest debug level that nfs_client_open() passes on to nfs_set_debug(). */
52 #define QEMU_NFS_MAX_DEBUG_LEVEL 2
/*
 * Per-node driver state. The driver table sets instance_size to
 * sizeof(NFSClient) and the callbacks obtain it from bs->opaque.
 */
54 typedef struct NFSClient {
/* libnfs context; reset to NULL by nfs_client_close() after it is destroyed */
55 struct nfs_context *context;
/* AioContext on which the fd handlers for the libnfs socket are registered */
59 AioContext *aio_context;
/*
 * Option values as applied in nfs_client_open(); uid and gid are also read
 * by nfs_refresh_filename() and nfs_dirname().
 */
65 int64_t uid, gid, tcp_syncnt, readahead, pagecache, debug;
/*
 * State of one asynchronous libnfs request. Members referenced elsewhere in
 * this file include co, client, ret, iov, complete, st and bs.
 */
68 typedef struct NFSRPC {
/*
 * Parse the NFS URI in @filename and record its components in @options.
 *
 * On a well-formed URI the following keys are stored: "server.host",
 * "server.type" (always "inet") and "path", plus one key per recognised
 * query parameter (uid, gid, tcp-syncnt, readahead, pagecache, debug),
 * renamed to the driver option it corresponds to.
 *
 * Problems are reported through @errp.
 * NOTE(review): the return statement is not visible here; callers store the
 * result in an int, presumably 0 on success and negative on error -- confirm.
 */
78 static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
81 QueryParams *qp = NULL;
84 uri = uri_parse(filename);
86 error_setg(errp, "Invalid URI specified");
/* Only the "nfs" scheme is accepted. */
89 if (g_strcmp0(uri->scheme, "nfs") != 0) {
90 error_setg(errp, "URI scheme must be 'nfs'");
95 error_setg(errp, "missing hostname in URI");
100 error_setg(errp, "missing file path in URI");
104 qp = query_params_parse(uri->query);
106 error_setg(errp, "could not parse query parameters");
110 qdict_put_str(options, "server.host", uri->server);
111 qdict_put_str(options, "server.type", "inet");
112 qdict_put_str(options, "path", uri->path);
/* Translate each query parameter into the matching driver option. */
114 for (i = 0; i < qp->n; i++) {
115 unsigned long long val;
116 if (!qp->p[i].value) {
117 error_setg(errp, "Value for NFS parameter expected: %s",
/*
 * The value is only checked with parse_uint_full(); the original string,
 * not the parsed number, is what gets stored in @options below.
 */
121 if (parse_uint_full(qp->p[i].value, &val, 0)) {
122 error_setg(errp, "Illegal value for NFS parameter: %s",
126 if (!strcmp(qp->p[i].name, "uid")) {
127 qdict_put_str(options, "user", qp->p[i].value);
128 } else if (!strcmp(qp->p[i].name, "gid")) {
129 qdict_put_str(options, "group", qp->p[i].value);
130 } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
131 qdict_put_str(options, "tcp-syn-count", qp->p[i].value);
132 } else if (!strcmp(qp->p[i].name, "readahead")) {
133 qdict_put_str(options, "readahead-size", qp->p[i].value);
134 } else if (!strcmp(qp->p[i].name, "pagecache")) {
135 qdict_put_str(options, "page-cache-size", qp->p[i].value);
136 } else if (!strcmp(qp->p[i].name, "debug")) {
137 qdict_put_str(options, "debug", qp->p[i].value);
139 error_setg(errp, "Unknown NFS parameter name: %s",
/* Release the parsed query parameters. */
147 query_params_free(qp);
/*
 * Scan @options for any key that nfs_parse_uri() would also derive from a
 * filename: "host", "path", "user", "group", "tcp-syn-count",
 * "readahead-size", "page-cache-size", "debug", or any key beginning with
 * "server.". A matching key is reported through @errp.
 *
 * NOTE(review): the return statements are not visible here; the caller uses
 * the result as an if-condition, so presumably true means "conflict found".
 */
155 static bool nfs_has_filename_options_conflict(QDict *options, Error **errp)
157 const QDictEntry *qe;
159 for (qe = qdict_first(options); qe; qe = qdict_next(options, qe)) {
160 if (!strcmp(qe->key, "host") ||
161 !strcmp(qe->key, "path") ||
162 !strcmp(qe->key, "user") ||
163 !strcmp(qe->key, "group") ||
164 !strcmp(qe->key, "tcp-syn-count") ||
165 !strcmp(qe->key, "readahead-size") ||
166 !strcmp(qe->key, "page-cache-size") ||
167 !strcmp(qe->key, "debug") ||
/* prefix match: any key below "server." */
168 strstart(qe->key, "server.", NULL))
170 error_setg(errp, "Option %s cannot be used with a filename",
/*
 * Filename parser installed as .bdrv_parse_filename: checks @options for
 * keys that conflict with a filename, then parses @filename as an NFS URI
 * into @options. Errors are reported through @errp.
 */
179 static void nfs_parse_filename(const char *filename, QDict *options,
182 if (nfs_has_filename_options_conflict(options, errp)) {
186 nfs_parse_uri(filename, options, errp);
/*
 * Forward declarations: nfs_set_events() passes these handlers to
 * aio_set_fd_handler() before their definitions appear.
 */
189 static void nfs_process_read(void *arg);
190 static void nfs_process_write(void *arg);
192 /* Called with QemuMutex held. */
/*
 * Query libnfs for the poll events it currently wants and, when they differ
 * from client->events, update the fd handlers on the libnfs socket: the read
 * handler is installed only if POLLIN is wanted and the write handler only
 * if POLLOUT is wanted.
 */
193 static void nfs_set_events(NFSClient *client)
195 int ev = nfs_which_events(client->context);
196 if (ev != client->events) {
197 aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
199 (ev & POLLIN) ? nfs_process_read : NULL,
200 (ev & POLLOUT) ? nfs_process_write : NULL,
/*
 * fd read handler; @arg is the NFSClient. With the client mutex held, lets
 * libnfs service the POLLIN event and then refreshes the registered events.
 */
207 static void nfs_process_read(void *arg)
209 NFSClient *client = arg;
211 qemu_mutex_lock(&client->mutex);
212 nfs_service(client->context, POLLIN);
213 nfs_set_events(client);
214 qemu_mutex_unlock(&client->mutex);
/*
 * fd write handler; @arg is the NFSClient. With the client mutex held, lets
 * libnfs service the POLLOUT event and then refreshes the registered events.
 */
217 static void nfs_process_write(void *arg)
219 NFSClient *client = arg;
221 qemu_mutex_lock(&client->mutex);
222 nfs_service(client->context, POLLOUT);
223 nfs_set_events(client);
224 qemu_mutex_unlock(&client->mutex);
/*
 * Initialise @task for a request issued from the calling coroutine: the
 * current coroutine and the NFSClient taken from bs->opaque are recorded.
 */
227 static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
/* coroutine to be woken once the request has completed */
230 .co = qemu_coroutine_self(),
232 .client = bs->opaque,
/*
 * Callback scheduled by nfs_co_generic_cb(); @opaque is the NFSRPC task.
 * Wakes the coroutine stored in task->co.
 */
236 static void nfs_co_generic_bh_cb(void *opaque)
238 NFSRPC *task = opaque;
241 aio_co_wake(task->co);
244 /* Called (via nfs_service) with QemuMutex held. */
/*
 * Completion callback handed to the asynchronous libnfs calls;
 * @private_data is the NFSRPC task.
 *
 * For a positive result with an I/O vector attached, task->ret bytes are
 * copied from @data into the vector, but only if they fit into it. Finally
 * nfs_co_generic_bh_cb() is scheduled on the client's AioContext.
 */
246 nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
249 NFSRPC *task = private_data;
252 if (task->ret > 0 && task->iov) {
/* never copy more than the destination vector can hold */
253 if (task->ret <= task->iov->size) {
254 qemu_iovec_from_buf(task->iov, 0, data, task->ret);
/* NOTE(review): the condition guarding this report is not visible here. */
260 error_report("NFS Error: %s", nfs_get_error(nfs));
262 replay_bh_schedule_oneshot_event(task->client->aio_context,
263 nfs_co_generic_bh_cb, task);
/*
 * Coroutine read of @bytes bytes at @offset into @iov.
 *
 * The request is submitted with nfs_pread_async() while holding the client
 * mutex; the coroutine then yields until the task is marked complete. A
 * result shorter than the vector is padded with zeroes.
 */
266 static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
267 uint64_t bytes, QEMUIOVector *iov,
270 NFSClient *client = bs->opaque;
273 nfs_co_init_task(bs, &task);
276 qemu_mutex_lock(&client->mutex);
277 if (nfs_pread_async(client->context, client->fh,
278 offset, bytes, nfs_co_generic_cb, &task) != 0) {
/* submission failed: drop the lock before leaving */
279 qemu_mutex_unlock(&client->mutex);
/* make sure the fd handlers match what libnfs now wants to poll for */
283 nfs_set_events(client);
284 qemu_mutex_unlock(&client->mutex);
/* re-check after every wake-up; only the completion flag ends the wait */
285 while (!task.complete) {
286 qemu_coroutine_yield();
293 /* zero pad short reads */
294 if (task.ret < iov->size) {
295 qemu_iovec_memset(iov, task.ret, 0, iov->size - task.ret);
/*
 * Coroutine write of @bytes bytes from @iov at @offset.
 *
 * A vector with exactly one element is written straight from that element's
 * buffer; any other vector is first copied into a temporary contiguous
 * buffer. The request is submitted with nfs_pwrite_async() under the client
 * mutex and the coroutine yields until the task is complete. A result other
 * than @bytes is returned as the negative result itself, or as -EIO when the
 * result is a non-negative short count.
 */
301 static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
302 uint64_t bytes, QEMUIOVector *iov,
305 NFSClient *client = bs->opaque;
308 bool my_buffer = false;
310 nfs_co_init_task(bs, &task);
312 if (iov->niov != 1) {
313 buf = g_try_malloc(bytes);
/* NULL only counts as an allocation failure when bytes is non-zero */
314 if (bytes && buf == NULL) {
317 qemu_iovec_to_buf(iov, 0, buf, bytes);
320 buf = iov->iov[0].iov_base;
323 qemu_mutex_lock(&client->mutex);
324 if (nfs_pwrite_async(client->context, client->fh,
326 nfs_co_generic_cb, &task) != 0) {
/* submission failed: drop the lock before leaving */
327 qemu_mutex_unlock(&client->mutex);
334 nfs_set_events(client);
335 qemu_mutex_unlock(&client->mutex);
336 while (!task.complete) {
337 qemu_coroutine_yield();
/* anything but a full write is an error */
344 if (task.ret != bytes) {
345 return task.ret < 0 ? task.ret : -EIO;
/*
 * Coroutine flush: submits nfs_fsync_async() for the open file handle under
 * the client mutex, then yields until the task is marked complete.
 */
351 static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
353 NFSClient *client = bs->opaque;
356 nfs_co_init_task(bs, &task);
358 qemu_mutex_lock(&client->mutex);
359 if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
/* submission failed: drop the lock before leaving */
361 qemu_mutex_unlock(&client->mutex);
365 nfs_set_events(client);
366 qemu_mutex_unlock(&client->mutex);
367 while (!task.complete) {
368 qemu_coroutine_yield();
/*
 * Remove the fd handlers for the libnfs socket from the client's current
 * AioContext by registering NULL for every handler.
 */
374 static void nfs_detach_aio_context(BlockDriverState *bs)
376 NFSClient *client = bs->opaque;
378 aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
379 false, NULL, NULL, NULL, NULL);
/*
 * Record @new_context as the client's AioContext and register the fd
 * handlers on it through nfs_set_events().
 *
 * NOTE(review): nfs_set_events() is documented as "called with QemuMutex
 * held", but no lock is visible around the call here -- confirm.
 */
383 static void nfs_attach_aio_context(BlockDriverState *bs,
384 AioContext *new_context)
386 NFSClient *client = bs->opaque;
388 client->aio_context = new_context;
389 nfs_set_events(client);
/*
 * Release everything held by @client.
 *
 * When a libnfs context exists, the fd handlers are removed (under the
 * client mutex), the file handle is closed, the share is unmounted if libnfs
 * provides LIBNFS_FEATURE_UMOUNT, and the context is destroyed and its
 * pointer cleared. The path string, the mutex and the NFSServer object are
 * released as well.
 */
392 static void nfs_client_close(NFSClient *client)
394 if (client->context) {
395 qemu_mutex_lock(&client->mutex);
396 aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
397 false, NULL, NULL, NULL, NULL);
398 qemu_mutex_unlock(&client->mutex);
/* NOTE(review): a guard around this call, if any, is not visible here. */
400 nfs_close(client->context, client->fh);
403 #ifdef LIBNFS_FEATURE_UMOUNT
404 nfs_umount(client->context);
406 nfs_destroy_context(client->context);
/* lets a repeated call skip the context teardown */
407 client->context = NULL;
409 g_free(client->path);
410 qemu_mutex_destroy(&client->mutex);
411 qapi_free_NFSServer(client->server);
412 client->server = NULL;
/* .bdrv_close callback: tears down the NFSClient stored in bs->opaque. */
415 static void nfs_file_close(BlockDriverState *bs)
417 NFSClient *client = bs->opaque;
418 nfs_client_close(client);
/*
 * Set up @client from @opts, mount the share and open the file.
 *
 * @flags are open(2)-style flags handed to libnfs; with O_CREAT the file is
 * created with mode 0600 instead of being opened. @open_flags are the block
 * layer flags: BDRV_O_NOCACHE makes the readahead and page cache options an
 * error. Errors are reported through @errp.
 *
 * After a successful fstat, the result value is the file size in units of
 * BDRV_SECTOR_SIZE, rounded up, and st_blocks / has_zero_init are cached in
 * @client.
 * NOTE(review): the return statement is not visible here; the result
 * variable starts out as -EINVAL.
 */
421 static int64_t nfs_client_open(NFSClient *client, BlockdevOptionsNfs *opts,
422 int flags, int open_flags, Error **errp)
424 int64_t ret = -EINVAL;
426 char *file = NULL, *strp = NULL;
428 qemu_mutex_init(&client->mutex);
430 client->path = g_strdup(opts->path);
/* find the last '/' of the path; "file" is a copy from that slash onwards */
432 strp = strrchr(client->path, '/');
434 error_setg(errp, "Invalid URL specified");
437 file = g_strdup(strp);
440 /* Steal the NFSServer object from opts; set the original pointer to NULL
441 * to avoid use after free and double free. */
442 client->server = opts->server;
445 client->context = nfs_init_context();
446 if (client->context == NULL) {
447 error_setg(errp, "Failed to init NFS context");
/* Optional settings are only applied when present in @opts. */
451 if (opts->has_user) {
452 client->uid = opts->user;
453 nfs_set_uid(client->context, client->uid);
456 if (opts->has_group) {
457 client->gid = opts->group;
458 nfs_set_gid(client->context, client->gid);
461 if (opts->has_tcp_syn_count) {
462 client->tcp_syncnt = opts->tcp_syn_count;
463 nfs_set_tcp_syncnt(client->context, client->tcp_syncnt);
466 #ifdef LIBNFS_FEATURE_READAHEAD
467 if (opts->has_readahead_size) {
/* readahead is rejected when the node was opened with BDRV_O_NOCACHE */
468 if (open_flags & BDRV_O_NOCACHE) {
469 error_setg(errp, "Cannot enable NFS readahead "
470 "if cache.direct = on");
473 client->readahead = opts->readahead_size;
/* clamp to the driver limit */
474 if (client->readahead > QEMU_NFS_MAX_READAHEAD_SIZE) {
475 warn_report("Truncating NFS readahead size to %d",
476 QEMU_NFS_MAX_READAHEAD_SIZE);
477 client->readahead = QEMU_NFS_MAX_READAHEAD_SIZE;
479 nfs_set_readahead(client->context, client->readahead);
480 #ifdef LIBNFS_FEATURE_PAGECACHE
481 nfs_set_pagecache_ttl(client->context, 0);
/* remembered so that nfs_reopen_prepare() can refuse BDRV_O_NOCACHE */
483 client->cache_used = true;
487 #ifdef LIBNFS_FEATURE_PAGECACHE
488 if (opts->has_page_cache_size) {
/* the page cache is rejected when the node was opened with BDRV_O_NOCACHE */
489 if (open_flags & BDRV_O_NOCACHE) {
490 error_setg(errp, "Cannot enable NFS pagecache "
491 "if cache.direct = on");
494 client->pagecache = opts->page_cache_size;
/* clamp to the driver limit */
495 if (client->pagecache > QEMU_NFS_MAX_PAGECACHE_SIZE) {
496 warn_report("Truncating NFS pagecache size to %d pages",
497 QEMU_NFS_MAX_PAGECACHE_SIZE);
498 client->pagecache = QEMU_NFS_MAX_PAGECACHE_SIZE;
500 nfs_set_pagecache(client->context, client->pagecache);
501 nfs_set_pagecache_ttl(client->context, 0);
502 client->cache_used = true;
506 #ifdef LIBNFS_FEATURE_DEBUG
507 if (opts->has_debug) {
508 client->debug = opts->debug;
509 /* limit the maximum debug level to avoid potential flooding
510 * of our log files. */
511 if (client->debug > QEMU_NFS_MAX_DEBUG_LEVEL) {
512 warn_report("Limiting NFS debug level to %d",
513 QEMU_NFS_MAX_DEBUG_LEVEL);
514 client->debug = QEMU_NFS_MAX_DEBUG_LEVEL;
516 nfs_set_debug(client->context, client->debug);
/* mount client->path as the export on the configured host */
520 ret = nfs_mount(client->context, client->server->host, client->path);
522 error_setg(errp, "Failed to mount nfs share: %s",
523 nfs_get_error(client->context));
/* create the file (mode 0600) or open an existing one */
527 if (flags & O_CREAT) {
528 ret = nfs_creat(client->context, file, 0600, &client->fh);
530 error_setg(errp, "Failed to create file: %s",
531 nfs_get_error(client->context));
535 ret = nfs_open(client->context, file, flags, &client->fh);
537 error_setg(errp, "Failed to open file : %s",
538 nfs_get_error(client->context));
543 ret = nfs_fstat(client->context, client->fh, &st);
545 error_setg(errp, "Failed to fstat file: %s",
546 nfs_get_error(client->context));
/* size in sectors, rounded up */
550 ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
551 client->st_blocks = st.st_blocks;
/* has_zero_init is set only for regular files */
552 client->has_zero_init = S_ISREG(st.st_mode);
/* NOTE(review): presumably the failure path -- its label is not visible. */
557 nfs_client_close(client);
/*
 * Convert the flat option dictionary @options into a BlockdevOptionsNfs
 * object using a QObject input visitor. Visitor errors are propagated to
 * @errp. The entries of @options are then removed from the dictionary.
 *
 * NOTE(review): the return statements are not visible here; a caller treats
 * a NULL result as failure.
 */
563 static BlockdevOptionsNfs *nfs_options_qdict_to_qapi(QDict *options,
566 BlockdevOptionsNfs *opts = NULL;
569 Error *local_err = NULL;
571 v = qobject_input_visitor_new_flat_confused(options, errp);
576 visit_type_BlockdevOptionsNfs(v, NULL, &opts, &local_err);
580 error_propagate(errp, local_err);
584 /* Remove the processed options from the QDict (the visitor processes
585 * _all_ options in the QDict) */
586 while ((e = qdict_first(options))) {
587 qdict_del(options, e->key);
/*
 * Convenience wrapper around nfs_client_open() for callers that hold their
 * options as a QDict: converts @options to a BlockdevOptionsNfs, opens the
 * client with it and frees the converted object afterwards.
 */
593 static int64_t nfs_client_open_qdict(NFSClient *client, QDict *options,
594 int flags, int open_flags, Error **errp)
596 BlockdevOptionsNfs *opts;
599 opts = nfs_options_qdict_to_qapi(options, errp);
605 ret = nfs_client_open(client, opts, flags, open_flags, errp);
607 qapi_free_BlockdevOptionsNfs(opts);
/*
 * .bdrv_file_open callback. Records the node's AioContext, opens the client
 * read-write if BDRV_O_RDWR is set in @flags (read-only otherwise) and
 * stores the value returned by the open in bs->total_sectors.
 */
611 static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
613 NFSClient *client = bs->opaque;
616 client->aio_context = bdrv_get_aio_context(bs);
618 ret = nfs_client_open_qdict(client, options,
/* translate the block layer flag into the open(2)-style access mode */
619 (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
620 bs->open_flags, errp);
625 bs->total_sectors = ret;
/*
 * Creation options referenced by the driver's .create_opts; the only entry
 * visible here is the virtual disk size.
 */
630 static QemuOptsList nfs_create_opts = {
631 .name = "nfs-create-opts",
632 .head = QTAILQ_HEAD_INITIALIZER(nfs_create_opts.head),
635 .name = BLOCK_OPT_SIZE,
636 .type = QEMU_OPT_SIZE,
637 .help = "Virtual disk size"
639 { /* end of list */ }
/*
 * .bdrv_co_create callback. Uses a temporary, heap-allocated NFSClient on
 * the main AioContext: opens opts->location with O_CREAT, truncates the
 * file to opts->size and closes the client again. Errors from the open are
 * reported through @errp.
 *
 * NOTE(review): the release of the temporary client allocation is not
 * visible here -- confirm it is freed on every path.
 */
643 static int nfs_file_co_create(BlockdevCreateOptions *options, Error **errp)
645 BlockdevCreateOptionsNfs *opts = &options->u.nfs;
/* zero-initialised scratch client, independent of any BlockDriverState */
646 NFSClient *client = g_new0(NFSClient, 1);
649 assert(options->driver == BLOCKDEV_DRIVER_NFS);
651 client->aio_context = qemu_get_aio_context();
653 ret = nfs_client_open(client, opts->location, O_CREAT, 0, errp);
657 ret = nfs_ftruncate(client->context, client->fh, opts->size);
658 nfs_client_close(client);
/*
 * .bdrv_co_create_opts callback: legacy creation from a URL plus QemuOpts.
 *
 * Builds a BlockdevCreateOptions for the NFS driver: the size comes from
 * BLOCK_OPT_SIZE (rounded up), the location from parsing @url into a QDict
 * and converting that to a BlockdevOptionsNfs. The actual creation is
 * delegated to nfs_file_co_create(). The QDict and the create options are
 * released before returning.
 */
665 static int coroutine_fn nfs_file_co_create_opts(const char *url, QemuOpts *opts,
668 BlockdevCreateOptions *create_options;
669 BlockdevCreateOptionsNfs *nfs_opts;
673 create_options = g_new0(BlockdevCreateOptions, 1);
674 create_options->driver = BLOCKDEV_DRIVER_NFS;
675 nfs_opts = &create_options->u.nfs;
677 /* Read out options */
678 nfs_opts->size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
681 options = qdict_new();
682 ret = nfs_parse_uri(url, options, errp);
687 nfs_opts->location = nfs_options_qdict_to_qapi(options, errp);
/* conversion failed; the error has been stored in errp by the callee */
688 if (nfs_opts->location == NULL) {
693 ret = nfs_file_co_create(create_options, errp);
/* common cleanup */
700 qobject_unref(options);
701 qapi_free_BlockdevCreateOptions(create_options);
/*
 * Report the has_zero_init flag cached in the client; nfs_client_open()
 * sets it to whether the opened file is a regular file.
 */
705 static int nfs_has_zero_init(BlockDriverState *bs)
707 NFSClient *client = bs->opaque;
708 return client->has_zero_init;
711 /* Called (via nfs_service) with QemuMutex held. */
/*
 * Completion callback for the fstat issued by nfs_get_allocated_file_size();
 * @private_data is the NFSRPC task. On a zero result the struct stat in
 * @data is copied to task->st. The task is then marked complete and the
 * BlockDriverState stored in the task is woken.
 */
713 nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
716 NFSRPC *task = private_data;
718 if (task->ret == 0) {
719 memcpy(task->st, data, sizeof(struct stat));
/* NOTE(review): the condition guarding this report is not visible here. */
722 error_report("NFS Error: %s", nfs_get_error(nfs));
725 /* Set task->complete before reading bs->wakeup. */
726 atomic_mb_set(&task->complete, 1);
727 bdrv_wakeup(task->bs);
/*
 * .bdrv_get_allocated_file_size callback.
 *
 * For a read-only node opened without BDRV_O_NOCACHE the cached st_blocks
 * value is used. Otherwise an asynchronous fstat is issued and polled until
 * the task is complete. The result is st_blocks * 512, or the negative task
 * result if the fstat failed.
 *
 * NOTE(review): nfs_set_events() is documented as "called with QemuMutex
 * held", but no lock is visible around the calls here -- confirm.
 */
730 static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
732 NFSClient *client = bs->opaque;
/* fast path: answer from the value cached at open / reopen time */
736 if (bdrv_is_read_only(bs) &&
737 !(bs->open_flags & BDRV_O_NOCACHE)) {
738 return client->st_blocks * 512;
743 if (nfs_fstat_async(client->context, client->fh, nfs_get_allocated_file_size_cb,
748 nfs_set_events(client);
/* wait until the callback has set task.complete */
749 BDRV_POLL_WHILE(bs, !task.complete);
751 return (task.ret < 0 ? task.ret : st.st_blocks * 512);
/*
 * .bdrv_co_truncate callback: resize the file to @offset bytes with
 * nfs_ftruncate(). Only PREALLOC_MODE_OFF is supported; any other
 * @prealloc mode is reported through @errp. A failing truncate is reported
 * through @errp with the corresponding errno text.
 */
754 static int coroutine_fn
755 nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
756 PreallocMode prealloc, Error **errp)
758 NFSClient *client = bs->opaque;
761 if (prealloc != PREALLOC_MODE_OFF) {
762 error_setg(errp, "Unsupported preallocation mode '%s'",
763 PreallocMode_str(prealloc));
767 ret = nfs_ftruncate(client->context, client->fh, offset);
/* ret is negated here so that error_setg_errno() receives the errno value */
769 error_setg_errno(errp, -ret, "Failed to truncate file");
776 /* Note that this will not re-establish a connection with the NFS server
777 * - it is effectively a NOP. */
/*
 * .bdrv_reopen_prepare callback. Rejects two transitions through @errp:
 * read-only to read-write, and switching to BDRV_O_NOCACHE while libnfs
 * readahead or page cache is in use. For a reopen that stays read-only the
 * cached st_blocks value is refreshed with a new fstat.
 */
778 static int nfs_reopen_prepare(BDRVReopenState *state,
779 BlockReopenQueue *queue, Error **errp)
781 NFSClient *client = state->bs->opaque;
785 if (state->flags & BDRV_O_RDWR && bdrv_is_read_only(state->bs)) {
786 error_setg(errp, "Cannot open a read-only mount as read-write");
/* cache_used is set by nfs_client_open() when either cache was enabled */
790 if ((state->flags & BDRV_O_NOCACHE) && client->cache_used) {
791 error_setg(errp, "Cannot disable cache if libnfs readahead or"
792 " pagecache is enabled");
796 /* Update cache for read-only reopens */
797 if (!(state->flags & BDRV_O_RDWR)) {
798 ret = nfs_fstat(client->context, client->fh, &st);
800 error_setg(errp, "Failed to fstat file: %s",
801 nfs_get_error(client->context));
804 client->st_blocks = st.st_blocks;
/*
 * .bdrv_refresh_filename callback: rebuild bs->exact_filename as
 * "nfs://<host><path>", followed by a "uid" and/or "gid" query parameter
 * for whichever of the two is non-zero.
 */
810 static void nfs_refresh_filename(BlockDriverState *bs)
812 NFSClient *client = bs->opaque;
/* uid only */
814 if (client->uid && !client->gid) {
815 snprintf(bs->exact_filename, sizeof(bs->exact_filename),
816 "nfs://%s%s?uid=%" PRId64, client->server->host, client->path,
/* gid only */
818 } else if (!client->uid && client->gid) {
819 snprintf(bs->exact_filename, sizeof(bs->exact_filename),
820 "nfs://%s%s?gid=%" PRId64, client->server->host, client->path,
/* both uid and gid */
822 } else if (client->uid && client->gid) {
823 snprintf(bs->exact_filename, sizeof(bs->exact_filename),
824 "nfs://%s%s?uid=%" PRId64 "&gid=%" PRId64,
825 client->server->host, client->path, client->uid, client->gid);
/* neither set: plain URL without a query string */
827 snprintf(bs->exact_filename, sizeof(bs->exact_filename),
828 "nfs://%s%s", client->server->host, client->path);
/*
 * .bdrv_dirname callback: return a newly allocated "nfs://<host><path>/"
 * string. When a uid or gid is set no directory name is generated;
 * the filename is refreshed and an error is stored in @errp instead.
 */
832 static char *nfs_dirname(BlockDriverState *bs, Error **errp)
834 NFSClient *client = bs->opaque;
836 if (client->uid || client->gid) {
/* refreshed before the error message for this node is built */
837 bdrv_refresh_filename(bs);
838 error_setg(errp, "Cannot generate a base directory for NFS node '%s'",
843 return g_strdup_printf("nfs://%s%s/", client->server->host, client->path);
846 #ifdef LIBNFS_FEATURE_PAGECACHE
/*
 * .bdrv_co_invalidate_cache callback, available only with
 * LIBNFS_FEATURE_PAGECACHE: calls nfs_pagecache_invalidate() for the open
 * file handle.
 */
847 static void coroutine_fn nfs_co_invalidate_cache(BlockDriverState *bs,
850 NFSClient *client = bs->opaque;
851 nfs_pagecache_invalidate(client->context, client->fh);
/*
 * String table referenced by the driver's .strong_runtime_opts field.
 * NOTE(review): the entries are not visible here.
 */
855 static const char *nfs_strong_runtime_opts[] = {
/*
 * Driver table for the "nfs" protocol: wires the functions of this file
 * into the block layer callbacks.
 */
864 static BlockDriver bdrv_nfs = {
865 .format_name = "nfs",
866 .protocol_name = "nfs",
/* the block layer allocates bs->opaque with this size */
868 .instance_size = sizeof(NFSClient),
869 .bdrv_parse_filename = nfs_parse_filename,
870 .create_opts = &nfs_create_opts,
/* the same helper answers both zero-init queries */
872 .bdrv_has_zero_init = nfs_has_zero_init,
873 .bdrv_has_zero_init_truncate = nfs_has_zero_init,
874 .bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
875 .bdrv_co_truncate = nfs_file_co_truncate,
877 .bdrv_file_open = nfs_file_open,
878 .bdrv_close = nfs_file_close,
879 .bdrv_co_create = nfs_file_co_create,
880 .bdrv_co_create_opts = nfs_file_co_create_opts,
881 .bdrv_reopen_prepare = nfs_reopen_prepare,
883 .bdrv_co_preadv = nfs_co_preadv,
884 .bdrv_co_pwritev = nfs_co_pwritev,
885 .bdrv_co_flush_to_disk = nfs_co_flush,
887 .bdrv_detach_aio_context = nfs_detach_aio_context,
888 .bdrv_attach_aio_context = nfs_attach_aio_context,
889 .bdrv_refresh_filename = nfs_refresh_filename,
890 .bdrv_dirname = nfs_dirname,
892 .strong_runtime_opts = nfs_strong_runtime_opts,
/* cache invalidation exists only when libnfs has a page cache */
894 #ifdef LIBNFS_FEATURE_PAGECACHE
895 .bdrv_co_invalidate_cache = nfs_co_invalidate_cache,
/* Register the NFS driver table with the block layer. */
899 static void nfs_block_init(void)
901 bdrv_register(&bdrv_nfs);
/* hand the init function to the block_init() hook */
904 block_init(nfs_block_init);