// SPDX-License-Identifier: GPL-2.0-or-later
/* kiocb-using read/write
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 */

#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/bio.h>
#include <linux/falloc.h>
#include <linux/sched/mm.h>
#include <trace/events/fscache.h>
#include "internal.h"
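
/*
 * A cachefiles_kiocb wraps the kiocb submitted to the backing file and
 * carries the netfs termination callback plus the bookkeeping needed to
 * complete a cache read or write once the backing filesystem finishes it.
 */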
struct cachefiles_kiocb {
	struct kiocb		iocb;
	refcount_t		ki_refcnt;
	loff_t			start;
	size_t			skipped;
	struct cachefiles_object *object;
	netfs_io_terminated_t	term_func;
	void			*term_func_priv;
	bool			was_async;
	unsigned int		inval_counter;	/* Copy of cookie->inval_counter */
	u64			b_writing;
};
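
/*
 * Two references are taken on each cachefiles_kiocb: one is dropped by the
 * completion handler and one by the submission path once the request has
 * been issued, so whichever side runs last releases the backing file and
 * the object.
 */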
static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
{
	if (refcount_dec_and_test(&ki->ki_refcnt)) {
		cachefiles_put_object(ki->object, cachefiles_obj_put_ioreq);
		fput(ki->iocb.ki_filp);
		kfree(ki);
	}
}
/*
 * Handle completion of a read from the cache.
 */
static void cachefiles_read_complete(struct kiocb *iocb, long ret)
{
	struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
	struct inode *inode = file_inode(ki->iocb.ki_filp);

	if (ret < 0)
		trace_cachefiles_io_error(ki->object, inode, ret,
					  cachefiles_trace_read_error);

	if (ki->term_func) {
		if (ret >= 0) {
			if (ki->object->cookie->inval_counter == ki->inval_counter)
				ki->skipped += ret;
			else
				ret = -ESTALE;
		}

		ki->term_func(ki->term_func_priv, ret, ki->was_async);
	}

	cachefiles_put_kiocb(ki);
}
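
/*
 * Reads are submitted to the backing file as direct-I/O kiocbs.  If the
 * backing filesystem queues the request (-EIOCBQUEUED), completion is
 * reported asynchronously through cachefiles_read_complete(); otherwise the
 * completion handler is invoked synchronously from the submission path.
 */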
/*
 * Initiate a read from the cache.
 */
static int cachefiles_read(struct netfs_cache_resources *cres,
			   loff_t start_pos,
			   struct iov_iter *iter,
			   enum netfs_read_from_hole read_hole,
			   netfs_io_terminated_t term_func,
			   void *term_func_priv)
{
	struct cachefiles_object *object;
	struct cachefiles_kiocb *ki;
	struct file *file;
	unsigned int old_nofs;
	ssize_t ret = -ENOBUFS;
	size_t len = iov_iter_count(iter), skipped = 0;

	if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
		goto presubmission_error;

	fscache_count_read();
	object = cachefiles_cres_object(cres);
	file = cachefiles_cres_file(cres);

	_enter("%pD,%li,%llx,%zx/%llx",
	       file, file_inode(file)->i_ino, start_pos, len,
	       i_size_read(file_inode(file)));

	/* If the caller asked us to seek for data before doing the read, then
	 * we should do that now.  If we find a gap, we fill it with zeros.
	 */
	if (read_hole != NETFS_READ_HOLE_IGNORE) {
		loff_t off = start_pos, off2;

		off2 = cachefiles_inject_read_error();
		if (off2 == 0)
			off2 = vfs_llseek(file, off, SEEK_DATA);
		if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
			skipped = 0;
			ret = off2;
			goto presubmission_error;
		}

		if (off2 == -ENXIO || off2 >= start_pos + len) {
			/* The region is beyond the EOF or there's no more data
			 * in the region, so clear the rest of the buffer and
			 * return success.
			 */
			ret = -ENODATA;
			if (read_hole == NETFS_READ_HOLE_FAIL)
				goto presubmission_error;

			iov_iter_zero(len, iter);
			skipped = len;
			ret = 0;
			goto presubmission_error;
		}

		skipped = off2 - off;
		iov_iter_zero(skipped, iter);
	}

	ret = -ENOMEM;
	ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
	if (!ki)
		goto presubmission_error;

	refcount_set(&ki->ki_refcnt, 2);
	ki->iocb.ki_filp	= file;
	ki->iocb.ki_pos		= start_pos + skipped;
	ki->iocb.ki_flags	= IOCB_DIRECT;
	ki->iocb.ki_ioprio	= get_current_ioprio();
	ki->skipped		= skipped;
	ki->object		= object;
	ki->inval_counter	= cres->inval_counter;
	ki->term_func		= term_func;
	ki->term_func_priv	= term_func_priv;
	ki->was_async		= true;

	if (ki->term_func)
		ki->iocb.ki_complete = cachefiles_read_complete;

	get_file(ki->iocb.ki_filp);
	cachefiles_grab_object(object, cachefiles_obj_get_ioreq);

	trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped);
	old_nofs = memalloc_nofs_save();
	ret = cachefiles_inject_read_error();
	if (ret == 0)
		ret = vfs_iocb_iter_read(file, &ki->iocb, iter);
	memalloc_nofs_restore(old_nofs);
	switch (ret) {
	case -EIOCBQUEUED:
		goto in_progress;

	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
	case -ERESTARTNOHAND:
	case -ERESTART_RESTARTBLOCK:
		/* There's no easy way to restart the syscall since other AIO's
		 * may be already running. Just fail this IO with EINTR.
		 */
		ret = -EINTR;
		fallthrough;
	default:
		ki->was_async = false;
		cachefiles_read_complete(&ki->iocb, ret);
		if (ret > 0)
			ret = 0;
		break;
	}

in_progress:
	cachefiles_put_kiocb(ki);
	_leave(" = %zd", ret);
	return ret;

presubmission_error:
	if (term_func)
		term_func(term_func_priv, ret < 0 ? ret : skipped, false);
	return ret;
}
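
/*
 * Cache occupancy is probed with SEEK_DATA/SEEK_HOLE on the backing file.
 * The granularity is clamped to at least the cache block size and partial
 * blocks are rounded away so that they are not reported as valid data.
 */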
/*
 * Query the occupancy of the cache in a region, returning where the next chunk
 * of data starts and how long it is.
 */
static int cachefiles_query_occupancy(struct netfs_cache_resources *cres,
				      loff_t start, size_t len, size_t granularity,
				      loff_t *_data_start, size_t *_data_len)
{
	struct cachefiles_object *object;
	struct file *file;
	loff_t off, off2;

	*_data_start = -1;
	*_data_len = 0;

	if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
		return -ENOBUFS;

	object = cachefiles_cres_object(cres);
	file = cachefiles_cres_file(cres);
	granularity = max_t(size_t, object->volume->cache->bsize, granularity);

	_enter("%pD,%li,%llx,%zx/%llx",
	       file, file_inode(file)->i_ino, start, len,
	       i_size_read(file_inode(file)));

	off = cachefiles_inject_read_error();
	if (off == 0)
		off = vfs_llseek(file, start, SEEK_DATA);
	if (off == -ENXIO)
		return -ENODATA; /* Beyond EOF */
	if (off < 0 && off >= (loff_t)-MAX_ERRNO)
		return -ENOBUFS; /* Error. */
	if (round_up(off, granularity) >= start + len)
		return -ENODATA; /* No data in range */

	off2 = cachefiles_inject_read_error();
	if (off2 == 0)
		off2 = vfs_llseek(file, off, SEEK_HOLE);
	if (off2 == -ENXIO)
		return -ENODATA; /* Beyond EOF */
	if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO)
		return -ENOBUFS; /* Error. */

	/* Round away partial blocks */
	off = round_up(off, granularity);
	off2 = round_down(off2, granularity);

	if (off2 <= off)
		return -ENODATA;

	*_data_start = off;
	if (off2 > start + len)
		*_data_len = len;
	else
		*_data_len = off2 - off;
	return 0;
}
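
/*
 * The number of cache blocks a write has in flight is accounted in
 * cache->b_writing; completion subtracts it again and marks the cookie as
 * having data stored in the cache.
 */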
/*
 * Handle completion of a write to the cache.
 */
static void cachefiles_write_complete(struct kiocb *iocb, long ret)
{
	struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
	struct cachefiles_object *object = ki->object;
	struct inode *inode = file_inode(ki->iocb.ki_filp);

	kiocb_end_write(iocb);

	if (ret < 0)
		trace_cachefiles_io_error(object, inode, ret,
					  cachefiles_trace_write_error);

	atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing);
	set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags);
	if (ki->term_func)
		ki->term_func(ki->term_func_priv, ret, ki->was_async);
	cachefiles_put_kiocb(ki);
}
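
/*
 * Cache I/O is submitted under a NOFS allocation scope (memalloc_nofs_save)
 * so that memory reclaim triggered inside the backing filesystem cannot
 * recurse back into filesystem code and deadlock.
 */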
/*
 * Initiate a write to the cache.
 */
int __cachefiles_write(struct cachefiles_object *object,
		       struct file *file,
		       loff_t start_pos,
		       struct iov_iter *iter,
		       netfs_io_terminated_t term_func,
		       void *term_func_priv)
{
	struct cachefiles_cache *cache;
	struct cachefiles_kiocb *ki;
	unsigned int old_nofs;
	ssize_t ret;
	size_t len = iov_iter_count(iter);

	fscache_count_write();
	cache = object->volume->cache;

	_enter("%pD,%li,%llx,%zx/%llx",
	       file, file_inode(file)->i_ino, start_pos, len,
	       i_size_read(file_inode(file)));

	ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
	if (!ki) {
		if (term_func)
			term_func(term_func_priv, -ENOMEM, false);
		return -ENOMEM;
	}

	refcount_set(&ki->ki_refcnt, 2);
	ki->iocb.ki_filp	= file;
	ki->iocb.ki_pos		= start_pos;
	ki->iocb.ki_flags	= IOCB_DIRECT | IOCB_WRITE;
	ki->iocb.ki_ioprio	= get_current_ioprio();
	ki->object		= object;
	ki->start		= start_pos;
	ki->term_func		= term_func;
	ki->term_func_priv	= term_func_priv;
	ki->was_async		= true;
	ki->b_writing		= (len + (1 << cache->bshift) - 1) >> cache->bshift;

	if (ki->term_func)
		ki->iocb.ki_complete = cachefiles_write_complete;
	atomic_long_add(ki->b_writing, &cache->b_writing);

	get_file(ki->iocb.ki_filp);
	cachefiles_grab_object(object, cachefiles_obj_get_ioreq);

	trace_cachefiles_write(object, file_inode(file), ki->iocb.ki_pos, len);
	old_nofs = memalloc_nofs_save();
	ret = cachefiles_inject_write_error();
	if (ret == 0)
		ret = vfs_iocb_iter_write(file, &ki->iocb, iter);
	memalloc_nofs_restore(old_nofs);
	switch (ret) {
	case -EIOCBQUEUED:
		goto in_progress;

	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
	case -ERESTARTNOHAND:
	case -ERESTART_RESTARTBLOCK:
		/* There's no easy way to restart the syscall since other AIO's
		 * may be already running. Just fail this IO with EINTR.
		 */
		ret = -EINTR;
		fallthrough;
	default:
		ki->was_async = false;
		cachefiles_write_complete(&ki->iocb, ret);
		if (ret > 0)
			ret = 0;
		break;
	}

in_progress:
	cachefiles_put_kiocb(ki);
	_leave(" = %zd", ret);
	return ret;
}
static int cachefiles_write(struct netfs_cache_resources *cres,
			    loff_t start_pos,
			    struct iov_iter *iter,
			    netfs_io_terminated_t term_func,
			    void *term_func_priv)
{
	if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) {
		if (term_func)
			term_func(term_func_priv, -ENOBUFS, false);
		return -ENOBUFS;
	}

	return __cachefiles_write(cachefiles_cres_object(cres),
				  cachefiles_cres_file(cres),
				  start_pos, iter,
				  term_func, term_func_priv);
}
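
/*
 * Decide where a subrequest should be fulfilled from: the cache if the data
 * is resident, the server (optionally copying the result into the cache) if
 * it is not, or zero-fill if the request lies beyond the EOF.
 */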
static inline enum netfs_io_source
cachefiles_do_prepare_read(struct netfs_cache_resources *cres,
			   loff_t start, size_t *_len, loff_t i_size,
			   unsigned long *_flags, ino_t netfs_ino)
{
	enum cachefiles_prepare_read_trace why;
	struct cachefiles_object *object = NULL;
	struct cachefiles_cache *cache;
	struct fscache_cookie *cookie = fscache_cres_cookie(cres);
	const struct cred *saved_cred;
	struct file *file = cachefiles_cres_file(cres);
	enum netfs_io_source ret = NETFS_DOWNLOAD_FROM_SERVER;
	size_t len = *_len;
	loff_t off, to;
	ino_t ino = file ? file_inode(file)->i_ino : 0;
	int rc;

	_enter("%zx @%llx/%llx", len, start, i_size);

	if (start >= i_size) {
		ret = NETFS_FILL_WITH_ZEROES;
		why = cachefiles_trace_read_after_eof;
		goto out_no_object;
	}

	if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) {
		__set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags);
		why = cachefiles_trace_read_no_data;
		if (!test_bit(NETFS_SREQ_ONDEMAND, _flags))
			goto out_no_object;
	}

	/* The object and the file may be being created in the background. */
	if (!file) {
		why = cachefiles_trace_read_no_file;
		if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
			goto out_no_object;
		file = cachefiles_cres_file(cres);
		if (!file)
			goto out_no_object;
		ino = file_inode(file)->i_ino;
	}

	object = cachefiles_cres_object(cres);
	cache = object->volume->cache;
	cachefiles_begin_secure(cache, &saved_cred);
retry:
	off = cachefiles_inject_read_error();
	if (off == 0)
		off = vfs_llseek(file, start, SEEK_DATA);
	if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
		if (off == (loff_t)-ENXIO) {
			why = cachefiles_trace_read_seek_nxio;
			goto download_and_store;
		}
		trace_cachefiles_io_error(object, file_inode(file), off,
					  cachefiles_trace_seek_error);
		why = cachefiles_trace_read_seek_error;
		goto out;
	}

	if (off >= start + len) {
		why = cachefiles_trace_read_found_hole;
		goto download_and_store;
	}

	if (off > start) {
		off = round_up(off, cache->bsize);
		len = off - start;
		*_len = len;
		why = cachefiles_trace_read_found_part;
		goto download_and_store;
	}

	to = cachefiles_inject_read_error();
	if (to == 0)
		to = vfs_llseek(file, start, SEEK_HOLE);
	if (to < 0 && to >= (loff_t)-MAX_ERRNO) {
		trace_cachefiles_io_error(object, file_inode(file), to,
					  cachefiles_trace_seek_error);
		why = cachefiles_trace_read_seek_error;
		goto out;
	}

	if (to < start + len) {
		if (start + len >= i_size)
			to = round_up(to, cache->bsize);
		else
			to = round_down(to, cache->bsize);
		len = to - start;
		*_len = len;
	}

	why = cachefiles_trace_read_have_data;
	ret = NETFS_READ_FROM_CACHE;
	goto out;

download_and_store:
	__set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags);
	if (test_bit(NETFS_SREQ_ONDEMAND, _flags)) {
		rc = cachefiles_ondemand_read(object, start, len);
		if (!rc) {
			__clear_bit(NETFS_SREQ_ONDEMAND, _flags);
			goto retry;
		}
		ret = NETFS_INVALID_READ;
	}
out:
	cachefiles_end_secure(cache, saved_cred);
out_no_object:
	trace_cachefiles_prep_read(object, start, len, *_flags, ret, why, ino, netfs_ino);
	return ret;
}
/*
 * Prepare a read operation, shortening it to a cached/uncached
 * boundary as appropriate.
 */
static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq,
						    unsigned long long i_size)
{
	return cachefiles_do_prepare_read(&subreq->rreq->cache_resources,
					  subreq->start, &subreq->len, i_size,
					  &subreq->flags, subreq->rreq->inode->i_ino);
}
505 * Prepare an on-demand read operation, shortening it to a cached/uncached
506 * boundary as appropriate.
508 static enum netfs_io_source
509 cachefiles_prepare_ondemand_read(struct netfs_cache_resources *cres,
510 loff_t start, size_t *_len, loff_t i_size,
511 unsigned long *_flags, ino_t ino)
513 return cachefiles_do_prepare_read(cres, start, _len, i_size, _flags, ino);
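
/*
 * Writes to the cache are performed with direct I/O, so the target region
 * must be rounded out to DIO alignment and checked for available space; a
 * partially allocated block that cannot simply be overwritten may be punched
 * out so that the whole block can be rewritten.
 */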
/*
 * Prepare for a write to occur.
 */
int __cachefiles_prepare_write(struct cachefiles_object *object,
			       struct file *file,
			       loff_t *_start, size_t *_len, size_t upper_len,
			       bool no_space_allocated_yet)
{
	struct cachefiles_cache *cache = object->volume->cache;
	loff_t start = *_start, pos;
	size_t len = *_len;
	int ret;

	/* Round to DIO size */
	start = round_down(*_start, PAGE_SIZE);
	if (start != *_start || *_len > upper_len) {
		/* Probably asked to cache a streaming write written into the
		 * pagecache when the cookie was temporarily out of service to
		 * culling.
		 */
		fscache_count_dio_misfit();
		return -ENOBUFS;
	}

	*_len = round_up(len, PAGE_SIZE);

	/* We need to work out whether there's sufficient disk space to perform
	 * the write - but we can skip that check if we have space already
	 * allocated.
	 */
	if (no_space_allocated_yet)
		goto check_space;

	pos = cachefiles_inject_read_error();
	if (pos == 0)
		pos = vfs_llseek(file, start, SEEK_DATA);
	if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
		if (pos == -ENXIO)
			goto check_space; /* Unallocated tail */
		trace_cachefiles_io_error(object, file_inode(file), pos,
					  cachefiles_trace_seek_error);
		return pos;
	}
	if ((u64)pos >= (u64)start + *_len)
		goto check_space; /* Unallocated region */

	/* We have a block that's at least partially filled - if we're low on
	 * space, we need to see if it's fully allocated.  If it's not, we may
	 * want to cull it.
	 */
	if (cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
				 cachefiles_has_space_check) == 0)
		return 0; /* Enough space to simply overwrite the whole block */

	pos = cachefiles_inject_read_error();
	if (pos == 0)
		pos = vfs_llseek(file, start, SEEK_HOLE);
	if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
		trace_cachefiles_io_error(object, file_inode(file), pos,
					  cachefiles_trace_seek_error);
		return pos;
	}
	if ((u64)pos >= (u64)start + *_len)
		return 0; /* Fully allocated */

	/* Partially allocated, but insufficient space: cull. */
	fscache_count_no_write_space();
	ret = cachefiles_inject_remove_error();
	if (ret == 0)
		ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
				    start, *_len);
	if (ret < 0) {
		trace_cachefiles_io_error(object, file_inode(file), ret,
					  cachefiles_trace_fallocate_error);
		cachefiles_io_error_obj(object,
					"CacheFiles: fallocate failed (%d)\n", ret);
		ret = -EIO;
	}

	return ret;

check_space:
	return cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
				    cachefiles_has_space_for_write);
}
static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
				    loff_t *_start, size_t *_len, size_t upper_len,
				    loff_t i_size, bool no_space_allocated_yet)
{
	struct cachefiles_object *object = cachefiles_cres_object(cres);
	struct cachefiles_cache *cache = object->volume->cache;
	const struct cred *saved_cred;
	int ret;

	if (!cachefiles_cres_file(cres)) {
		if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
			return -ENOBUFS;
		if (!cachefiles_cres_file(cres))
			return -ENOBUFS;
	}

	cachefiles_begin_secure(cache, &saved_cred);
	ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
					 _start, _len, upper_len,
					 no_space_allocated_yet);
	cachefiles_end_secure(cache, saved_cred);
	return ret;
}
static void cachefiles_prepare_write_subreq(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *wreq = subreq->rreq;
	struct netfs_cache_resources *cres = &wreq->cache_resources;

	_enter("W=%x[%x] %llx", wreq->debug_id, subreq->debug_index, subreq->start);

	subreq->max_len = ULONG_MAX;
	subreq->max_nr_segs = BIO_MAX_VECS;

	if (!cachefiles_cres_file(cres)) {
		if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
			return netfs_prepare_write_failed(subreq);
		if (!cachefiles_cres_file(cres))
			return netfs_prepare_write_failed(subreq);
	}
}
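
/*
 * Trim a write subrequest to cache DIO block boundaries before issuing it:
 * leading and trailing partial blocks are dropped rather than written, and a
 * subrequest that lies entirely within a partial block is simply marked
 * complete without touching the cache.
 */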
static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *wreq = subreq->rreq;
	struct netfs_cache_resources *cres = &wreq->cache_resources;
	struct cachefiles_object *object = cachefiles_cres_object(cres);
	struct cachefiles_cache *cache = object->volume->cache;
	const struct cred *saved_cred;
	size_t off, pre, post, len = subreq->len;
	loff_t start = subreq->start;
	int ret;

	_enter("W=%x[%x] %llx-%llx",
	       wreq->debug_id, subreq->debug_index, start, start + len - 1);

	/* We need to start on the cache granularity boundary */
	off = start & (CACHEFILES_DIO_BLOCK_SIZE - 1);
	if (off) {
		pre = CACHEFILES_DIO_BLOCK_SIZE - off;
		if (pre >= len) {
			netfs_write_subrequest_terminated(subreq, len, false);
			return;
		}
		subreq->transferred += pre;
		subreq->start += pre;
		len -= pre;
		iov_iter_advance(&subreq->io_iter, pre);
	}

	/* We also need to end on the cache granularity boundary */
	post = len & (CACHEFILES_DIO_BLOCK_SIZE - 1);
	if (post) {
		if (len == post) {
			netfs_write_subrequest_terminated(subreq, post, false);
			return;
		}
		len -= post;
		iov_iter_truncate(&subreq->io_iter, len);
	}

	cachefiles_begin_secure(cache, &saved_cred);
	ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
					 &start, &len, len, true);
	cachefiles_end_secure(cache, saved_cred);
	if (ret < 0) {
		netfs_write_subrequest_terminated(subreq, ret, false);
		return;
	}

	cachefiles_write(&subreq->rreq->cache_resources,
			 subreq->start, &subreq->io_iter,
			 netfs_write_subrequest_terminated, subreq);
}
/*
 * Clean up an operation.
 */
static void cachefiles_end_operation(struct netfs_cache_resources *cres)
{
	struct file *file = cachefiles_cres_file(cres);

	if (file)
		fput(file);
	fscache_end_cookie_access(fscache_cres_cookie(cres), fscache_access_io_end);
}
static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
	.end_operation		= cachefiles_end_operation,
	.read			= cachefiles_read,
	.write			= cachefiles_write,
	.issue_write		= cachefiles_issue_write,
	.prepare_read		= cachefiles_prepare_read,
	.prepare_write		= cachefiles_prepare_write,
	.prepare_write_subreq	= cachefiles_prepare_write_subreq,
	.prepare_ondemand_read	= cachefiles_prepare_ondemand_read,
	.query_occupancy	= cachefiles_query_occupancy,
};
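
/*
 * The ops table above is installed in cres->ops by cachefiles_begin_operation()
 * below; the netfs library then calls back through it for reads, writes,
 * occupancy queries and cleanup.
 */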
/*
 * Open the cache file when beginning a cache operation.
 */
bool cachefiles_begin_operation(struct netfs_cache_resources *cres,
				enum fscache_want_state want_state)
{
	struct cachefiles_object *object = cachefiles_cres_object(cres);

	if (!cachefiles_cres_file(cres)) {
		cres->ops = &cachefiles_netfs_cache_ops;
		if (object->file) {
			spin_lock(&object->lock);
			if (!cres->cache_priv2 && object->file)
				cres->cache_priv2 = get_file(object->file);
			spin_unlock(&object->lock);
		}
	}

	if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) {
		pr_err("failed to get cres->file\n");