1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* kiocb-using read/write
4 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
8 #include <linux/mount.h>
9 #include <linux/slab.h>
10 #include <linux/file.h>
11 #include <linux/uio.h>
12 #include <linux/falloc.h>
13 #include <linux/sched/mm.h>
14 #include <trace/events/fscache.h>
// Bookkeeping for one cache I/O request built around a kiocb.
// NOTE(review): only three members are visible in this excerpt; code elsewhere
// in this file also touches iocb, ki_refcnt, term_func_priv, was_async,
// skipped, start and b_writing on this struct.
17 struct cachefiles_kiocb {
// Cache object for this I/O; cachefiles_put_kiocb() releases it with
// cachefiles_put_object().
25 struct cachefiles_object *object;
// Completion callback, called as term_func(term_func_priv, result, was_async).
26 netfs_io_terminated_t term_func;
29 unsigned int inval_counter; /* Copy of cookie->inval_counter */
// Drop one reference on @ki. When refcount_dec_and_test() reports that this
// was the last one, release the cache-object reference (taken with
// cachefiles_obj_get_ioreq when the request was set up) and the file
// reference held through ki->iocb.ki_filp.
// NOTE(review): the remainder of the teardown is not visible in this excerpt.
33 static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
35 if (refcount_dec_and_test(&ki->ki_refcnt)) {
36 cachefiles_put_object(ki->object, cachefiles_obj_put_ioreq);
// Pairs with the get_file() done in cachefiles_read() and __cachefiles_write().
37 fput(ki->iocb.ki_filp);
43 * Handle completion of a read from the cache.
// Completion handler installed as ki->iocb.ki_complete for cache reads; it is
// also called directly from cachefiles_read() after was_async is cleared.
// @iocb: kiocb embedded in a struct cachefiles_kiocb.
// @ret:  result value supplied by whoever completes the read.
45 static void cachefiles_read_complete(struct kiocb *iocb, long ret)
// Recover the enclosing request from the embedded kiocb.
47 struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
48 struct inode *inode = file_inode(ki->iocb.ki_filp);
// Emit the read-error tracepoint.
// NOTE(review): the condition guarding this call is not visible here.
53 trace_cachefiles_io_error(ki->object, inode, ret,
54 cachefiles_trace_read_error);
// Compare the cookie's current inval_counter with the copy stored in the
// request at submission time.
// NOTE(review): both outcomes of this test lie outside this excerpt.
58 if (ki->object->cookie->inval_counter == ki->inval_counter)
// Report the result to the caller-supplied termination function.
64 ki->term_func(ki->term_func_priv, ret, ki->was_async);
// Drop a reference on the request (it is created with two).
67 cachefiles_put_kiocb(ki);
71 * Initiate a read from the cache.
// Start a direct-I/O read from the cache file behind @cres into @iter.
// Depending on @read_hole, the function first seeks for data and zero-fills
// any leading gap. The request is tracked in a cachefiles_kiocb created with
// two references and completes through cachefiles_read_complete(). Outcomes
// reached before submission are reported through term_func at the end.
// NOTE(review): several lines (some parameters and locals, conditions, the
// switch header, labels and returns) are not visible in this excerpt.
73 static int cachefiles_read(struct netfs_cache_resources *cres,
75 struct iov_iter *iter,
76 enum netfs_read_from_hole read_hole,
77 netfs_io_terminated_t term_func,
80 struct cachefiles_object *object;
81 struct cachefiles_kiocb *ki;
83 unsigned int old_nofs;
// Result used if the wait below fails before anything else sets ret.
84 ssize_t ret = -ENOBUFS;
// len: bytes the iterator can accept; skipped: bytes zero-filled, not read.
85 size_t len = iov_iter_count(iter), skipped = 0;
87 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
88 goto presubmission_error;
91 object = cachefiles_cres_object(cres);
92 file = cachefiles_cres_file(cres);
94 _enter("%pD,%li,%llx,%zx/%llx",
95 file, file_inode(file)->i_ino, start_pos, len,
96 i_size_read(file_inode(file)));
98 /* If the caller asked us to seek for data before doing the read, then
99 * we should do that now. If we find a gap, we fill it with zeros.
101 if (read_hole != NETFS_READ_HOLE_IGNORE) {
102 loff_t off = start_pos, off2;
// off2 is assigned by the fault-injection hook and by the SEEK_DATA lookup.
// NOTE(review): the check between the two assignments is not visible here.
104 off2 = cachefiles_inject_read_error();
106 off2 = vfs_llseek(file, off, SEEK_DATA);
// A negative value in the errno range other than -ENXIO is a seek failure.
107 if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
110 goto presubmission_error;
113 if (off2 == -ENXIO || off2 >= start_pos + len) {
114 /* The region is beyond the EOF or there's no more data
115 * in the region, so clear the rest of the buffer and
119 if (read_hole == NETFS_READ_HOLE_FAIL)
120 goto presubmission_error;
122 iov_iter_zero(len, iter);
125 goto presubmission_error;
// Data begins part-way into the range: zero-fill the leading gap and let the
// read start after it.
128 skipped = off2 - off;
129 iov_iter_zero(skipped, iter);
133 ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
135 goto presubmission_error;
// Two references: one is dropped by cachefiles_put_kiocb() near the end of
// this function, the other by cachefiles_read_complete().
137 refcount_set(&ki->ki_refcnt, 2);
138 ki->iocb.ki_filp = file;
// The read proper starts just past the zero-filled gap.
139 ki->iocb.ki_pos = start_pos + skipped;
140 ki->iocb.ki_flags = IOCB_DIRECT;
141 ki->iocb.ki_ioprio = get_current_ioprio();
142 ki->skipped = skipped;
// Snapshot of the invalidation counter; cachefiles_read_complete() compares
// it with the cookie's current value.
144 ki->inval_counter = cres->inval_counter;
145 ki->term_func = term_func;
146 ki->term_func_priv = term_func_priv;
147 ki->was_async = true;
150 ki->iocb.ki_complete = cachefiles_read_complete;
// Pin the file and the cache object for the request; cachefiles_put_kiocb()
// releases both when the last reference goes away.
152 get_file(ki->iocb.ki_filp);
153 cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
155 trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped);
// The read is bracketed by memalloc_nofs_save()/memalloc_nofs_restore().
156 old_nofs = memalloc_nofs_save();
157 ret = cachefiles_inject_read_error();
159 ret = vfs_iocb_iter_read(file, &ki->iocb, iter);
160 memalloc_nofs_restore(old_nofs);
166 case -ERESTARTNOINTR:
167 case -ERESTARTNOHAND:
168 case -ERESTART_RESTARTBLOCK:
169 /* There's no easy way to restart the syscall since other AIO's
170 * may be already running. Just fail this IO with EINTR.
175 ki->was_async = false;
// Run the completion handler inline; was_async was cleared just above, so
// term_func is told the completion was not asynchronous.
176 cachefiles_read_complete(&ki->iocb, ret);
// Drop this function's reference on the request.
183 cachefiles_put_kiocb(ki);
184 _leave(" = %zd", ret);
// Pre-submission outcome: pass the error when ret is negative, otherwise the
// number of bytes that were zero-filled.
189 term_func(term_func_priv, ret < 0 ? ret : skipped, false);
194 * Query the occupancy of the cache in a region, returning where the next chunk
195 * of data starts and how long it is.
// Look for the first run of cached data at or after @start within @len bytes,
// working at a granularity no finer than the cache block size, and report
// its length through _data_len.
// Visible results: -ENODATA when there is no usable data and -ENOBUFS when a
// seek fails.
// NOTE(review): some locals, conditions, the _data_start store and the final
// return are not visible in this excerpt.
197 static int cachefiles_query_occupancy(struct netfs_cache_resources *cres,
198 loff_t start, size_t len, size_t granularity,
199 loff_t *_data_start, size_t *_data_len)
201 struct cachefiles_object *object;
208 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
211 object = cachefiles_cres_object(cres);
212 file = cachefiles_cres_file(cres);
// Never work at a finer grain than the cache's block size.
213 granularity = max_t(size_t, object->volume->cache->bsize, granularity);
215 _enter("%pD,%li,%llx,%zx/%llx",
216 file, file_inode(file)->i_ino, start, len,
217 i_size_read(file_inode(file)));
// Find where data begins at or after start.
219 off = cachefiles_inject_read_error();
221 off = vfs_llseek(file, start, SEEK_DATA);
223 return -ENODATA; /* Beyond EOF */
224 if (off < 0 && off >= (loff_t)-MAX_ERRNO)
225 return -ENOBUFS; /* Error. */
226 if (round_up(off, granularity) >= start + len)
227 return -ENODATA; /* No data in range */
// Find where that run of data ends (the next hole at or after off).
229 off2 = cachefiles_inject_read_error();
231 off2 = vfs_llseek(file, off, SEEK_HOLE);
233 return -ENODATA; /* Beyond EOF */
234 if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO)
235 return -ENOBUFS; /* Error. */
237 /* Round away partial blocks */
238 off = round_up(off, granularity);
239 off2 = round_down(off2, granularity);
// The aligned extent is off2 - off.
// NOTE(review): what is stored when off2 lies beyond start + len is not
// visible in this excerpt.
244 if (off2 > start + len)
247 *_data_len = off2 - off;
252 * Handle completion of a write to the cache.
// Completion handler installed as ki->iocb.ki_complete for cache writes; it
// is also called directly from __cachefiles_write() after was_async is
// cleared.
// @iocb: kiocb embedded in a struct cachefiles_kiocb.
// @ret:  result value supplied by whoever completes the write.
254 static void cachefiles_write_complete(struct kiocb *iocb, long ret)
// Recover the enclosing request from the embedded kiocb.
256 struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
257 struct cachefiles_object *object = ki->object;
258 struct inode *inode = file_inode(ki->iocb.ki_filp);
263 kiocb_end_write(iocb);
// Emit the write-error tracepoint.
// NOTE(review): the condition guarding this call is not visible here.
266 trace_cachefiles_io_error(object, inode, ret,
267 cachefiles_trace_write_error);
// Remove the block count that __cachefiles_write() added to the cache-wide
// b_writing counter, and flag the cookie as having data.
269 atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing);
270 set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags);
// Report the result to the caller-supplied termination function, then drop a
// reference on the request.
272 ki->term_func(ki->term_func_priv, ret, ki->was_async);
273 cachefiles_put_kiocb(ki);
277 * Initiate a write to the cache.
// Start a direct-I/O write of @iter to the cache file of @object.
// The request is tracked in a cachefiles_kiocb created with two references
// and completes through cachefiles_write_complete(). The number of cache
// blocks being written is added to cache->b_writing here and subtracted
// again on completion.
// NOTE(review): several lines (some parameters and locals, conditions, the
// switch header, labels and returns) are not visible in this excerpt.
279 int __cachefiles_write(struct cachefiles_object *object,
282 struct iov_iter *iter,
283 netfs_io_terminated_t term_func,
284 void *term_func_priv)
286 struct cachefiles_cache *cache;
287 struct cachefiles_kiocb *ki;
288 unsigned int old_nofs;
// Number of bytes the iterator holds for this write.
290 size_t len = iov_iter_count(iter);
292 fscache_count_write();
293 cache = object->volume->cache;
295 _enter("%pD,%li,%llx,%zx/%llx",
296 file, file_inode(file)->i_ino, start_pos, len,
297 i_size_read(file_inode(file)));
299 ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
// Reports -ENOMEM to the caller as a non-asynchronous completion.
// NOTE(review): presumably the allocation-failure path; the guarding test is
// not visible here.
302 term_func(term_func_priv, -ENOMEM, false);
// Two references: one is dropped by cachefiles_put_kiocb() near the end of
// this function, the other by cachefiles_write_complete().
306 refcount_set(&ki->ki_refcnt, 2);
307 ki->iocb.ki_filp = file;
308 ki->iocb.ki_pos = start_pos;
309 ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE;
310 ki->iocb.ki_ioprio = get_current_ioprio();
312 ki->start = start_pos;
314 ki->term_func = term_func;
315 ki->term_func_priv = term_func_priv;
316 ki->was_async = true;
// len expressed in units of (1 << cache->bshift) bytes, rounded up.
317 ki->b_writing = (len + (1 << cache->bshift) - 1) >> cache->bshift;
320 ki->iocb.ki_complete = cachefiles_write_complete;
// Account the in-flight blocks; cachefiles_write_complete() subtracts them.
321 atomic_long_add(ki->b_writing, &cache->b_writing);
// Pin the file and the cache object for the request; cachefiles_put_kiocb()
// releases both when the last reference goes away.
323 get_file(ki->iocb.ki_filp);
324 cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
326 trace_cachefiles_write(object, file_inode(file), ki->iocb.ki_pos, len);
// The write is bracketed by memalloc_nofs_save()/memalloc_nofs_restore().
327 old_nofs = memalloc_nofs_save();
328 ret = cachefiles_inject_write_error();
330 ret = vfs_iocb_iter_write(file, &ki->iocb, iter);
331 memalloc_nofs_restore(old_nofs);
337 case -ERESTARTNOINTR:
338 case -ERESTARTNOHAND:
339 case -ERESTART_RESTARTBLOCK:
340 /* There's no easy way to restart the syscall since other AIO's
341 * may be already running. Just fail this IO with EINTR.
346 ki->was_async = false;
// Run the completion handler inline; was_async was cleared just above.
347 cachefiles_write_complete(&ki->iocb, ret);
// Drop this function's reference on the request.
354 cachefiles_put_kiocb(ki);
355 _leave(" = %zd", ret);
// Cache-ops .write entry point: wait until the cache can accept a write, then
// hand over to __cachefiles_write() using the object and file held in @cres.
// NOTE(review): some parameters and arguments are not visible in this excerpt.
359 static int cachefiles_write(struct netfs_cache_resources *cres,
361 struct iov_iter *iter,
362 netfs_io_terminated_t term_func,
363 void *term_func_priv)
365 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) {
// The wait failed: report -ENOBUFS as a non-asynchronous completion.
367 term_func(term_func_priv, -ENOBUFS, false);
371 return __cachefiles_write(cachefiles_cres_object(cres),
372 cachefiles_cres_file(cres),
374 term_func, term_func_priv);
// Decide where a read at @start should be served from.
// The result starts out as NETFS_DOWNLOAD_FROM_SERVER; the visible code can
// change it to NETFS_FILL_WITH_ZEROES, NETFS_READ_FROM_CACHE or
// NETFS_INVALID_READ. "why" records the reason and is passed, together with
// the result, to the trace_cachefiles_prep_read() tracepoint at the end.
// NOTE(review): many lines (locals such as len/off/to/rc, branch targets,
// labels, length adjustments and the return) are not visible in this excerpt.
377 static inline enum netfs_io_source
378 cachefiles_do_prepare_read(struct netfs_cache_resources *cres,
379 loff_t start, size_t *_len, loff_t i_size,
380 unsigned long *_flags, ino_t netfs_ino)
382 enum cachefiles_prepare_read_trace why;
383 struct cachefiles_object *object = NULL;
384 struct cachefiles_cache *cache;
385 struct fscache_cookie *cookie = fscache_cres_cookie(cres);
386 const struct cred *saved_cred;
387 struct file *file = cachefiles_cres_file(cres);
388 enum netfs_io_source ret = NETFS_DOWNLOAD_FROM_SERVER;
// Inode number for tracing; 0 while no cache file is attached to @cres.
391 ino_t ino = file ? file_inode(file)->i_ino : 0;
394 _enter("%zx @%llx/%llx", len, start, i_size);
// A read starting at or beyond i_size is satisfied with zeroes.
396 if (start >= i_size) {
397 ret = NETFS_FILL_WITH_ZEROES;
398 why = cachefiles_trace_read_after_eof;
// The cookie is flagged as having no data to read: request that the data be
// copied to the cache.
402 if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) {
403 __set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags);
404 why = cachefiles_trace_read_no_data;
405 if (!test_bit(NETFS_SREQ_ONDEMAND, _flags))
409 /* The object and the file may be being created in the background. */
411 why = cachefiles_trace_read_no_file;
412 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
// Re-read the file pointer after waiting and refresh the traced inode number.
414 file = cachefiles_cres_file(cres);
417 ino = file_inode(file)->i_ino;
420 object = cachefiles_cres_object(cres);
421 cache = object->volume->cache;
// The seeks below are bracketed by cachefiles_begin_secure() and
// cachefiles_end_secure().
422 cachefiles_begin_secure(cache, &saved_cred);
// Find where data begins at or after start.
424 off = cachefiles_inject_read_error();
426 off = vfs_llseek(file, start, SEEK_DATA);
// -ENXIO leads to the download-and-store path; any other value in the errno
// range is traced as a seek error.
427 if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
428 if (off == (loff_t)-ENXIO) {
429 why = cachefiles_trace_read_seek_nxio;
430 goto download_and_store;
432 trace_cachefiles_io_error(object, file_inode(file), off,
433 cachefiles_trace_seek_error);
434 why = cachefiles_trace_read_seek_error;
// The first data lies at or beyond the end of the requested range.
438 if (off >= start + len) {
439 why = cachefiles_trace_read_found_hole;
440 goto download_and_store;
// Round the data offset up to a cache block boundary.
// NOTE(review): the test that selects the "found_part" outcome below is not
// visible in this excerpt.
444 off = round_up(off, cache->bsize);
447 why = cachefiles_trace_read_found_part;
448 goto download_and_store;
// Find the next hole at or after start, i.e. where the cached data ends.
451 to = cachefiles_inject_read_error();
453 to = vfs_llseek(file, start, SEEK_HOLE);
454 if (to < 0 && to >= (loff_t)-MAX_ERRNO) {
455 trace_cachefiles_io_error(object, file_inode(file), to,
456 cachefiles_trace_seek_error);
457 why = cachefiles_trace_read_seek_error;
// The data ends inside the request: round the end to a block boundary, up
// when the request reaches i_size.
// NOTE(review): presumably the round_down is the alternative branch; the
// keyword separating the two assignments is not visible here.
461 if (to < start + len) {
462 if (start + len >= i_size)
463 to = round_up(to, cache->bsize);
465 to = round_down(to, cache->bsize);
// Cached data is available for the request.
470 why = cachefiles_trace_read_have_data;
471 ret = NETFS_READ_FROM_CACHE;
// Request copy-to-cache; for on-demand requests cachefiles_ondemand_read() is
// called for the range first.
// NOTE(review): the handling of rc between the lines below is not visible.
475 __set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags);
476 if (test_bit(NETFS_SREQ_ONDEMAND, _flags)) {
477 rc = cachefiles_ondemand_read(object, start, len);
479 __clear_bit(NETFS_SREQ_ONDEMAND, _flags);
482 ret = NETFS_INVALID_READ;
485 cachefiles_end_secure(cache, saved_cred);
// Record the decision and its reason.
487 trace_cachefiles_prep_read(object, start, len, *_flags, ret, why, ino, netfs_ino);
492 * Prepare a read operation, shortening it to a cached/uncached
493 * boundary as appropriate.
// Cache-ops .prepare_read entry point: unpack the subrequest (start, len,
// flags) and its parent request (cache resources, inode number) and defer to
// cachefiles_do_prepare_read().
// NOTE(review): the remaining parameter(s), including the source of i_size,
// are not visible in this excerpt.
495 static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq,
498 return cachefiles_do_prepare_read(&subreq->rreq->cache_resources,
499 subreq->start, &subreq->len, i_size,
500 &subreq->flags, subreq->rreq->inode->i_ino);
504 * Prepare an on-demand read operation, shortening it to a cached/uncached
505 * boundary as appropriate.
// Cache-ops .prepare_ondemand_read entry point: forwards every argument
// unchanged to cachefiles_do_prepare_read(), with @ino passed as its
// netfs_ino argument.
507 static enum netfs_io_source
508 cachefiles_prepare_ondemand_read(struct netfs_cache_resources *cres,
509 loff_t start, size_t *_len, loff_t i_size,
510 unsigned long *_flags, ino_t ino)
512 return cachefiles_do_prepare_read(cres, start, _len, i_size, _flags, ino);
516 * Prepare for a write to occur.
// Prepare a write to the cache file of @object: check that the range is
// PAGE_SIZE aligned, round the length up to whole pages, and work out whether
// there is enough space, probing the file with SEEK_DATA and SEEK_HOLE.
// Visible results: 0 when the region can simply be overwritten or is fully
// allocated; otherwise the value of cachefiles_has_space() for the write.
// NOTE(review): several lines (the file parameter, locals such as len/ret,
// conditions, labels and error returns) are not visible in this excerpt.
518 int __cachefiles_prepare_write(struct cachefiles_object *object,
520 loff_t *_start, size_t *_len, size_t upper_len,
521 bool no_space_allocated_yet)
523 struct cachefiles_cache *cache = object->volume->cache;
524 loff_t start = *_start, pos;
528 /* Round to DIO size */
529 start = round_down(*_start, PAGE_SIZE);
// A start that is not page aligned, or a length above upper_len, is counted
// as a DIO misfit.
530 if (start != *_start || *_len > upper_len) {
531 /* Probably asked to cache a streaming write written into the
532 * pagecache when the cookie was temporarily out of service to
535 fscache_count_dio_misfit();
// Round the length up to whole pages.
539 *_len = round_up(len, PAGE_SIZE);
541 /* We need to work out whether there's sufficient disk space to perform
542 * the write - but we can skip that check if we have space already
545 if (no_space_allocated_yet)
// Find the first data at or after start.
548 pos = cachefiles_inject_read_error();
550 pos = vfs_llseek(file, start, SEEK_DATA);
551 if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
553 goto check_space; /* Unallocated tail */
554 trace_cachefiles_io_error(object, file_inode(file), pos,
555 cachefiles_trace_seek_error);
// The comparison is done in u64 on both sides.
558 if ((u64)pos >= (u64)start + *_len)
559 goto check_space; /* Unallocated region */
561 /* We have a block that's at least partially filled - if we're low on
562 * space, we need to see if it's fully allocated. If it's not, we may
565 if (cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
566 cachefiles_has_space_check) == 0)
567 return 0; /* Enough space to simply overwrite the whole block */
// Find the first hole at or after start to see whether the region is fully
// allocated.
569 pos = cachefiles_inject_read_error();
571 pos = vfs_llseek(file, start, SEEK_HOLE);
572 if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
573 trace_cachefiles_io_error(object, file_inode(file), pos,
574 cachefiles_trace_seek_error);
577 if ((u64)pos >= (u64)start + *_len)
578 return 0; /* Fully allocated */
580 /* Partially allocated, but insufficient space: cull. */
581 fscache_count_no_write_space();
// Punch a hole while keeping the file size; the range arguments are not
// visible in this excerpt.
582 ret = cachefiles_inject_remove_error();
584 ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
587 trace_cachefiles_io_error(object, file_inode(file), ret,
588 cachefiles_trace_fallocate_error);
589 cachefiles_io_error_obj(object,
590 "CacheFiles: fallocate failed (%d)\n", ret);
// Final answer: the space check for writing this many pages.
597 return cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
598 cachefiles_has_space_for_write);
// Cache-ops .prepare_write entry point: make sure a cache file is attached to
// @cres (waiting for the operation if it is not yet there), then call
// __cachefiles_prepare_write() between cachefiles_begin_secure() and
// cachefiles_end_secure().
// NOTE(review): the failure returns and the final return are not visible in
// this excerpt; @i_size is not used in the visible lines.
601 static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
602 loff_t *_start, size_t *_len, size_t upper_len,
603 loff_t i_size, bool no_space_allocated_yet)
605 struct cachefiles_object *object = cachefiles_cres_object(cres);
606 struct cachefiles_cache *cache = object->volume->cache;
607 const struct cred *saved_cred;
// No file yet: wait for write access, then check again for the file.
610 if (!cachefiles_cres_file(cres)) {
611 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
613 if (!cachefiles_cres_file(cres))
617 cachefiles_begin_secure(cache, &saved_cred);
618 ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
619 _start, _len, upper_len,
620 no_space_allocated_yet);
621 cachefiles_end_secure(cache, saved_cred);
626 * Clean up an operation.
// Cache-ops .end_operation entry point: fetch the file held in @cres and end
// the cookie access with fscache_access_io_end.
// NOTE(review): what is done with "file" is not visible in this excerpt.
628 static void cachefiles_end_operation(struct netfs_cache_resources *cres)
630 struct file *file = cachefiles_cres_file(cres);
634 fscache_end_cookie_access(fscache_cres_cookie(cres), fscache_access_io_end);
// Operation table that cachefiles_begin_operation() installs in cres->ops;
// each slot points at the matching static function in this file.
637 static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
638 .end_operation = cachefiles_end_operation,
639 .read = cachefiles_read,
640 .write = cachefiles_write,
641 .prepare_read = cachefiles_prepare_read,
642 .prepare_write = cachefiles_prepare_write,
643 .prepare_ondemand_read = cachefiles_prepare_ondemand_read,
644 .query_occupancy = cachefiles_query_occupancy,
648 * Open the cache file when beginning a cache operation.
650 bool cachefiles_begin_operation(struct netfs_cache_resources *cres,
651 enum fscache_want_state want_state)
653 struct cachefiles_object *object = cachefiles_cres_object(cres);
655 if (!cachefiles_cres_file(cres)) {
656 cres->ops = &cachefiles_netfs_cache_ops;
658 spin_lock(&object->lock);
659 if (!cres->cache_priv2 && object->file)
660 cres->cache_priv2 = get_file(object->file);
661 spin_unlock(&object->lock);
665 if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) {
666 pr_err("failed to get cres->file\n");