linux.git/blame - fs/netfs/read_helper.c
netfs: Split netfs_io_* object handling out
3d3c9504
DH
1// SPDX-License-Identifier: GPL-2.0-or-later
2/* Network filesystem high-level read support.
3 *
4 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells ([email protected])
6 */
7
8#include <linux/module.h>
9#include <linux/export.h>
10#include <linux/fs.h>
11#include <linux/mm.h>
12#include <linux/pagemap.h>
13#include <linux/slab.h>
14#include <linux/uio.h>
15#include <linux/sched/mm.h>
16#include <linux/task_io_accounting_ops.h>
17#include <linux/netfs.h>
18#include "internal.h"
77b4d2c6
DH
19#define CREATE_TRACE_POINTS
20#include <trace/events/netfs.h>
3d3c9504
DH
21
22MODULE_DESCRIPTION("Network fs support");
23MODULE_AUTHOR("Red Hat, Inc.");
24MODULE_LICENSE("GPL");
25
26unsigned netfs_debug;
27module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
28MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");
29
3d3c9504
DH
30/*
31 * Clear the unread part of an I/O request.
32 */
6a19114b 33static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
3d3c9504
DH
34{
35 struct iov_iter iter;
36
330de47d 37 iov_iter_xarray(&iter, READ, &subreq->rreq->mapping->i_pages,
3d3c9504
DH
38 subreq->start + subreq->transferred,
39 subreq->len - subreq->transferred);
40 iov_iter_zero(iov_iter_count(&iter), &iter);
41}
42
726218fd
DH
43static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
44 bool was_async)
45{
6a19114b 46 struct netfs_io_subrequest *subreq = priv;
726218fd
DH
47
48 netfs_subreq_terminated(subreq, transferred_or_error, was_async);
49}
50
51/*
52 * Issue a read against the cache.
53 * - Eats the caller's ref on subreq.
54 */
6a19114b
DH
55static void netfs_read_from_cache(struct netfs_io_request *rreq,
56 struct netfs_io_subrequest *subreq,
3a11b3a8 57 enum netfs_read_from_hole read_hole)
726218fd
DH
58{
59 struct netfs_cache_resources *cres = &rreq->cache_resources;
60 struct iov_iter iter;
61
62 netfs_stat(&netfs_n_rh_read);
63 iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
64 subreq->start + subreq->transferred,
65 subreq->len - subreq->transferred);
66
3a11b3a8 67 cres->ops->read(cres, subreq->start, &iter, read_hole,
726218fd
DH
68 netfs_cache_read_terminated, subreq);
69}
70
3d3c9504
DH
71/*
72 * Fill a subrequest region with zeroes.
73 */
6a19114b
DH
74static void netfs_fill_with_zeroes(struct netfs_io_request *rreq,
75 struct netfs_io_subrequest *subreq)
3d3c9504 76{
289af54c 77 netfs_stat(&netfs_n_rh_zero);
3d3c9504
DH
78 __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
79 netfs_subreq_terminated(subreq, 0, false);
80}
81
82/*
83 * Ask the netfs to issue a read request to the server for us.
84 *
 85 * The netfs is expected to read from subreq->start + subreq->transferred to
 86 * subreq->start + subreq->len - 1. It may not backtrack and write data into the
87 * buffer prior to the transferred point as it might clobber dirty data
88 * obtained from the cache.
89 *
90 * Alternatively, the netfs is allowed to indicate one of two things:
91 *
 92 * - NETFS_SREQ_SHORT_IO: A short read - it will get called again to try and
93 * make progress.
94 *
95 * - NETFS_SREQ_CLEAR_TAIL: A short read - the rest of the buffer will be
96 * cleared.
97 */
6a19114b
DH
98static void netfs_read_from_server(struct netfs_io_request *rreq,
99 struct netfs_io_subrequest *subreq)
3d3c9504 100{
289af54c 101 netfs_stat(&netfs_n_rh_download);
f18a3785 102 rreq->netfs_ops->issue_read(subreq);
3d3c9504
DH
103}
104
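/*
 * Example sketch (hypothetical, not part of this file): one way a netfs's
 * ->issue_read() op might honour the contract described above.  The
 * myfs_fetch_data() call is an assumption standing in for the filesystem's
 * transport; the subrequest fields, the NETFS_SREQ_CLEAR_TAIL flag and
 * netfs_subreq_terminated() are the real helper API used in this file.
 */
static void myfs_issue_read(struct netfs_io_subrequest *subreq)
{
        struct netfs_io_request *rreq = subreq->rreq;
        size_t want = subreq->len - subreq->transferred;
        struct iov_iter iter;
        ssize_t ret;

        /* Describe only the unfilled part of the buffer: the netfs must not
         * write before subreq->start + subreq->transferred.
         */
        iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
                        subreq->start + subreq->transferred, want);

        /* Hypothetical synchronous RPC returning bytes read or -errno. */
        ret = myfs_fetch_data(rreq->netfs_priv,
                              subreq->start + subreq->transferred, &iter);

        /* If the server hit its EOF short of the request, ask the helper to
         * zero the remainder rather than calling us again.
         */
        if (ret >= 0 && (size_t)ret < want)
                __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);

        netfs_subreq_terminated(subreq, ret, false);
}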
105/*
106 * Release those waiting.
107 */
6a19114b 108static void netfs_rreq_completed(struct netfs_io_request *rreq, bool was_async)
3d3c9504 109{
77b4d2c6 110 trace_netfs_rreq(rreq, netfs_rreq_trace_done);
f18a3785
DH
111 netfs_clear_subrequests(rreq, was_async);
112 netfs_put_request(rreq, was_async);
3d3c9504
DH
113}
114
726218fd
DH
115/*
116 * Deal with the completion of writing the data to the cache. We have to clear
78525c74 117 * the PG_fscache bits on the folios involved and release the caller's ref.
726218fd
DH
118 *
119 * May be called in softirq mode and we inherit a ref from the caller.
120 */
6a19114b 121static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq,
726218fd
DH
122 bool was_async)
123{
6a19114b 124 struct netfs_io_subrequest *subreq;
78525c74 125 struct folio *folio;
726218fd
DH
126 pgoff_t unlocked = 0;
127 bool have_unlocked = false;
128
129 rcu_read_lock();
130
131 list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
132 XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);
133
78525c74 134 xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
726218fd 135 /* We might have multiple writes from the same huge
78525c74 136 * folio, but we mustn't unlock a folio more than once.
726218fd 137 */
78525c74 138 if (have_unlocked && folio_index(folio) <= unlocked)
726218fd 139 continue;
78525c74
DH
140 unlocked = folio_index(folio);
141 folio_end_fscache(folio);
726218fd
DH
142 have_unlocked = true;
143 }
144 }
145
146 rcu_read_unlock();
147 netfs_rreq_completed(rreq, was_async);
148}
149
150static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error,
151 bool was_async)
152{
6a19114b
DH
153 struct netfs_io_subrequest *subreq = priv;
154 struct netfs_io_request *rreq = subreq->rreq;
726218fd
DH
155
156 if (IS_ERR_VALUE(transferred_or_error)) {
157 netfs_stat(&netfs_n_rh_write_failed);
0246f3e5
DH
158 trace_netfs_failure(rreq, subreq, transferred_or_error,
159 netfs_fail_copy_to_cache);
726218fd
DH
160 } else {
161 netfs_stat(&netfs_n_rh_write_done);
162 }
163
164 trace_netfs_sreq(subreq, netfs_sreq_trace_write_term);
165
6a19114b
DH
166 /* If we decrement nr_copy_ops to 0, the ref belongs to us. */
167 if (atomic_dec_and_test(&rreq->nr_copy_ops))
726218fd
DH
168 netfs_rreq_unmark_after_write(rreq, was_async);
169
170 netfs_put_subrequest(subreq, was_async);
171}
172
173/*
174 * Perform any outstanding writes to the cache. We inherit a ref from the
175 * caller.
176 */
6a19114b 177static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq)
726218fd
DH
178{
179 struct netfs_cache_resources *cres = &rreq->cache_resources;
6a19114b 180 struct netfs_io_subrequest *subreq, *next, *p;
726218fd
DH
181 struct iov_iter iter;
182 int ret;
183
184 trace_netfs_rreq(rreq, netfs_rreq_trace_write);
185
186 /* We don't want terminating writes trying to wake us up whilst we're
187 * still going through the list.
188 */
6a19114b 189 atomic_inc(&rreq->nr_copy_ops);
726218fd
DH
190
191 list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) {
f18a3785 192 if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
726218fd
DH
193 list_del_init(&subreq->rreq_link);
194 netfs_put_subrequest(subreq, false);
195 }
196 }
197
198 list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
199 /* Amalgamate adjacent writes */
200 while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
201 next = list_next_entry(subreq, rreq_link);
202 if (next->start != subreq->start + subreq->len)
203 break;
204 subreq->len += next->len;
205 list_del_init(&next->rreq_link);
206 netfs_put_subrequest(next, false);
207 }
208
209 ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len,
a39c41b8 210 rreq->i_size, true);
726218fd 211 if (ret < 0) {
0246f3e5 212 trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write);
726218fd
DH
213 trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip);
214 continue;
215 }
216
217 iov_iter_xarray(&iter, WRITE, &rreq->mapping->i_pages,
218 subreq->start, subreq->len);
219
6a19114b 220 atomic_inc(&rreq->nr_copy_ops);
726218fd 221 netfs_stat(&netfs_n_rh_write);
f18a3785 222 netfs_get_subrequest(subreq);
726218fd
DH
223 trace_netfs_sreq(subreq, netfs_sreq_trace_write);
224 cres->ops->write(cres, subreq->start, &iter,
225 netfs_rreq_copy_terminated, subreq);
226 }
227
6a19114b
DH
228 /* If we decrement nr_copy_ops to 0, the usage ref belongs to us. */
229 if (atomic_dec_and_test(&rreq->nr_copy_ops))
726218fd
DH
230 netfs_rreq_unmark_after_write(rreq, false);
231}
232
233static void netfs_rreq_write_to_cache_work(struct work_struct *work)
234{
6a19114b
DH
235 struct netfs_io_request *rreq =
236 container_of(work, struct netfs_io_request, work);
726218fd
DH
237
238 netfs_rreq_do_write_to_cache(rreq);
239}
240
6a19114b 241static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq)
726218fd 242{
598ad0bd
DH
243 rreq->work.func = netfs_rreq_write_to_cache_work;
244 if (!queue_work(system_unbound_wq, &rreq->work))
245 BUG();
726218fd
DH
246}
247
3d3c9504 248/*
78525c74
DH
249 * Unlock the folios in a read operation. We need to set PG_fscache on any
250 * folios we're going to write back before we unlock them.
3d3c9504 251 */
6a19114b 252static void netfs_rreq_unlock(struct netfs_io_request *rreq)
3d3c9504 253{
6a19114b 254 struct netfs_io_subrequest *subreq;
78525c74 255 struct folio *folio;
3d3c9504
DH
256 unsigned int iopos, account = 0;
257 pgoff_t start_page = rreq->start / PAGE_SIZE;
258 pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
259 bool subreq_failed = false;
3d3c9504
DH
260
261 XA_STATE(xas, &rreq->mapping->i_pages, start_page);
262
263 if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
f18a3785 264 __clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
3d3c9504 265 list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
f18a3785 266 __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
3d3c9504
DH
267 }
268 }
269
270 /* Walk through the pagecache and the I/O request lists simultaneously.
271 * We may have a mixture of cached and uncached sections and we only
272 * really want to write out the uncached sections. This is slightly
273 * complicated by the possibility that we might have huge pages with a
274 * mixture inside.
275 */
276 subreq = list_first_entry(&rreq->subrequests,
6a19114b 277 struct netfs_io_subrequest, rreq_link);
3d3c9504
DH
278 iopos = 0;
279 subreq_failed = (subreq->error < 0);
280
77b4d2c6
DH
281 trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);
282
3d3c9504 283 rcu_read_lock();
78525c74
DH
284 xas_for_each(&xas, folio, last_page) {
285 unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
286 unsigned int pgend = pgpos + folio_size(folio);
3d3c9504
DH
287 bool pg_failed = false;
288
289 for (;;) {
290 if (!subreq) {
291 pg_failed = true;
292 break;
293 }
f18a3785 294 if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
78525c74 295 folio_start_fscache(folio);
3d3c9504
DH
296 pg_failed |= subreq_failed;
297 if (pgend < iopos + subreq->len)
298 break;
299
300 account += subreq->transferred;
301 iopos += subreq->len;
302 if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
303 subreq = list_next_entry(subreq, rreq_link);
304 subreq_failed = (subreq->error < 0);
305 } else {
306 subreq = NULL;
307 subreq_failed = false;
308 }
309 if (pgend == iopos)
310 break;
311 }
312
313 if (!pg_failed) {
78525c74
DH
314 flush_dcache_folio(folio);
315 folio_mark_uptodate(folio);
3d3c9504
DH
316 }
317
78525c74
DH
318 if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
319 if (folio_index(folio) == rreq->no_unlock_folio &&
320 test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
3d3c9504
DH
321 _debug("no unlock");
322 else
78525c74 323 folio_unlock(folio);
3d3c9504
DH
324 }
325 }
326 rcu_read_unlock();
327
328 task_io_account_read(account);
329 if (rreq->netfs_ops->done)
330 rreq->netfs_ops->done(rreq);
331}
332
333/*
334 * Handle a short read.
335 */
6a19114b
DH
336static void netfs_rreq_short_read(struct netfs_io_request *rreq,
337 struct netfs_io_subrequest *subreq)
3d3c9504 338{
f18a3785 339 __clear_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
3d3c9504
DH
340 __set_bit(NETFS_SREQ_SEEK_DATA_READ, &subreq->flags);
341
289af54c 342 netfs_stat(&netfs_n_rh_short_read);
77b4d2c6
DH
343 trace_netfs_sreq(subreq, netfs_sreq_trace_resubmit_short);
344
f18a3785 345 netfs_get_subrequest(subreq);
6a19114b 346 atomic_inc(&rreq->nr_outstanding);
726218fd 347 if (subreq->source == NETFS_READ_FROM_CACHE)
3a11b3a8 348 netfs_read_from_cache(rreq, subreq, NETFS_READ_HOLE_CLEAR);
726218fd
DH
349 else
350 netfs_read_from_server(rreq, subreq);
3d3c9504
DH
351}
352
353/*
354 * Resubmit any short or failed operations. Returns true if we got the rreq
355 * ref back.
356 */
6a19114b 357static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq)
3d3c9504 358{
6a19114b 359 struct netfs_io_subrequest *subreq;
3d3c9504
DH
360
361 WARN_ON(in_interrupt());
362
77b4d2c6
DH
363 trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);
364
3d3c9504
DH
365 /* We don't want terminating submissions trying to wake us up whilst
366 * we're still going through the list.
367 */
6a19114b 368 atomic_inc(&rreq->nr_outstanding);
3d3c9504
DH
369
370 __clear_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
371 list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
372 if (subreq->error) {
373 if (subreq->source != NETFS_READ_FROM_CACHE)
374 break;
375 subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
376 subreq->error = 0;
289af54c 377 netfs_stat(&netfs_n_rh_download_instead);
77b4d2c6 378 trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead);
f18a3785 379 netfs_get_subrequest(subreq);
6a19114b 380 atomic_inc(&rreq->nr_outstanding);
3d3c9504 381 netfs_read_from_server(rreq, subreq);
f18a3785 382 } else if (test_bit(NETFS_SREQ_SHORT_IO, &subreq->flags)) {
3d3c9504
DH
383 netfs_rreq_short_read(rreq, subreq);
384 }
385 }
386
6a19114b
DH
387 /* If we decrement nr_outstanding to 0, the usage ref belongs to us. */
388 if (atomic_dec_and_test(&rreq->nr_outstanding))
3d3c9504
DH
389 return true;
390
6a19114b 391 wake_up_var(&rreq->nr_outstanding);
3d3c9504
DH
392 return false;
393}
394
726218fd
DH
395/*
396 * Check to see if the data read is still valid.
397 */
6a19114b 398static void netfs_rreq_is_still_valid(struct netfs_io_request *rreq)
726218fd 399{
6a19114b 400 struct netfs_io_subrequest *subreq;
726218fd
DH
401
402 if (!rreq->netfs_ops->is_still_valid ||
403 rreq->netfs_ops->is_still_valid(rreq))
404 return;
405
406 list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
407 if (subreq->source == NETFS_READ_FROM_CACHE) {
408 subreq->error = -ESTALE;
409 __set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
410 }
411 }
412}
413
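/*
 * Example sketch (hypothetical, not part of this file): an optional
 * ->is_still_valid() hook as consulted above.  A netfs whose cache coherency
 * can lapse (the myfs_cache_still_valid() test is an assumption) returns
 * false here so that the helper discards data read from the cache and
 * reissues those subrequests to the server instead.
 */
static bool myfs_is_still_valid(struct netfs_io_request *rreq)
{
        return myfs_cache_still_valid(rreq->mapping->host);
}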
3d3c9504
DH
414/*
415 * Assess the state of a read request and decide what to do next.
416 *
417 * Note that we could be in an ordinary kernel thread, on a workqueue or in
418 * softirq context at this point. We inherit a ref from the caller.
419 */
6a19114b 420static void netfs_rreq_assess(struct netfs_io_request *rreq, bool was_async)
3d3c9504 421{
77b4d2c6
DH
422 trace_netfs_rreq(rreq, netfs_rreq_trace_assess);
423
3d3c9504 424again:
726218fd
DH
425 netfs_rreq_is_still_valid(rreq);
426
3d3c9504
DH
427 if (!test_bit(NETFS_RREQ_FAILED, &rreq->flags) &&
428 test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags)) {
429 if (netfs_rreq_perform_resubmissions(rreq))
430 goto again;
431 return;
432 }
433
434 netfs_rreq_unlock(rreq);
435
436 clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
437 wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);
438
f18a3785 439 if (test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags))
598ad0bd 440 return netfs_rreq_write_to_cache(rreq);
726218fd 441
3d3c9504
DH
442 netfs_rreq_completed(rreq, was_async);
443}
444
3a4a38e6 445void netfs_rreq_work(struct work_struct *work)
3d3c9504 446{
6a19114b
DH
447 struct netfs_io_request *rreq =
448 container_of(work, struct netfs_io_request, work);
3d3c9504
DH
449 netfs_rreq_assess(rreq, false);
450}
451
452/*
453 * Handle the completion of all outstanding I/O operations on a read request.
454 * We inherit a ref from the caller.
455 */
6a19114b 456static void netfs_rreq_terminated(struct netfs_io_request *rreq,
3d3c9504
DH
457 bool was_async)
458{
459 if (test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags) &&
460 was_async) {
461 if (!queue_work(system_unbound_wq, &rreq->work))
462 BUG();
463 } else {
464 netfs_rreq_assess(rreq, was_async);
465 }
466}
467
468/**
469 * netfs_subreq_terminated - Note the termination of an I/O operation.
470 * @subreq: The I/O request that has terminated.
471 * @transferred_or_error: The amount of data transferred or an error code.
472 * @was_async: The termination was asynchronous
473 *
474 * This tells the read helper that a contributory I/O operation has terminated,
475 * one way or another, and that it should integrate the results.
476 *
477 * The caller indicates in @transferred_or_error the outcome of the operation,
478 * supplying a positive value to indicate the number of bytes transferred, 0 to
479 * indicate a failure to transfer anything that should be retried or a negative
480 * error code. The helper will look after reissuing I/O operations as
481 * appropriate and writing downloaded data to the cache.
482 *
483 * If @was_async is true, the caller might be running in softirq or interrupt
484 * context and we can't sleep.
485 */
6a19114b 486void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
3d3c9504
DH
487 ssize_t transferred_or_error,
488 bool was_async)
489{
6a19114b 490 struct netfs_io_request *rreq = subreq->rreq;
3d3c9504
DH
491 int u;
492
493 _enter("[%u]{%llx,%lx},%zd",
494 subreq->debug_index, subreq->start, subreq->flags,
495 transferred_or_error);
496
289af54c
DH
497 switch (subreq->source) {
498 case NETFS_READ_FROM_CACHE:
499 netfs_stat(&netfs_n_rh_read_done);
500 break;
501 case NETFS_DOWNLOAD_FROM_SERVER:
502 netfs_stat(&netfs_n_rh_download_done);
503 break;
504 default:
505 break;
506 }
507
3d3c9504
DH
508 if (IS_ERR_VALUE(transferred_or_error)) {
509 subreq->error = transferred_or_error;
0246f3e5
DH
510 trace_netfs_failure(rreq, subreq, transferred_or_error,
511 netfs_fail_read);
3d3c9504
DH
512 goto failed;
513 }
514
515 if (WARN(transferred_or_error > subreq->len - subreq->transferred,
516 "Subreq overread: R%x[%x] %zd > %zu - %zu",
517 rreq->debug_id, subreq->debug_index,
518 transferred_or_error, subreq->len, subreq->transferred))
519 transferred_or_error = subreq->len - subreq->transferred;
520
521 subreq->error = 0;
522 subreq->transferred += transferred_or_error;
523 if (subreq->transferred < subreq->len)
524 goto incomplete;
525
526complete:
527 __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
f18a3785
DH
528 if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
529 set_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
3d3c9504
DH
530
531out:
77b4d2c6
DH
532 trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
533
6a19114b
DH
534 /* If we decrement nr_outstanding to 0, the ref belongs to us. */
535 u = atomic_dec_return(&rreq->nr_outstanding);
3d3c9504
DH
536 if (u == 0)
537 netfs_rreq_terminated(rreq, was_async);
538 else if (u == 1)
6a19114b 539 wake_up_var(&rreq->nr_outstanding);
3d3c9504
DH
540
541 netfs_put_subrequest(subreq, was_async);
542 return;
543
544incomplete:
545 if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
546 netfs_clear_unread(subreq);
547 subreq->transferred = subreq->len;
548 goto complete;
549 }
550
551 if (transferred_or_error == 0) {
552 if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
553 subreq->error = -ENODATA;
554 goto failed;
555 }
556 } else {
557 __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
558 }
559
f18a3785 560 __set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
3d3c9504
DH
561 set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
562 goto out;
563
564failed:
565 if (subreq->source == NETFS_READ_FROM_CACHE) {
289af54c 566 netfs_stat(&netfs_n_rh_read_failed);
3d3c9504
DH
567 set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
568 } else {
289af54c 569 netfs_stat(&netfs_n_rh_download_failed);
3d3c9504
DH
570 set_bit(NETFS_RREQ_FAILED, &rreq->flags);
571 rreq->error = subreq->error;
572 }
573 goto out;
574}
575EXPORT_SYMBOL(netfs_subreq_terminated);
576
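/*
 * Example sketch (hypothetical, not part of this file): an asynchronous
 * transport completion handler reporting back through the interface
 * documented above.  struct myfs_call and its fields are assumptions; the
 * negative/zero/positive semantics of the second argument are as described
 * in the kernel-doc for netfs_subreq_terminated().
 */
struct myfs_call {                              /* hypothetical transport op */
        struct netfs_io_subrequest *subreq;
        ssize_t                 nr_received;
        int                     error;          /* 0 or -errno */
};

static void myfs_read_call_done(struct myfs_call *call)
{
        ssize_t ret = call->error ? call->error : call->nr_received;

        /* Negative reports an error, zero reports no progress (retried until
         * NETFS_SREQ_NO_PROGRESS trips) and positive reports bytes received.
         * was_async is true as this may run in softirq context.
         */
        netfs_subreq_terminated(call->subreq, ret, true);
}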
6a19114b 577static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_subrequest *subreq,
3d3c9504
DH
578 loff_t i_size)
579{
6a19114b 580 struct netfs_io_request *rreq = subreq->rreq;
726218fd 581 struct netfs_cache_resources *cres = &rreq->cache_resources;
3d3c9504 582
726218fd
DH
583 if (cres->ops)
584 return cres->ops->prepare_read(subreq, i_size);
3d3c9504
DH
585 if (subreq->start >= rreq->i_size)
586 return NETFS_FILL_WITH_ZEROES;
587 return NETFS_DOWNLOAD_FROM_SERVER;
588}
589
590/*
591 * Work out what sort of subrequest the next one will be.
592 */
6a19114b
DH
593static enum netfs_io_source
594netfs_rreq_prepare_read(struct netfs_io_request *rreq,
595 struct netfs_io_subrequest *subreq)
3d3c9504 596{
6a19114b 597 enum netfs_io_source source;
3d3c9504
DH
598
599 _enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
600
601 source = netfs_cache_prepare_read(subreq, rreq->i_size);
602 if (source == NETFS_INVALID_READ)
603 goto out;
604
605 if (source == NETFS_DOWNLOAD_FROM_SERVER) {
606 /* Call out to the netfs to let it shrink the request to fit
 607 * its own I/O sizes and boundaries. If it shrinks it here, it
608 * will be called again to make simultaneous calls; if it wants
609 * to make serial calls, it can indicate a short read and then
610 * we will call it again.
611 */
612 if (subreq->len > rreq->i_size - subreq->start)
613 subreq->len = rreq->i_size - subreq->start;
614
615 if (rreq->netfs_ops->clamp_length &&
616 !rreq->netfs_ops->clamp_length(subreq)) {
617 source = NETFS_INVALID_READ;
618 goto out;
619 }
620 }
621
622 if (WARN_ON(subreq->len == 0))
623 source = NETFS_INVALID_READ;
624
625out:
626 subreq->source = source;
77b4d2c6 627 trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
3d3c9504
DH
628 return source;
629}
630
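/*
 * Example sketch (hypothetical, not part of this file): a ->clamp_length()
 * hook as consulted by netfs_rreq_prepare_read() above.  It caps each
 * subrequest at an assumed per-RPC limit so that a large request is sliced
 * into several server reads; MYFS_MAX_RPC_SIZE is an assumption.
 */
#define MYFS_MAX_RPC_SIZE       (1024 * 1024)

static bool myfs_clamp_length(struct netfs_io_subrequest *subreq)
{
        /* Shrinking is permitted; the slicing loop will come back around for
         * whatever remains.  Returning false would invalidate the read.
         */
        subreq->len = min_t(size_t, subreq->len, MYFS_MAX_RPC_SIZE);
        return true;
}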
631/*
632 * Slice off a piece of a read request and submit an I/O request for it.
633 */
6a19114b 634static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
3d3c9504
DH
635 unsigned int *_debug_index)
636{
6a19114b
DH
637 struct netfs_io_subrequest *subreq;
638 enum netfs_io_source source;
3d3c9504
DH
639
640 subreq = netfs_alloc_subrequest(rreq);
641 if (!subreq)
642 return false;
643
644 subreq->debug_index = (*_debug_index)++;
645 subreq->start = rreq->start + rreq->submitted;
646 subreq->len = rreq->len - rreq->submitted;
647
648 _debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted);
649 list_add_tail(&subreq->rreq_link, &rreq->subrequests);
650
651 /* Call out to the cache to find out what it can do with the remaining
652 * subset. It tells us in subreq->flags what it decided should be done
653 * and adjusts subreq->len down if the subset crosses a cache boundary.
654 *
655 * Then when we hand the subset, it can choose to take a subset of that
656 * (the starts must coincide), in which case, we go around the loop
657 * again and ask it to download the next piece.
658 */
659 source = netfs_rreq_prepare_read(rreq, subreq);
660 if (source == NETFS_INVALID_READ)
661 goto subreq_failed;
662
6a19114b 663 atomic_inc(&rreq->nr_outstanding);
3d3c9504
DH
664
665 rreq->submitted += subreq->len;
666
77b4d2c6 667 trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
3d3c9504
DH
668 switch (source) {
669 case NETFS_FILL_WITH_ZEROES:
670 netfs_fill_with_zeroes(rreq, subreq);
671 break;
672 case NETFS_DOWNLOAD_FROM_SERVER:
673 netfs_read_from_server(rreq, subreq);
674 break;
726218fd 675 case NETFS_READ_FROM_CACHE:
3a11b3a8 676 netfs_read_from_cache(rreq, subreq, NETFS_READ_HOLE_IGNORE);
726218fd 677 break;
3d3c9504
DH
678 default:
679 BUG();
680 }
681
682 return true;
683
684subreq_failed:
685 rreq->error = subreq->error;
686 netfs_put_subrequest(subreq, false);
687 return false;
688}
689
6a19114b 690static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
726218fd
DH
691 loff_t *_start, size_t *_len, loff_t i_size)
692{
693 struct netfs_cache_resources *cres = &rreq->cache_resources;
694
695 if (cres->ops && cres->ops->expand_readahead)
696 cres->ops->expand_readahead(cres, _start, _len, i_size);
697}
698
6a19114b 699static void netfs_rreq_expand(struct netfs_io_request *rreq,
3d3c9504
DH
700 struct readahead_control *ractl)
701{
726218fd
DH
702 /* Give the cache a chance to change the request parameters. The
703 * resultant request must contain the original region.
704 */
705 netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);
706
3d3c9504
DH
707 /* Give the netfs a chance to change the request parameters. The
708 * resultant request must contain the original region.
709 */
710 if (rreq->netfs_ops->expand_readahead)
711 rreq->netfs_ops->expand_readahead(rreq);
712
713 /* Expand the request if the cache wants it to start earlier. Note
714 * that the expansion may get further extended if the VM wishes to
715 * insert THPs and the preferred start and/or end wind up in the middle
716 * of THPs.
717 *
718 * If this is the case, however, the THP size should be an integer
719 * multiple of the cache granule size, so we get a whole number of
720 * granules to deal with.
721 */
722 if (rreq->start != readahead_pos(ractl) ||
723 rreq->len != readahead_length(ractl)) {
724 readahead_expand(ractl, rreq->start, rreq->len);
725 rreq->start = readahead_pos(ractl);
726 rreq->len = readahead_length(ractl);
77b4d2c6
DH
727
728 trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
729 netfs_read_trace_expanded);
3d3c9504
DH
730 }
731}
732
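/*
 * Example sketch (hypothetical, not part of this file): an
 * ->expand_readahead() hook as called from netfs_rreq_expand() above.  It
 * rounds the request outwards to an assumed 256KiB alignment for RPC
 * efficiency; simple outward rounding keeps the original region covered,
 * as required.
 */
static void myfs_expand_readahead(struct netfs_io_request *rreq)
{
        const unsigned long long granule = 256 * 1024;  /* assumed alignment */
        unsigned long long start = round_down(rreq->start, granule);
        unsigned long long end = round_up(rreq->start + rreq->len, granule);

        rreq->start = start;
        rreq->len = end - start;
}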
733/**
734 * netfs_readahead - Helper to manage a read request
735 * @ractl: The description of the readahead request
736 * @ops: The network filesystem's operations for the helper to use
737 * @netfs_priv: Private netfs data to be retained in the request
738 *
739 * Fulfil a readahead request by drawing data from the cache if possible, or
740 * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O
741 * requests from different sources will get munged together. If necessary, the
742 * readahead window can be expanded in either direction to a more convenient
743 * alighment for RPC efficiency or to make storage in the cache feasible.
744 *
745 * The calling netfs must provide a table of operations, only one of which,
 746 * issue_read, is mandatory. It may also be passed a private token, which will
747 * be retained in rreq->netfs_priv and will be cleaned up by ops->cleanup().
748 *
749 * This is usable whether or not caching is enabled.
750 */
751void netfs_readahead(struct readahead_control *ractl,
6a19114b 752 const struct netfs_request_ops *ops,
3d3c9504
DH
753 void *netfs_priv)
754{
6a19114b 755 struct netfs_io_request *rreq;
3d3c9504 756 unsigned int debug_index = 0;
726218fd 757 int ret;
3d3c9504
DH
758
759 _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
760
761 if (readahead_count(ractl) == 0)
762 goto cleanup;
763
f18a3785 764 rreq = netfs_alloc_request(ops, netfs_priv, ractl->file);
3d3c9504
DH
765 if (!rreq)
766 goto cleanup;
767 rreq->mapping = ractl->mapping;
768 rreq->start = readahead_pos(ractl);
769 rreq->len = readahead_length(ractl);
770
726218fd
DH
771 if (ops->begin_cache_operation) {
772 ret = ops->begin_cache_operation(rreq);
773 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
774 goto cleanup_free;
775 }
776
289af54c 777 netfs_stat(&netfs_n_rh_readahead);
77b4d2c6
DH
778 trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
779 netfs_read_trace_readahead);
780
3d3c9504
DH
781 netfs_rreq_expand(rreq, ractl);
782
6a19114b 783 atomic_set(&rreq->nr_outstanding, 1);
3d3c9504
DH
784 do {
785 if (!netfs_rreq_submit_slice(rreq, &debug_index))
786 break;
787
788 } while (rreq->submitted < rreq->len);
789
78525c74 790 /* Drop the refs on the folios here rather than in the cache or
3d3c9504
DH
791 * filesystem. The locks will be dropped in netfs_rreq_unlock().
792 */
78525c74
DH
793 while (readahead_folio(ractl))
794 ;
3d3c9504 795
6a19114b
DH
796 /* If we decrement nr_outstanding to 0, the ref belongs to us. */
797 if (atomic_dec_and_test(&rreq->nr_outstanding))
3d3c9504
DH
798 netfs_rreq_assess(rreq, false);
799 return;
800
726218fd 801cleanup_free:
f18a3785 802 netfs_put_request(rreq, false);
726218fd 803 return;
3d3c9504
DH
804cleanup:
805 if (netfs_priv)
806 ops->cleanup(ractl->mapping, netfs_priv);
807 return;
808}
809EXPORT_SYMBOL(netfs_readahead);
810
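/*
 * Example sketch (hypothetical, not part of this file): wiring the helper
 * into a filesystem's ->readahead() address_space op.  myfs_req_ops collects
 * the hypothetical hooks sketched earlier; ->issue_read is the only op the
 * read path requires, and ->is_cache_enabled is consulted by
 * netfs_write_begin() further down.
 */
static bool myfs_is_cache_enabled(struct inode *inode)
{
        return false;                   /* no fscache in this sketch */
}

static const struct netfs_request_ops myfs_req_ops = {
        .is_cache_enabled       = myfs_is_cache_enabled,
        .issue_read             = myfs_issue_read,
        .clamp_length           = myfs_clamp_length,
        .expand_readahead       = myfs_expand_readahead,
};

static void myfs_readahead(struct readahead_control *ractl)
{
        /* No private cookie is passed, so no ->cleanup op is needed. */
        netfs_readahead(ractl, &myfs_req_ops, NULL);
}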
811/**
53b776c7 812 * netfs_readpage - Helper to manage a readpage request
3d3c9504 813 * @file: The file to read from
78525c74 814 * @folio: The folio to read
3d3c9504
DH
815 * @ops: The network filesystem's operations for the helper to use
816 * @netfs_priv: Private netfs data to be retained in the request
817 *
818 * Fulfil a readpage request by drawing data from the cache if possible, or the
819 * netfs if not. Space beyond the EOF is zero-filled. Multiple I/O requests
820 * from different sources will get munged together.
821 *
822 * The calling netfs must provide a table of operations, only one of which,
 823 * issue_read, is mandatory. It may also be passed a private token, which will
824 * be retained in rreq->netfs_priv and will be cleaned up by ops->cleanup().
825 *
826 * This is usable whether or not caching is enabled.
827 */
828int netfs_readpage(struct file *file,
78525c74 829 struct folio *folio,
6a19114b 830 const struct netfs_request_ops *ops,
3d3c9504
DH
831 void *netfs_priv)
832{
6a19114b 833 struct netfs_io_request *rreq;
3d3c9504
DH
834 unsigned int debug_index = 0;
835 int ret;
836
78525c74 837 _enter("%lx", folio_index(folio));
3d3c9504 838
f18a3785 839 rreq = netfs_alloc_request(ops, netfs_priv, file);
3d3c9504
DH
840 if (!rreq) {
841 if (netfs_priv)
3cfef1b6 842 ops->cleanup(folio_file_mapping(folio), netfs_priv);
78525c74 843 folio_unlock(folio);
3d3c9504
DH
844 return -ENOMEM;
845 }
78525c74
DH
846 rreq->mapping = folio_file_mapping(folio);
847 rreq->start = folio_file_pos(folio);
848 rreq->len = folio_size(folio);
3d3c9504 849
726218fd
DH
850 if (ops->begin_cache_operation) {
851 ret = ops->begin_cache_operation(rreq);
852 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) {
78525c74 853 folio_unlock(folio);
726218fd
DH
854 goto out;
855 }
856 }
857
289af54c 858 netfs_stat(&netfs_n_rh_readpage);
77b4d2c6
DH
859 trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
860
f18a3785 861 netfs_get_request(rreq);
3d3c9504 862
6a19114b 863 atomic_set(&rreq->nr_outstanding, 1);
3d3c9504
DH
864 do {
865 if (!netfs_rreq_submit_slice(rreq, &debug_index))
866 break;
867
868 } while (rreq->submitted < rreq->len);
869
6a19114b 870 /* Keep nr_outstanding incremented so that the ref always belongs to us, and
3d3c9504
DH
871 * the service code isn't punted off to a random thread pool to
872 * process.
873 */
874 do {
f18a3785
DH
875 wait_var_event(&rreq->nr_outstanding,
876 atomic_read(&rreq->nr_outstanding) == 1);
3d3c9504
DH
877 netfs_rreq_assess(rreq, false);
878 } while (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags));
879
880 ret = rreq->error;
0246f3e5
DH
881 if (ret == 0 && rreq->submitted < rreq->len) {
882 trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_readpage);
3d3c9504 883 ret = -EIO;
0246f3e5 884 }
726218fd 885out:
f18a3785 886 netfs_put_request(rreq, false);
3d3c9504
DH
887 return ret;
888}
889EXPORT_SYMBOL(netfs_readpage);
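/*
 * Example sketch (hypothetical, not part of this file): a ->readpage
 * address_space op delegating to netfs_readpage(), using the myfs_req_ops
 * table sketched after netfs_readahead() above.
 */
static int myfs_readpage(struct file *file, struct page *page)
{
        struct folio *folio = page_folio(page);

        return netfs_readpage(file, folio, &myfs_req_ops, NULL);
}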
e1b1240c 890
ddca5b0e
DH
891/*
892 * Prepare a folio for writing without reading first
78525c74 893 * @folio: The folio being prepared
827a746f
JL
894 * @pos: starting position for the write
895 * @len: length of write
896 *
897 * In some cases, write_begin doesn't need to read at all:
78525c74
DH
898 * - full folio write
899 * - write that lies in a folio that is completely beyond EOF
900 * - write that covers the folio from start to EOF or beyond it
827a746f
JL
901 *
902 * If any of these criteria are met, then zero out the unwritten parts
78525c74 903 * of the folio and return true. Otherwise, return false.
827a746f 904 */
78525c74 905static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len)
e1b1240c 906{
78525c74 907 struct inode *inode = folio_inode(folio);
827a746f 908 loff_t i_size = i_size_read(inode);
78525c74 909 size_t offset = offset_in_folio(folio, pos);
827a746f 910
78525c74
DH
911 /* Full folio write */
912 if (offset == 0 && len >= folio_size(folio))
827a746f
JL
913 return true;
914
78525c74 915 /* pos beyond last folio in the file */
827a746f
JL
916 if (pos - offset >= i_size)
917 goto zero_out;
918
78525c74 919 /* Write that covers from the start of the folio to EOF or beyond */
827a746f
JL
920 if (offset == 0 && (pos + len) >= i_size)
921 goto zero_out;
e1b1240c 922
827a746f
JL
923 return false;
924zero_out:
78525c74 925 zero_user_segments(&folio->page, 0, offset, offset + len, folio_size(folio));
827a746f 926 return true;
e1b1240c
DH
927}
928
929/**
930 * netfs_write_begin - Helper to prepare for writing
931 * @file: The file to read from
932 * @mapping: The mapping to read from
933 * @pos: File position at which the write will begin
78525c74
DH
934 * @len: The length of the write (may extend beyond the end of the folio chosen)
935 * @aop_flags: AOP_* flags
936 * @_folio: Where to put the resultant folio
e1b1240c
DH
937 * @_fsdata: Place for the netfs to store a cookie
938 * @ops: The network filesystem's operations for the helper to use
939 * @netfs_priv: Private netfs data to be retained in the request
940 *
941 * Pre-read data for a write-begin request by drawing data from the cache if
942 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
943 * Multiple I/O requests from different sources will get munged together. If
944 * necessary, the readahead window can be expanded in either direction to a
 945 * more convenient alignment for RPC efficiency or to make storage in the cache
946 * feasible.
947 *
948 * The calling netfs must provide a table of operations, only one of which,
 950 * issue_read, is mandatory.
950 *
951 * The check_write_begin() operation can be provided to check for and flush
78525c74 952 * conflicting writes once the folio is grabbed and locked. It is passed a
e1b1240c
DH
953 * pointer to the fsdata cookie that gets returned to the VM to be passed to
954 * write_end. It is permitted to sleep. It should return 0 if the request
78525c74
DH
955 * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
 956 * be re-obtained; or return an error.
e1b1240c
DH
957 *
958 * This is usable whether or not caching is enabled.
959 */
960int netfs_write_begin(struct file *file, struct address_space *mapping,
78525c74
DH
961 loff_t pos, unsigned int len, unsigned int aop_flags,
962 struct folio **_folio, void **_fsdata,
6a19114b 963 const struct netfs_request_ops *ops,
e1b1240c
DH
964 void *netfs_priv)
965{
6a19114b 966 struct netfs_io_request *rreq;
78525c74 967 struct folio *folio;
e1b1240c 968 struct inode *inode = file_inode(file);
78525c74 969 unsigned int debug_index = 0, fgp_flags;
e1b1240c 970 pgoff_t index = pos >> PAGE_SHIFT;
e1b1240c
DH
971 int ret;
972
973 DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
974
975retry:
78525c74
DH
976 fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
977 if (aop_flags & AOP_FLAG_NOFS)
978 fgp_flags |= FGP_NOFS;
979 folio = __filemap_get_folio(mapping, index, fgp_flags,
980 mapping_gfp_mask(mapping));
981 if (!folio)
e1b1240c
DH
982 return -ENOMEM;
983
984 if (ops->check_write_begin) {
985 /* Allow the netfs (eg. ceph) to flush conflicts. */
78525c74 986 ret = ops->check_write_begin(file, pos, len, folio, _fsdata);
e1b1240c 987 if (ret < 0) {
0246f3e5 988 trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
e1b1240c
DH
989 if (ret == -EAGAIN)
990 goto retry;
991 goto error;
992 }
993 }
994
78525c74
DH
995 if (folio_test_uptodate(folio))
996 goto have_folio;
e1b1240c
DH
997
998 /* If the page is beyond the EOF, we want to clear it - unless it's
999 * within the cache granule containing the EOF, in which case we need
1000 * to preload the granule.
1001 */
e1b1240c 1002 if (!ops->is_cache_enabled(inode) &&
78525c74 1003 netfs_skip_folio_read(folio, pos, len)) {
e1b1240c 1004 netfs_stat(&netfs_n_rh_write_zskip);
78525c74 1005 goto have_folio_no_wait;
e1b1240c
DH
1006 }
1007
1008 ret = -ENOMEM;
f18a3785 1009 rreq = netfs_alloc_request(ops, netfs_priv, file);
e1b1240c
DH
1010 if (!rreq)
1011 goto error;
78525c74
DH
1012 rreq->mapping = folio_file_mapping(folio);
1013 rreq->start = folio_file_pos(folio);
1014 rreq->len = folio_size(folio);
1015 rreq->no_unlock_folio = folio_index(folio);
1016 __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
e1b1240c
DH
1017 netfs_priv = NULL;
1018
726218fd
DH
1019 if (ops->begin_cache_operation) {
1020 ret = ops->begin_cache_operation(rreq);
1021 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
1022 goto error_put;
1023 }
1024
e1b1240c
DH
1025 netfs_stat(&netfs_n_rh_write_begin);
1026 trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
1027
1028 /* Expand the request to meet caching requirements and download
1029 * preferences.
1030 */
78525c74 1031 ractl._nr_pages = folio_nr_pages(folio);
e1b1240c 1032 netfs_rreq_expand(rreq, &ractl);
f18a3785 1033 netfs_get_request(rreq);
e1b1240c 1034
78525c74
DH
1035 /* We hold the folio locks, so we can drop the references */
1036 folio_get(folio);
1037 while (readahead_folio(&ractl))
1038 ;
e1b1240c 1039
6a19114b 1040 atomic_set(&rreq->nr_outstanding, 1);
e1b1240c
DH
1041 do {
1042 if (!netfs_rreq_submit_slice(rreq, &debug_index))
1043 break;
1044
1045 } while (rreq->submitted < rreq->len);
1046
f18a3785
DH
1047 /* Keep nr_outstanding incremented so that the ref always belongs to
1048 * us, and the service code isn't punted off to a random thread pool to
e1b1240c
DH
1049 * process.
1050 */
1051 for (;;) {
f18a3785
DH
1052 wait_var_event(&rreq->nr_outstanding,
1053 atomic_read(&rreq->nr_outstanding) == 1);
e1b1240c
DH
1054 netfs_rreq_assess(rreq, false);
1055 if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
1056 break;
1057 cond_resched();
1058 }
1059
1060 ret = rreq->error;
0246f3e5
DH
1061 if (ret == 0 && rreq->submitted < rreq->len) {
1062 trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_write_begin);
e1b1240c 1063 ret = -EIO;
0246f3e5 1064 }
f18a3785 1065 netfs_put_request(rreq, false);
e1b1240c
DH
1066 if (ret < 0)
1067 goto error;
1068
78525c74
DH
1069have_folio:
1070 ret = folio_wait_fscache_killable(folio);
e1b1240c
DH
1071 if (ret < 0)
1072 goto error;
78525c74 1073have_folio_no_wait:
e1b1240c 1074 if (netfs_priv)
3cfef1b6 1075 ops->cleanup(mapping, netfs_priv);
78525c74 1076 *_folio = folio;
e1b1240c
DH
1077 _leave(" = 0");
1078 return 0;
1079
1080error_put:
f18a3785 1081 netfs_put_request(rreq, false);
e1b1240c 1082error:
78525c74
DH
1083 folio_unlock(folio);
1084 folio_put(folio);
e1b1240c 1085 if (netfs_priv)
3cfef1b6 1086 ops->cleanup(mapping, netfs_priv);
e1b1240c
DH
1087 _leave(" = %d", ret);
1088 return ret;
1089}
1090EXPORT_SYMBOL(netfs_write_begin);
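/*
 * Example sketch (hypothetical, not part of this file): a ->write_begin
 * address_space op delegating to netfs_write_begin(), again using the
 * hypothetical myfs_req_ops table from the earlier sketches.  The page
 * handed back to the VFS is picked out of the folio the helper returns
 * locked.
 */
static int myfs_write_begin(struct file *file, struct address_space *mapping,
                            loff_t pos, unsigned int len, unsigned int aop_flags,
                            struct page **pagep, void **fsdata)
{
        struct folio *folio;
        int ret;

        ret = netfs_write_begin(file, mapping, pos, len, aop_flags,
                                &folio, fsdata, &myfs_req_ops, NULL);
        if (ret == 0)
                *pagep = folio_file_page(folio, pos / PAGE_SIZE);
        return ret;
}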