// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells ([email protected])
 */

#include <linux/module.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/sched/mm.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/netfs.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/netfs.h>

MODULE_DESCRIPTION("Network fs support");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");

unsigned netfs_debug;
module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");

/*
 * Clear the unread part of an I/O request.
 */
static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
{
	struct iov_iter iter;

	iov_iter_xarray(&iter, READ, &subreq->rreq->mapping->i_pages,
			subreq->start + subreq->transferred,
			subreq->len - subreq->transferred);
	iov_iter_zero(iov_iter_count(&iter), &iter);
}

static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
					bool was_async)
{
	struct netfs_io_subrequest *subreq = priv;

	netfs_subreq_terminated(subreq, transferred_or_error, was_async);
}

/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_from_cache(struct netfs_io_request *rreq,
				  struct netfs_io_subrequest *subreq,
				  enum netfs_read_from_hole read_hole)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	struct iov_iter iter;

	netfs_stat(&netfs_n_rh_read);
	iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
			subreq->start + subreq->transferred,
			subreq->len - subreq->transferred);

	cres->ops->read(cres, subreq->start, &iter, read_hole,
			netfs_cache_read_terminated, subreq);
}

/*
 * Fill a subrequest region with zeroes.
 */
static void netfs_fill_with_zeroes(struct netfs_io_request *rreq,
				   struct netfs_io_subrequest *subreq)
{
	netfs_stat(&netfs_n_rh_zero);
	__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
	netfs_subreq_terminated(subreq, 0, false);
}

/*
 * Ask the netfs to issue a read request to the server for us.
 *
 * The netfs is expected to read from subreq->pos + subreq->transferred to
 * subreq->pos + subreq->len - 1.  It may not backtrack and write data into the
 * buffer prior to the transferred point as it might clobber dirty data
 * obtained from the cache.
 *
 * Alternatively, the netfs is allowed to indicate one of two things:
 *
 * - NETFS_SREQ_SHORT_READ: A short read - it will get called again to try and
 *   make progress.
 *
 * - NETFS_SREQ_CLEAR_TAIL: A short read - the rest of the buffer will be
 *   cleared.
 */
static void netfs_read_from_server(struct netfs_io_request *rreq,
				   struct netfs_io_subrequest *subreq)
{
	netfs_stat(&netfs_n_rh_download);
	rreq->netfs_ops->issue_read(subreq);
}

/*
 * Release those waiting.
 */
static void netfs_rreq_completed(struct netfs_io_request *rreq, bool was_async)
{
	trace_netfs_rreq(rreq, netfs_rreq_trace_done);
	netfs_clear_subrequests(rreq, was_async);
	netfs_put_request(rreq, was_async);
}

/*
 * Deal with the completion of writing the data to the cache.  We have to clear
 * the PG_fscache bits on the folios involved and release the caller's ref.
 *
 * May be called in softirq mode and we inherit a ref from the caller.
 */
static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq,
					  bool was_async)
{
	struct netfs_io_subrequest *subreq;
	struct folio *folio;
	pgoff_t unlocked = 0;
	bool have_unlocked = false;

	rcu_read_lock();

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);

		xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
			/* We might have multiple writes from the same huge
			 * folio, but we mustn't unlock a folio more than once.
			 */
			if (have_unlocked && folio_index(folio) <= unlocked)
				continue;
			unlocked = folio_index(folio);
			folio_end_fscache(folio);
			have_unlocked = true;
		}
	}

	rcu_read_unlock();
	netfs_rreq_completed(rreq, was_async);
}

static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error,
				       bool was_async)
{
	struct netfs_io_subrequest *subreq = priv;
	struct netfs_io_request *rreq = subreq->rreq;

	if (IS_ERR_VALUE(transferred_or_error)) {
		netfs_stat(&netfs_n_rh_write_failed);
		trace_netfs_failure(rreq, subreq, transferred_or_error,
				    netfs_fail_copy_to_cache);
	} else {
		netfs_stat(&netfs_n_rh_write_done);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_write_term);

	/* If we decrement nr_copy_ops to 0, the ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_copy_ops))
		netfs_rreq_unmark_after_write(rreq, was_async);

	netfs_put_subrequest(subreq, was_async);
}

/*
 * Perform any outstanding writes to the cache.  We inherit a ref from the
 * caller.
 */
static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	struct netfs_io_subrequest *subreq, *next, *p;
	struct iov_iter iter;
	int ret;

	trace_netfs_rreq(rreq, netfs_rreq_trace_write);

	/* We don't want terminating writes trying to wake us up whilst we're
	 * still going through the list.
	 */
	atomic_inc(&rreq->nr_copy_ops);

	list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) {
		if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
			list_del_init(&subreq->rreq_link);
			netfs_put_subrequest(subreq, false);
		}
	}

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		/* Amalgamate adjacent writes */
		while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
			next = list_next_entry(subreq, rreq_link);
			if (next->start != subreq->start + subreq->len)
				break;
			subreq->len += next->len;
			list_del_init(&next->rreq_link);
			netfs_put_subrequest(next, false);
		}

		ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len,
					       rreq->i_size, true);
		if (ret < 0) {
			trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write);
			trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip);
			continue;
		}

		iov_iter_xarray(&iter, WRITE, &rreq->mapping->i_pages,
				subreq->start, subreq->len);

		atomic_inc(&rreq->nr_copy_ops);
		netfs_stat(&netfs_n_rh_write);
		netfs_get_subrequest(subreq);
		trace_netfs_sreq(subreq, netfs_sreq_trace_write);
		cres->ops->write(cres, subreq->start, &iter,
				 netfs_rreq_copy_terminated, subreq);
	}

	/* If we decrement nr_copy_ops to 0, the usage ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_copy_ops))
		netfs_rreq_unmark_after_write(rreq, false);
}

static void netfs_rreq_write_to_cache_work(struct work_struct *work)
{
	struct netfs_io_request *rreq =
		container_of(work, struct netfs_io_request, work);

	netfs_rreq_do_write_to_cache(rreq);
}

static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq)
{
	rreq->work.func = netfs_rreq_write_to_cache_work;
	if (!queue_work(system_unbound_wq, &rreq->work))
		BUG();
}

/*
 * Unlock the folios in a read operation.  We need to set PG_fscache on any
 * folios we're going to write back before we unlock them.
 */
static void netfs_rreq_unlock(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;
	struct folio *folio;
	unsigned int iopos, account = 0;
	pgoff_t start_page = rreq->start / PAGE_SIZE;
	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
	bool subreq_failed = false;

	XA_STATE(xas, &rreq->mapping->i_pages, start_page);

	if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
		__clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
		list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
			__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
		}
	}

	/* Walk through the pagecache and the I/O request lists simultaneously.
	 * We may have a mixture of cached and uncached sections and we only
	 * really want to write out the uncached sections.  This is slightly
	 * complicated by the possibility that we might have huge pages with a
	 * mixture inside.
	 */
	subreq = list_first_entry(&rreq->subrequests,
				  struct netfs_io_subrequest, rreq_link);
	iopos = 0;
	subreq_failed = (subreq->error < 0);

	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
		unsigned int pgend = pgpos + folio_size(folio);
		bool pg_failed = false;

		for (;;) {
			if (!subreq) {
				pg_failed = true;
				break;
			}
			if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
				folio_start_fscache(folio);
			pg_failed |= subreq_failed;
			if (pgend < iopos + subreq->len)
				break;

			account += subreq->transferred;
			iopos += subreq->len;
			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
				subreq = list_next_entry(subreq, rreq_link);
				subreq_failed = (subreq->error < 0);
			} else {
				subreq = NULL;
				subreq_failed = false;
			}
			if (pgend == iopos)
				break;
		}

		if (!pg_failed) {
			flush_dcache_folio(folio);
			folio_mark_uptodate(folio);
		}

		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
			if (folio_index(folio) == rreq->no_unlock_folio &&
			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
				_debug("no unlock");
			else
				folio_unlock(folio);
		}
	}
	rcu_read_unlock();

	task_io_account_read(account);
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
}

/*
 * Handle a short read.
 */
static void netfs_rreq_short_read(struct netfs_io_request *rreq,
				  struct netfs_io_subrequest *subreq)
{
	__clear_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
	__set_bit(NETFS_SREQ_SEEK_DATA_READ, &subreq->flags);

	netfs_stat(&netfs_n_rh_short_read);
	trace_netfs_sreq(subreq, netfs_sreq_trace_resubmit_short);

	netfs_get_subrequest(subreq);
	atomic_inc(&rreq->nr_outstanding);
	if (subreq->source == NETFS_READ_FROM_CACHE)
		netfs_read_from_cache(rreq, subreq, NETFS_READ_HOLE_CLEAR);
	else
		netfs_read_from_server(rreq, subreq);
}

/*
 * Resubmit any short or failed operations.  Returns true if we got the rreq
 * ref back.
 */
static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;

	WARN_ON(in_interrupt());

	trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);

	/* We don't want terminating submissions trying to wake us up whilst
	 * we're still going through the list.
	 */
	atomic_inc(&rreq->nr_outstanding);

	__clear_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		if (subreq->error) {
			if (subreq->source != NETFS_READ_FROM_CACHE)
				break;
			subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
			subreq->error = 0;
			netfs_stat(&netfs_n_rh_download_instead);
			trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead);
			netfs_get_subrequest(subreq);
			atomic_inc(&rreq->nr_outstanding);
			netfs_read_from_server(rreq, subreq);
		} else if (test_bit(NETFS_SREQ_SHORT_IO, &subreq->flags)) {
			netfs_rreq_short_read(rreq, subreq);
		}
	}

	/* If we decrement nr_outstanding to 0, the usage ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_outstanding))
		return true;

	wake_up_var(&rreq->nr_outstanding);
	return false;
}

/*
 * Check to see if the data read is still valid.
 */
static void netfs_rreq_is_still_valid(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;

	if (!rreq->netfs_ops->is_still_valid ||
	    rreq->netfs_ops->is_still_valid(rreq))
		return;

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		if (subreq->source == NETFS_READ_FROM_CACHE) {
			subreq->error = -ESTALE;
			__set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
		}
	}
}

/*
 * Assess the state of a read request and decide what to do next.
 *
 * Note that we could be in an ordinary kernel thread, on a workqueue or in
 * softirq context at this point.  We inherit a ref from the caller.
 */
static void netfs_rreq_assess(struct netfs_io_request *rreq, bool was_async)
{
	trace_netfs_rreq(rreq, netfs_rreq_trace_assess);

again:
	netfs_rreq_is_still_valid(rreq);

	if (!test_bit(NETFS_RREQ_FAILED, &rreq->flags) &&
	    test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags)) {
		if (netfs_rreq_perform_resubmissions(rreq))
			goto again;
		return;
	}

	netfs_rreq_unlock(rreq);

	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
	wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);

	if (test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags))
		return netfs_rreq_write_to_cache(rreq);

	netfs_rreq_completed(rreq, was_async);
}

void netfs_rreq_work(struct work_struct *work)
{
	struct netfs_io_request *rreq =
		container_of(work, struct netfs_io_request, work);
	netfs_rreq_assess(rreq, false);
}

/*
 * Handle the completion of all outstanding I/O operations on a read request.
 * We inherit a ref from the caller.
 */
static void netfs_rreq_terminated(struct netfs_io_request *rreq,
				  bool was_async)
{
	if (test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags) &&
	    was_async) {
		if (!queue_work(system_unbound_wq, &rreq->work))
			BUG();
	} else {
		netfs_rreq_assess(rreq, was_async);
	}
}

/**
 * netfs_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O request that has terminated.
 * @transferred_or_error: The amount of data transferred or an error code.
 * @was_async: The termination was asynchronous
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates in @transferred_or_error the outcome of the operation,
 * supplying a positive value to indicate the number of bytes transferred, 0 to
 * indicate a failure to transfer anything that should be retried or a negative
 * error code.  The helper will look after reissuing I/O operations as
 * appropriate and writing downloaded data to the cache.
 *
 * If @was_async is true, the caller might be running in softirq or interrupt
 * context and we can't sleep.
 */
void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
			     ssize_t transferred_or_error,
			     bool was_async)
{
	struct netfs_io_request *rreq = subreq->rreq;
	int u;

	_enter("[%u]{%llx,%lx},%zd",
	       subreq->debug_index, subreq->start, subreq->flags,
	       transferred_or_error);

	switch (subreq->source) {
	case NETFS_READ_FROM_CACHE:
		netfs_stat(&netfs_n_rh_read_done);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_stat(&netfs_n_rh_download_done);
		break;
	default:
		break;
	}

	if (IS_ERR_VALUE(transferred_or_error)) {
		subreq->error = transferred_or_error;
		trace_netfs_failure(rreq, subreq, transferred_or_error,
				    netfs_fail_read);
		goto failed;
	}

	if (WARN(transferred_or_error > subreq->len - subreq->transferred,
		 "Subreq overread: R%x[%x] %zd > %zu - %zu",
		 rreq->debug_id, subreq->debug_index,
		 transferred_or_error, subreq->len, subreq->transferred))
		transferred_or_error = subreq->len - subreq->transferred;

	subreq->error = 0;
	subreq->transferred += transferred_or_error;
	if (subreq->transferred < subreq->len)
		goto incomplete;

complete:
	__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
		set_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);

out:
	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	/* If we decrement nr_outstanding to 0, the ref belongs to us. */
	u = atomic_dec_return(&rreq->nr_outstanding);
	if (u == 0)
		netfs_rreq_terminated(rreq, was_async);
	else if (u == 1)
		wake_up_var(&rreq->nr_outstanding);

	netfs_put_subrequest(subreq, was_async);
	return;

incomplete:
	if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
		netfs_clear_unread(subreq);
		subreq->transferred = subreq->len;
		goto complete;
	}

	if (transferred_or_error == 0) {
		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
			subreq->error = -ENODATA;
			goto failed;
		}
	} else {
		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	}

	__set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
	set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	goto out;

failed:
	if (subreq->source == NETFS_READ_FROM_CACHE) {
		netfs_stat(&netfs_n_rh_read_failed);
		set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	} else {
		netfs_stat(&netfs_n_rh_download_failed);
		set_bit(NETFS_RREQ_FAILED, &rreq->flags);
		rreq->error = subreq->error;
	}
	goto out;
}
EXPORT_SYMBOL(netfs_subreq_terminated);
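
/*
 * Editor's illustrative sketch, not part of the upstream file: roughly how a
 * filesystem's ->issue_read() might report back via netfs_subreq_terminated()
 * using the positive/zero/negative convention documented above.  The
 * "example_" names and example_rpc_read() are hypothetical stand-ins for the
 * filesystem's own transport call; kept under #if 0 so it is never built.
 */
#if 0 /* illustrative example only */
static void example_issue_read(struct netfs_io_subrequest *subreq)
{
	ssize_t ret;

	/* Fetch the as-yet-untransferred part of the subrequest from the
	 * server into the pagecache-backed buffer (hypothetical helper).
	 */
	ret = example_rpc_read(subreq->rreq->netfs_priv,
			       subreq->start + subreq->transferred,
			       subreq->len - subreq->transferred);

	/* Report bytes transferred (>0), "no progress, maybe retry" (0) or a
	 * negative error code back to the read helper.
	 */
	netfs_subreq_terminated(subreq, ret, false);
}
#endif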

static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_subrequest *subreq,
						     loff_t i_size)
{
	struct netfs_io_request *rreq = subreq->rreq;
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops)
		return cres->ops->prepare_read(subreq, i_size);
	if (subreq->start >= rreq->i_size)
		return NETFS_FILL_WITH_ZEROES;
	return NETFS_DOWNLOAD_FROM_SERVER;
}

/*
 * Work out what sort of subrequest the next one will be.
 */
static enum netfs_io_source
netfs_rreq_prepare_read(struct netfs_io_request *rreq,
			struct netfs_io_subrequest *subreq)
{
	enum netfs_io_source source;

	_enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);

	source = netfs_cache_prepare_read(subreq, rreq->i_size);
	if (source == NETFS_INVALID_READ)
		goto out;

	if (source == NETFS_DOWNLOAD_FROM_SERVER) {
		/* Call out to the netfs to let it shrink the request to fit
		 * its own I/O sizes and boundaries.  If it shrinks it here, it
		 * will be called again to make simultaneous calls; if it wants
		 * to make serial calls, it can indicate a short read and then
		 * we will call it again.
		 */
		if (subreq->len > rreq->i_size - subreq->start)
			subreq->len = rreq->i_size - subreq->start;

		if (rreq->netfs_ops->clamp_length &&
		    !rreq->netfs_ops->clamp_length(subreq)) {
			source = NETFS_INVALID_READ;
			goto out;
		}
	}

	if (WARN_ON(subreq->len == 0))
		source = NETFS_INVALID_READ;

out:
	subreq->source = source;
	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	return source;
}

/*
 * Slice off a piece of a read request and submit an I/O request for it.
 */
static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
				    unsigned int *_debug_index)
{
	struct netfs_io_subrequest *subreq;
	enum netfs_io_source source;

	subreq = netfs_alloc_subrequest(rreq);
	if (!subreq)
		return false;

	subreq->debug_index = (*_debug_index)++;
	subreq->start = rreq->start + rreq->submitted;
	subreq->len = rreq->len - rreq->submitted;

	_debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted);
	list_add_tail(&subreq->rreq_link, &rreq->subrequests);

	/* Call out to the cache to find out what it can do with the remaining
	 * subset.  It tells us in subreq->flags what it decided should be done
	 * and adjusts subreq->len down if the subset crosses a cache boundary.
	 *
	 * Then when we hand the subset, it can choose to take a subset of that
	 * (the starts must coincide), in which case, we go around the loop
	 * again and ask it to download the next piece.
	 */
	source = netfs_rreq_prepare_read(rreq, subreq);
	if (source == NETFS_INVALID_READ)
		goto subreq_failed;

	atomic_inc(&rreq->nr_outstanding);

	rreq->submitted += subreq->len;

	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
	switch (source) {
	case NETFS_FILL_WITH_ZEROES:
		netfs_fill_with_zeroes(rreq, subreq);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_read_from_server(rreq, subreq);
		break;
	case NETFS_READ_FROM_CACHE:
		netfs_read_from_cache(rreq, subreq, NETFS_READ_HOLE_IGNORE);
		break;
	default:
		BUG();
	}

	return true;

subreq_failed:
	rreq->error = subreq->error;
	netfs_put_subrequest(subreq, false);
	return false;
}

static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
					 loff_t *_start, size_t *_len, loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops && cres->ops->expand_readahead)
		cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_io_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier.  Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		rreq->start = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 * @ops: The network filesystem's operations for the helper to use
 * @netfs_priv: Private netfs data to be retained in the request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
 * requests from different sources will get munged together.  If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.  It may also be passed a private token, which will
 * be retained in rreq->netfs_priv and will be cleaned up by ops->cleanup().
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl,
		     const struct netfs_request_ops *ops,
		     void *netfs_priv)
{
	struct netfs_io_request *rreq;
	unsigned int debug_index = 0;
	int ret;

	_enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));

	if (readahead_count(ractl) == 0)
		goto cleanup;

	rreq = netfs_alloc_request(ops, netfs_priv, ractl->file);
	if (!rreq)
		goto cleanup;
	rreq->mapping = ractl->mapping;
	rreq->start = readahead_pos(ractl);
	rreq->len = readahead_length(ractl);

	if (ops->begin_cache_operation) {
		ret = ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto cleanup_free;
	}

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	atomic_set(&rreq->nr_outstanding, 1);
	do {
		if (!netfs_rreq_submit_slice(rreq, &debug_index))
			break;

	} while (rreq->submitted < rreq->len);

	/* Drop the refs on the folios here rather than in the cache or
	 * filesystem.  The locks will be dropped in netfs_rreq_unlock().
	 */
	while (readahead_folio(ractl))
		;

	/* If we decrement nr_outstanding to 0, the ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_outstanding))
		netfs_rreq_assess(rreq, false);
	return;

cleanup_free:
	netfs_put_request(rreq, false);
	return;
cleanup:
	if (netfs_priv)
		ops->cleanup(ractl->mapping, netfs_priv);
	return;
}
EXPORT_SYMBOL(netfs_readahead);
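
/*
 * Editor's illustrative sketch, not part of the upstream file: how a network
 * filesystem might wire netfs_readahead() into its address_space_operations.
 * Only ops used by this file are shown; a real filesystem will usually set
 * more of them.  All "example_" names are hypothetical and the block is kept
 * under #if 0 so it is never built.
 */
#if 0 /* illustrative example only */
static void example_cleanup(struct address_space *mapping, void *netfs_priv)
{
	/* Release the private token passed to the helpers, if any. */
	kfree(netfs_priv);
}

static const struct netfs_request_ops example_req_ops = {
	.issue_read	= example_issue_read,	/* see the sketch above */
	.cleanup	= example_cleanup,
};

static void example_aops_readahead(struct readahead_control *ractl)
{
	netfs_readahead(ractl, &example_req_ops, NULL);
}

static const struct address_space_operations example_aops = {
	.readahead	= example_aops_readahead,
	/* ... */
};
#endif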

/**
 * netfs_readpage - Helper to manage a readpage request
 * @file: The file to read from
 * @folio: The folio to read
 * @ops: The network filesystem's operations for the helper to use
 * @netfs_priv: Private netfs data to be retained in the request
 *
 * Fulfil a readpage request by drawing data from the cache if possible, or the
 * netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O requests
 * from different sources will get munged together.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.  It may also be passed a private token, which will
 * be retained in rreq->netfs_priv and will be cleaned up by ops->cleanup().
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_readpage(struct file *file,
		   struct folio *folio,
		   const struct netfs_request_ops *ops,
		   void *netfs_priv)
{
	struct netfs_io_request *rreq;
	unsigned int debug_index = 0;
	int ret;

	_enter("%lx", folio_index(folio));

	rreq = netfs_alloc_request(ops, netfs_priv, file);
	if (!rreq) {
		if (netfs_priv)
			ops->cleanup(folio_file_mapping(folio), netfs_priv);
		folio_unlock(folio);
		return -ENOMEM;
	}
	rreq->mapping = folio_file_mapping(folio);
	rreq->start = folio_file_pos(folio);
	rreq->len = folio_size(folio);

	if (ops->begin_cache_operation) {
		ret = ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) {
			folio_unlock(folio);
			goto out;
		}
	}

	netfs_stat(&netfs_n_rh_readpage);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

	netfs_get_request(rreq);

	atomic_set(&rreq->nr_outstanding, 1);
	do {
		if (!netfs_rreq_submit_slice(rreq, &debug_index))
			break;

	} while (rreq->submitted < rreq->len);

	/* Keep nr_outstanding incremented so that the ref always belongs to
	 * us, and the service code isn't punted off to a random thread pool to
	 * process.
	 */
	do {
		wait_var_event(&rreq->nr_outstanding,
			       atomic_read(&rreq->nr_outstanding) == 1);
		netfs_rreq_assess(rreq, false);
	} while (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags));

	ret = rreq->error;
	if (ret == 0 && rreq->submitted < rreq->len) {
		trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_readpage);
		ret = -EIO;
	}
out:
	netfs_put_request(rreq, false);
	return ret;
}
EXPORT_SYMBOL(netfs_readpage);
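
/*
 * Editor's illustrative sketch, not part of the upstream file: a hypothetical
 * ->readpage() wrapper for the example ops table above, assuming the legacy
 * aop still hands over a struct page at this point and that page_folio() is
 * available to convert it.  Never built (#if 0).
 */
#if 0 /* illustrative example only */
static int example_aops_readpage(struct file *file, struct page *page)
{
	/* Let the helper fill and unlock the folio containing @page. */
	return netfs_readpage(file, page_folio(page), &example_req_ops, NULL);
}
#endif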

/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len)
{
	struct inode *inode = folio_inode(folio);
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_folio(folio, pos);

	/* Full folio write */
	if (offset == 0 && len >= folio_size(folio))
		return true;

	/* pos beyond last folio in the file */
	if (pos - offset >= i_size)
		goto zero_out;

	/* Write that covers from the start of the folio to EOF or beyond */
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;

	return false;
zero_out:
	zero_user_segments(&folio->page, 0, offset, offset + len, folio_size(folio));
	return true;
}

/**
 * netfs_write_begin - Helper to prepare for writing
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @aop_flags: AOP_* flags
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 * @ops: The network filesystem's operations for the helper to use
 * @netfs_priv: Private netfs data to be retained in the request
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.  If
 * necessary, the readahead window can be expanded in either direction to a
 * more convenient alignment for RPC efficiency or to make storage in the cache
 * feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked.  It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end.  It is permitted to sleep.  It should return 0 if the request
 * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
 * be regot; or return an error.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_write_begin(struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned int len, unsigned int aop_flags,
		      struct folio **_folio, void **_fsdata,
		      const struct netfs_request_ops *ops,
		      void *netfs_priv)
{
	struct netfs_io_request *rreq;
	struct folio *folio;
	struct inode *inode = file_inode(file);
	unsigned int debug_index = 0, fgp_flags;
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);

retry:
	fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
	if (aop_flags & AOP_FLAG_NOFS)
		fgp_flags |= FGP_NOFS;
	folio = __filemap_get_folio(mapping, index, fgp_flags,
				    mapping_gfp_mask(mapping));
	if (!folio)
		return -ENOMEM;

	if (ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
		ret = ops->check_write_begin(file, pos, len, folio, _fsdata);
		if (ret < 0) {
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
			if (ret == -EAGAIN)
				goto retry;
			goto error;
		}
	}

	if (folio_test_uptodate(folio))
		goto have_folio;

	/* If the page is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!ops->is_cache_enabled(inode) &&
	    netfs_skip_folio_read(folio, pos, len)) {
		netfs_stat(&netfs_n_rh_write_zskip);
		goto have_folio_no_wait;
	}

	ret = -ENOMEM;
	rreq = netfs_alloc_request(ops, netfs_priv, file);
	if (!rreq)
		goto error;
	rreq->mapping = folio_file_mapping(folio);
	rreq->start = folio_file_pos(folio);
	rreq->len = folio_size(folio);
	rreq->no_unlock_folio = folio_index(folio);
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
	netfs_priv = NULL;

	if (ops->begin_cache_operation) {
		ret = ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto error_put;
	}

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Expand the request to meet caching requirements and download
	 * preferences.
	 */
	ractl._nr_pages = folio_nr_pages(folio);
	netfs_rreq_expand(rreq, &ractl);
	netfs_get_request(rreq);

	/* We hold the folio locks, so we can drop the references */
	folio_get(folio);
	while (readahead_folio(&ractl))
		;

	atomic_set(&rreq->nr_outstanding, 1);
	do {
		if (!netfs_rreq_submit_slice(rreq, &debug_index))
			break;

	} while (rreq->submitted < rreq->len);

	/* Keep nr_outstanding incremented so that the ref always belongs to
	 * us, and the service code isn't punted off to a random thread pool to
	 * process.
	 */
	for (;;) {
		wait_var_event(&rreq->nr_outstanding,
			       atomic_read(&rreq->nr_outstanding) == 1);
		netfs_rreq_assess(rreq, false);
		if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
			break;
		cond_resched();
	}

	ret = rreq->error;
	if (ret == 0 && rreq->submitted < rreq->len) {
		trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_write_begin);
		ret = -EIO;
	}
	netfs_put_request(rreq, false);
	if (ret < 0)
		goto error;

have_folio:
	ret = folio_wait_fscache_killable(folio);
	if (ret < 0)
		goto error;
have_folio_no_wait:
	if (netfs_priv)
		ops->cleanup(mapping, netfs_priv);
	*_folio = folio;
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_request(rreq, false);
error:
	folio_unlock(folio);
	folio_put(folio);
	if (netfs_priv)
		ops->cleanup(mapping, netfs_priv);
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
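
/*
 * Editor's illustrative sketch, not part of the upstream file: a hypothetical
 * ->write_begin() implementation delegating to netfs_write_begin().  It
 * assumes the legacy write_begin interface that returns a struct page, and
 * simplifies by handing back the folio's first page; all "example_" names are
 * made up.  Never built (#if 0).
 */
#if 0 /* illustrative example only */
static int example_write_begin(struct file *file, struct address_space *mapping,
			       loff_t pos, unsigned int len, unsigned int aop_flags,
			       struct page **pagep, void **fsdata)
{
	struct folio *folio;
	int ret;

	/* Pre-read (or skip reading) the folio covering @pos and lock it. */
	ret = netfs_write_begin(file, mapping, pos, len, aop_flags,
				&folio, fsdata, &example_req_ops, NULL);
	if (ret == 0)
		*pagep = &folio->page;	/* simplification: head page */
	return ret;
}
#endif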