// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells ([email protected])
 */

#include <linux/module.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/sched/mm.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/netfs.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/netfs.h>

MODULE_DESCRIPTION("Network fs support");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");

unsigned netfs_debug;
module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");

static void netfs_rreq_work(struct work_struct *);
static void __netfs_put_subrequest(struct netfs_read_subrequest *, bool);

static void netfs_put_subrequest(struct netfs_read_subrequest *subreq,
                                 bool was_async)
{
        if (refcount_dec_and_test(&subreq->usage))
                __netfs_put_subrequest(subreq, was_async);
}

static struct netfs_read_request *netfs_alloc_read_request(
        const struct netfs_read_request_ops *ops, void *netfs_priv,
        struct file *file)
{
        static atomic_t debug_ids;
        struct netfs_read_request *rreq;

        rreq = kzalloc(sizeof(struct netfs_read_request), GFP_KERNEL);
        if (rreq) {
                rreq->netfs_ops = ops;
                rreq->netfs_priv = netfs_priv;
                rreq->inode = file_inode(file);
                rreq->i_size = i_size_read(rreq->inode);
                rreq->debug_id = atomic_inc_return(&debug_ids);
                INIT_LIST_HEAD(&rreq->subrequests);
                INIT_WORK(&rreq->work, netfs_rreq_work);
                refcount_set(&rreq->usage, 1);
                __set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
                ops->init_rreq(rreq, file);
                netfs_stat(&netfs_n_rh_rreq);
        }

        return rreq;
}

static void netfs_get_read_request(struct netfs_read_request *rreq)
{
        refcount_inc(&rreq->usage);
}

static void netfs_rreq_clear_subreqs(struct netfs_read_request *rreq,
                                     bool was_async)
{
        struct netfs_read_subrequest *subreq;

        while (!list_empty(&rreq->subrequests)) {
                subreq = list_first_entry(&rreq->subrequests,
                                          struct netfs_read_subrequest, rreq_link);
                list_del(&subreq->rreq_link);
                netfs_put_subrequest(subreq, was_async);
        }
}

static void netfs_free_read_request(struct work_struct *work)
{
        struct netfs_read_request *rreq =
                container_of(work, struct netfs_read_request, work);
        netfs_rreq_clear_subreqs(rreq, false);
        if (rreq->netfs_priv)
                rreq->netfs_ops->cleanup(rreq->mapping, rreq->netfs_priv);
        trace_netfs_rreq(rreq, netfs_rreq_trace_free);
        if (rreq->cache_resources.ops)
                rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
        kfree(rreq);
        netfs_stat_d(&netfs_n_rh_rreq);
}

static void netfs_put_read_request(struct netfs_read_request *rreq, bool was_async)
{
        if (refcount_dec_and_test(&rreq->usage)) {
                if (was_async) {
                        rreq->work.func = netfs_free_read_request;
                        if (!queue_work(system_unbound_wq, &rreq->work))
                                BUG();
                } else {
                        netfs_free_read_request(&rreq->work);
                }
        }
}

/*
 * Allocate and partially initialise an I/O subrequest structure.
 */
static struct netfs_read_subrequest *netfs_alloc_subrequest(
        struct netfs_read_request *rreq)
{
        struct netfs_read_subrequest *subreq;

        subreq = kzalloc(sizeof(struct netfs_read_subrequest), GFP_KERNEL);
        if (subreq) {
                INIT_LIST_HEAD(&subreq->rreq_link);
                refcount_set(&subreq->usage, 2);
                subreq->rreq = rreq;
                netfs_get_read_request(rreq);
                netfs_stat(&netfs_n_rh_sreq);
        }

        return subreq;
}

static void netfs_get_read_subrequest(struct netfs_read_subrequest *subreq)
{
        refcount_inc(&subreq->usage);
}

static void __netfs_put_subrequest(struct netfs_read_subrequest *subreq,
                                   bool was_async)
{
        struct netfs_read_request *rreq = subreq->rreq;

        trace_netfs_sreq(subreq, netfs_sreq_trace_free);
        kfree(subreq);
        netfs_stat_d(&netfs_n_rh_sreq);
        netfs_put_read_request(rreq, was_async);
}

/*
 * Clear the unread part of an I/O request.
 */
static void netfs_clear_unread(struct netfs_read_subrequest *subreq)
{
        struct iov_iter iter;

        iov_iter_xarray(&iter, READ, &subreq->rreq->mapping->i_pages,
                        subreq->start + subreq->transferred,
                        subreq->len - subreq->transferred);
        iov_iter_zero(iov_iter_count(&iter), &iter);
}

static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
                                        bool was_async)
{
        struct netfs_read_subrequest *subreq = priv;

        netfs_subreq_terminated(subreq, transferred_or_error, was_async);
}

/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_from_cache(struct netfs_read_request *rreq,
                                  struct netfs_read_subrequest *subreq,
                                  bool seek_data)
{
        struct netfs_cache_resources *cres = &rreq->cache_resources;
        struct iov_iter iter;

        netfs_stat(&netfs_n_rh_read);
        iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
                        subreq->start + subreq->transferred,
                        subreq->len - subreq->transferred);

        cres->ops->read(cres, subreq->start, &iter, seek_data,
                        netfs_cache_read_terminated, subreq);
}

/*
 * Fill a subrequest region with zeroes.
 */
static void netfs_fill_with_zeroes(struct netfs_read_request *rreq,
                                   struct netfs_read_subrequest *subreq)
{
        netfs_stat(&netfs_n_rh_zero);
        __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
        netfs_subreq_terminated(subreq, 0, false);
}

/*
 * Ask the netfs to issue a read request to the server for us.
 *
 * The netfs is expected to read from subreq->pos + subreq->transferred to
 * subreq->pos + subreq->len - 1. It may not backtrack and write data into the
 * buffer prior to the transferred point as it might clobber dirty data
 * obtained from the cache.
 *
 * Alternatively, the netfs is allowed to indicate one of two things:
 *
 * - NETFS_SREQ_SHORT_READ: A short read - it will get called again to try and
 *   make progress.
 *
 * - NETFS_SREQ_CLEAR_TAIL: A short read - the rest of the buffer will be
 *   cleared.
 */
static void netfs_read_from_server(struct netfs_read_request *rreq,
                                   struct netfs_read_subrequest *subreq)
{
        netfs_stat(&netfs_n_rh_download);
        rreq->netfs_ops->issue_op(subreq);
}

/*
 * Release those waiting.
 */
static void netfs_rreq_completed(struct netfs_read_request *rreq, bool was_async)
{
        trace_netfs_rreq(rreq, netfs_rreq_trace_done);
        netfs_rreq_clear_subreqs(rreq, was_async);
        netfs_put_read_request(rreq, was_async);
}

/*
 * Deal with the completion of writing the data to the cache. We have to clear
 * the PG_fscache bits on the folios involved and release the caller's ref.
 *
 * May be called in softirq mode and we inherit a ref from the caller.
 */
static void netfs_rreq_unmark_after_write(struct netfs_read_request *rreq,
                                          bool was_async)
{
        struct netfs_read_subrequest *subreq;
        struct folio *folio;
        pgoff_t unlocked = 0;
        bool have_unlocked = false;

        rcu_read_lock();

        list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
                XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);

                xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
                        /* We might have multiple writes from the same huge
                         * folio, but we mustn't unlock a folio more than once.
                         */
                        if (have_unlocked && folio_index(folio) <= unlocked)
                                continue;
                        unlocked = folio_index(folio);
                        folio_end_fscache(folio);
                        have_unlocked = true;
                }
        }

        rcu_read_unlock();
        netfs_rreq_completed(rreq, was_async);
}

static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error,
                                       bool was_async)
{
        struct netfs_read_subrequest *subreq = priv;
        struct netfs_read_request *rreq = subreq->rreq;

        if (IS_ERR_VALUE(transferred_or_error)) {
                netfs_stat(&netfs_n_rh_write_failed);
                trace_netfs_failure(rreq, subreq, transferred_or_error,
                                    netfs_fail_copy_to_cache);
        } else {
                netfs_stat(&netfs_n_rh_write_done);
        }

        trace_netfs_sreq(subreq, netfs_sreq_trace_write_term);

        /* If we decrement nr_wr_ops to 0, the ref belongs to us. */
        if (atomic_dec_and_test(&rreq->nr_wr_ops))
                netfs_rreq_unmark_after_write(rreq, was_async);

        netfs_put_subrequest(subreq, was_async);
}

/*
 * Perform any outstanding writes to the cache. We inherit a ref from the
 * caller.
 */
static void netfs_rreq_do_write_to_cache(struct netfs_read_request *rreq)
{
        struct netfs_cache_resources *cres = &rreq->cache_resources;
        struct netfs_read_subrequest *subreq, *next, *p;
        struct iov_iter iter;
        int ret;

        trace_netfs_rreq(rreq, netfs_rreq_trace_write);

        /* We don't want terminating writes trying to wake us up whilst we're
         * still going through the list.
         */
        atomic_inc(&rreq->nr_wr_ops);

        list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) {
                if (!test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags)) {
                        list_del_init(&subreq->rreq_link);
                        netfs_put_subrequest(subreq, false);
                }
        }

        list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
                /* Amalgamate adjacent writes */
                while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
                        next = list_next_entry(subreq, rreq_link);
                        if (next->start != subreq->start + subreq->len)
                                break;
                        subreq->len += next->len;
                        list_del_init(&next->rreq_link);
                        netfs_put_subrequest(next, false);
                }

                ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len,
                                               rreq->i_size);
                if (ret < 0) {
                        trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write);
                        trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip);
                        continue;
                }

                iov_iter_xarray(&iter, WRITE, &rreq->mapping->i_pages,
                                subreq->start, subreq->len);

                atomic_inc(&rreq->nr_wr_ops);
                netfs_stat(&netfs_n_rh_write);
                netfs_get_read_subrequest(subreq);
                trace_netfs_sreq(subreq, netfs_sreq_trace_write);
                cres->ops->write(cres, subreq->start, &iter,
                                 netfs_rreq_copy_terminated, subreq);
        }

        /* If we decrement nr_wr_ops to 0, the usage ref belongs to us. */
        if (atomic_dec_and_test(&rreq->nr_wr_ops))
                netfs_rreq_unmark_after_write(rreq, false);
}

static void netfs_rreq_write_to_cache_work(struct work_struct *work)
{
        struct netfs_read_request *rreq =
                container_of(work, struct netfs_read_request, work);

        netfs_rreq_do_write_to_cache(rreq);
}

static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq)
{
        rreq->work.func = netfs_rreq_write_to_cache_work;
        if (!queue_work(system_unbound_wq, &rreq->work))
                BUG();
}

/*
 * Unlock the folios in a read operation. We need to set PG_fscache on any
 * folios we're going to write back before we unlock them.
 */
static void netfs_rreq_unlock(struct netfs_read_request *rreq)
{
        struct netfs_read_subrequest *subreq;
        struct folio *folio;
        unsigned int iopos, account = 0;
        pgoff_t start_page = rreq->start / PAGE_SIZE;
        pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
        bool subreq_failed = false;

        XA_STATE(xas, &rreq->mapping->i_pages, start_page);

        if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
                __clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
                list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
                        __clear_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags);
                }
        }

        /* Walk through the pagecache and the I/O request lists simultaneously.
         * We may have a mixture of cached and uncached sections and we only
         * really want to write out the uncached sections. This is slightly
         * complicated by the possibility that we might have huge pages with a
         * mixture inside.
         */
        subreq = list_first_entry(&rreq->subrequests,
                                  struct netfs_read_subrequest, rreq_link);
        iopos = 0;
        subreq_failed = (subreq->error < 0);

        trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);

        rcu_read_lock();
        xas_for_each(&xas, folio, last_page) {
                unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
                unsigned int pgend = pgpos + folio_size(folio);
                bool pg_failed = false;

                for (;;) {
                        if (!subreq) {
                                pg_failed = true;
                                break;
                        }
                        if (test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags))
                                folio_start_fscache(folio);
                        pg_failed |= subreq_failed;
                        if (pgend < iopos + subreq->len)
                                break;

                        account += subreq->transferred;
                        iopos += subreq->len;
                        if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
                                subreq = list_next_entry(subreq, rreq_link);
                                subreq_failed = (subreq->error < 0);
                        } else {
                                subreq = NULL;
                                subreq_failed = false;
                        }
                        if (pgend == iopos)
                                break;
                }

                if (!pg_failed) {
                        flush_dcache_folio(folio);
                        folio_mark_uptodate(folio);
                }

                if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
                        if (folio_index(folio) == rreq->no_unlock_folio &&
                            test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
                                _debug("no unlock");
                        else
                                folio_unlock(folio);
                }
        }
        rcu_read_unlock();

        task_io_account_read(account);
        if (rreq->netfs_ops->done)
                rreq->netfs_ops->done(rreq);
}

/*
 * Handle a short read.
 */
static void netfs_rreq_short_read(struct netfs_read_request *rreq,
                                  struct netfs_read_subrequest *subreq)
{
        __clear_bit(NETFS_SREQ_SHORT_READ, &subreq->flags);
        __set_bit(NETFS_SREQ_SEEK_DATA_READ, &subreq->flags);

        netfs_stat(&netfs_n_rh_short_read);
        trace_netfs_sreq(subreq, netfs_sreq_trace_resubmit_short);

        netfs_get_read_subrequest(subreq);
        atomic_inc(&rreq->nr_rd_ops);
        if (subreq->source == NETFS_READ_FROM_CACHE)
                netfs_read_from_cache(rreq, subreq, true);
        else
                netfs_read_from_server(rreq, subreq);
}

/*
 * Resubmit any short or failed operations. Returns true if we got the rreq
 * ref back.
 */
static bool netfs_rreq_perform_resubmissions(struct netfs_read_request *rreq)
{
        struct netfs_read_subrequest *subreq;

        WARN_ON(in_interrupt());

        trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);

        /* We don't want terminating submissions trying to wake us up whilst
         * we're still going through the list.
         */
        atomic_inc(&rreq->nr_rd_ops);

        __clear_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
        list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
                if (subreq->error) {
                        if (subreq->source != NETFS_READ_FROM_CACHE)
                                break;
                        subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
                        subreq->error = 0;
                        netfs_stat(&netfs_n_rh_download_instead);
                        trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead);
                        netfs_get_read_subrequest(subreq);
                        atomic_inc(&rreq->nr_rd_ops);
                        netfs_read_from_server(rreq, subreq);
                } else if (test_bit(NETFS_SREQ_SHORT_READ, &subreq->flags)) {
                        netfs_rreq_short_read(rreq, subreq);
                }
        }

        /* If we decrement nr_rd_ops to 0, the usage ref belongs to us. */
        if (atomic_dec_and_test(&rreq->nr_rd_ops))
                return true;

        wake_up_var(&rreq->nr_rd_ops);
        return false;
}

/*
 * Check to see if the data read is still valid.
 */
static void netfs_rreq_is_still_valid(struct netfs_read_request *rreq)
{
        struct netfs_read_subrequest *subreq;

        if (!rreq->netfs_ops->is_still_valid ||
            rreq->netfs_ops->is_still_valid(rreq))
                return;

        list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
                if (subreq->source == NETFS_READ_FROM_CACHE) {
                        subreq->error = -ESTALE;
                        __set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
                }
        }
}

/*
 * Assess the state of a read request and decide what to do next.
 *
 * Note that we could be in an ordinary kernel thread, on a workqueue or in
 * softirq context at this point. We inherit a ref from the caller.
 */
static void netfs_rreq_assess(struct netfs_read_request *rreq, bool was_async)
{
        trace_netfs_rreq(rreq, netfs_rreq_trace_assess);

again:
        netfs_rreq_is_still_valid(rreq);

        if (!test_bit(NETFS_RREQ_FAILED, &rreq->flags) &&
            test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags)) {
                if (netfs_rreq_perform_resubmissions(rreq))
                        goto again;
                return;
        }

        netfs_rreq_unlock(rreq);

        clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
        wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);

        if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags))
                return netfs_rreq_write_to_cache(rreq);

        netfs_rreq_completed(rreq, was_async);
}

static void netfs_rreq_work(struct work_struct *work)
{
        struct netfs_read_request *rreq =
                container_of(work, struct netfs_read_request, work);
        netfs_rreq_assess(rreq, false);
}

/*
 * Handle the completion of all outstanding I/O operations on a read request.
 * We inherit a ref from the caller.
 */
static void netfs_rreq_terminated(struct netfs_read_request *rreq,
                                  bool was_async)
{
        if (test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags) &&
            was_async) {
                if (!queue_work(system_unbound_wq, &rreq->work))
                        BUG();
        } else {
                netfs_rreq_assess(rreq, was_async);
        }
}

/**
 * netfs_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O request that has terminated.
 * @transferred_or_error: The amount of data transferred or an error code.
 * @was_async: The termination was asynchronous
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates in @transferred_or_error the outcome of the operation,
 * supplying a positive value to indicate the number of bytes transferred, 0 to
 * indicate a failure to transfer anything that should be retried or a negative
 * error code. The helper will look after reissuing I/O operations as
 * appropriate and writing downloaded data to the cache.
 *
 * If @was_async is true, the caller might be running in softirq or interrupt
 * context and we can't sleep.
 */
void netfs_subreq_terminated(struct netfs_read_subrequest *subreq,
                             ssize_t transferred_or_error,
                             bool was_async)
{
        struct netfs_read_request *rreq = subreq->rreq;
        int u;

        _enter("[%u]{%llx,%lx},%zd",
               subreq->debug_index, subreq->start, subreq->flags,
               transferred_or_error);

        switch (subreq->source) {
        case NETFS_READ_FROM_CACHE:
                netfs_stat(&netfs_n_rh_read_done);
                break;
        case NETFS_DOWNLOAD_FROM_SERVER:
                netfs_stat(&netfs_n_rh_download_done);
                break;
        default:
                break;
        }

        if (IS_ERR_VALUE(transferred_or_error)) {
                subreq->error = transferred_or_error;
                trace_netfs_failure(rreq, subreq, transferred_or_error,
                                    netfs_fail_read);
                goto failed;
        }

        if (WARN(transferred_or_error > subreq->len - subreq->transferred,
                 "Subreq overread: R%x[%x] %zd > %zu - %zu",
                 rreq->debug_id, subreq->debug_index,
                 transferred_or_error, subreq->len, subreq->transferred))
                transferred_or_error = subreq->len - subreq->transferred;

        subreq->error = 0;
        subreq->transferred += transferred_or_error;
        if (subreq->transferred < subreq->len)
                goto incomplete;

complete:
        __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
        if (test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags))
                set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);

out:
        trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

        /* If we decrement nr_rd_ops to 0, the ref belongs to us. */
        u = atomic_dec_return(&rreq->nr_rd_ops);
        if (u == 0)
                netfs_rreq_terminated(rreq, was_async);
        else if (u == 1)
                wake_up_var(&rreq->nr_rd_ops);

        netfs_put_subrequest(subreq, was_async);
        return;

incomplete:
        if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
                netfs_clear_unread(subreq);
                subreq->transferred = subreq->len;
                goto complete;
        }

        if (transferred_or_error == 0) {
                if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
                        subreq->error = -ENODATA;
                        goto failed;
                }
        } else {
                __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
        }

        __set_bit(NETFS_SREQ_SHORT_READ, &subreq->flags);
        set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
        goto out;

failed:
        if (subreq->source == NETFS_READ_FROM_CACHE) {
                netfs_stat(&netfs_n_rh_read_failed);
                set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
        } else {
                netfs_stat(&netfs_n_rh_download_failed);
                set_bit(NETFS_RREQ_FAILED, &rreq->flags);
                rreq->error = subreq->error;
        }
        goto out;
}
EXPORT_SYMBOL(netfs_subreq_terminated);

static enum netfs_read_source netfs_cache_prepare_read(struct netfs_read_subrequest *subreq,
                                                       loff_t i_size)
{
        struct netfs_read_request *rreq = subreq->rreq;
        struct netfs_cache_resources *cres = &rreq->cache_resources;

        if (cres->ops)
                return cres->ops->prepare_read(subreq, i_size);
        if (subreq->start >= rreq->i_size)
                return NETFS_FILL_WITH_ZEROES;
        return NETFS_DOWNLOAD_FROM_SERVER;
}

/*
 * Work out what sort of subrequest the next one will be.
 */
static enum netfs_read_source
netfs_rreq_prepare_read(struct netfs_read_request *rreq,
                        struct netfs_read_subrequest *subreq)
{
        enum netfs_read_source source;

        _enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);

        source = netfs_cache_prepare_read(subreq, rreq->i_size);
        if (source == NETFS_INVALID_READ)
                goto out;

        if (source == NETFS_DOWNLOAD_FROM_SERVER) {
                /* Call out to the netfs to let it shrink the request to fit
                 * its own I/O sizes and boundaries. If it shrinks it here, it
                 * will be called again to make simultaneous calls; if it wants
                 * to make serial calls, it can indicate a short read and then
                 * we will call it again.
                 */
                if (subreq->len > rreq->i_size - subreq->start)
                        subreq->len = rreq->i_size - subreq->start;

                if (rreq->netfs_ops->clamp_length &&
                    !rreq->netfs_ops->clamp_length(subreq)) {
                        source = NETFS_INVALID_READ;
                        goto out;
                }
        }

        if (WARN_ON(subreq->len == 0))
                source = NETFS_INVALID_READ;

out:
        subreq->source = source;
        trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
        return source;
}

/*
 * Slice off a piece of a read request and submit an I/O request for it.
 */
static bool netfs_rreq_submit_slice(struct netfs_read_request *rreq,
                                    unsigned int *_debug_index)
{
        struct netfs_read_subrequest *subreq;
        enum netfs_read_source source;

        subreq = netfs_alloc_subrequest(rreq);
        if (!subreq)
                return false;

        subreq->debug_index = (*_debug_index)++;
        subreq->start = rreq->start + rreq->submitted;
        subreq->len = rreq->len - rreq->submitted;

        _debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted);
        list_add_tail(&subreq->rreq_link, &rreq->subrequests);

        /* Call out to the cache to find out what it can do with the remaining
         * subset. It tells us in subreq->flags what it decided should be done
         * and adjusts subreq->len down if the subset crosses a cache boundary.
         *
         * Then when we hand the subset, it can choose to take a subset of that
         * (the starts must coincide), in which case, we go around the loop
         * again and ask it to download the next piece.
         */
        source = netfs_rreq_prepare_read(rreq, subreq);
        if (source == NETFS_INVALID_READ)
                goto subreq_failed;

        atomic_inc(&rreq->nr_rd_ops);

        rreq->submitted += subreq->len;

        trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
        switch (source) {
        case NETFS_FILL_WITH_ZEROES:
                netfs_fill_with_zeroes(rreq, subreq);
                break;
        case NETFS_DOWNLOAD_FROM_SERVER:
                netfs_read_from_server(rreq, subreq);
                break;
        case NETFS_READ_FROM_CACHE:
                netfs_read_from_cache(rreq, subreq, false);
                break;
        default:
                BUG();
        }

        return true;

subreq_failed:
        rreq->error = subreq->error;
        netfs_put_subrequest(subreq, false);
        return false;
}

static void netfs_cache_expand_readahead(struct netfs_read_request *rreq,
                                         loff_t *_start, size_t *_len, loff_t i_size)
{
        struct netfs_cache_resources *cres = &rreq->cache_resources;

        if (cres->ops && cres->ops->expand_readahead)
                cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_read_request *rreq,
                              struct readahead_control *ractl)
{
        /* Give the cache a chance to change the request parameters. The
         * resultant request must contain the original region.
         */
        netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

        /* Give the netfs a chance to change the request parameters. The
         * resultant request must contain the original region.
         */
        if (rreq->netfs_ops->expand_readahead)
                rreq->netfs_ops->expand_readahead(rreq);

        /* Expand the request if the cache wants it to start earlier. Note
         * that the expansion may get further extended if the VM wishes to
         * insert THPs and the preferred start and/or end wind up in the middle
         * of THPs.
         *
         * If this is the case, however, the THP size should be an integer
         * multiple of the cache granule size, so we get a whole number of
         * granules to deal with.
         */
        if (rreq->start != readahead_pos(ractl) ||
            rreq->len != readahead_length(ractl)) {
                readahead_expand(ractl, rreq->start, rreq->len);
                rreq->start = readahead_pos(ractl);
                rreq->len = readahead_length(ractl);

                trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
                                 netfs_read_trace_expanded);
        }
}

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 * @ops: The network filesystem's operations for the helper to use
 * @netfs_priv: Private netfs data to be retained in the request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O
 * requests from different sources will get munged together. If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_op, is mandatory. It may also be passed a private token, which will
 * be retained in rreq->netfs_priv and will be cleaned up by ops->cleanup().
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl,
                     const struct netfs_read_request_ops *ops,
                     void *netfs_priv)
{
        struct netfs_read_request *rreq;
        unsigned int debug_index = 0;
        int ret;

        _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));

        if (readahead_count(ractl) == 0)
                goto cleanup;

        rreq = netfs_alloc_read_request(ops, netfs_priv, ractl->file);
        if (!rreq)
                goto cleanup;
        rreq->mapping = ractl->mapping;
        rreq->start = readahead_pos(ractl);
        rreq->len = readahead_length(ractl);

        if (ops->begin_cache_operation) {
                ret = ops->begin_cache_operation(rreq);
                if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
                        goto cleanup_free;
        }

        netfs_stat(&netfs_n_rh_readahead);
        trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
                         netfs_read_trace_readahead);

        netfs_rreq_expand(rreq, ractl);

        atomic_set(&rreq->nr_rd_ops, 1);
        do {
                if (!netfs_rreq_submit_slice(rreq, &debug_index))
                        break;

        } while (rreq->submitted < rreq->len);

        /* Drop the refs on the folios here rather than in the cache or
         * filesystem. The locks will be dropped in netfs_rreq_unlock().
         */
        while (readahead_folio(ractl))
                ;

        /* If we decrement nr_rd_ops to 0, the ref belongs to us. */
        if (atomic_dec_and_test(&rreq->nr_rd_ops))
                netfs_rreq_assess(rreq, false);
        return;

cleanup_free:
        netfs_put_read_request(rreq, false);
        return;
cleanup:
        if (netfs_priv)
                ops->cleanup(ractl->mapping, netfs_priv);
        return;
}
EXPORT_SYMBOL(netfs_readahead);

/**
 * netfs_readpage - Helper to manage a readpage request
 * @file: The file to read from
 * @folio: The folio to read
 * @ops: The network filesystem's operations for the helper to use
 * @netfs_priv: Private netfs data to be retained in the request
 *
 * Fulfil a readpage request by drawing data from the cache if possible, or the
 * netfs if not. Space beyond the EOF is zero-filled. Multiple I/O requests
 * from different sources will get munged together.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_op, is mandatory. It may also be passed a private token, which will
 * be retained in rreq->netfs_priv and will be cleaned up by ops->cleanup().
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_readpage(struct file *file,
                   struct folio *folio,
                   const struct netfs_read_request_ops *ops,
                   void *netfs_priv)
{
        struct netfs_read_request *rreq;
        unsigned int debug_index = 0;
        int ret;

        _enter("%lx", folio_index(folio));

        rreq = netfs_alloc_read_request(ops, netfs_priv, file);
        if (!rreq) {
                if (netfs_priv)
                        ops->cleanup(folio_file_mapping(folio), netfs_priv);
                folio_unlock(folio);
                return -ENOMEM;
        }
        rreq->mapping = folio_file_mapping(folio);
        rreq->start = folio_file_pos(folio);
        rreq->len = folio_size(folio);

        if (ops->begin_cache_operation) {
                ret = ops->begin_cache_operation(rreq);
                if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) {
                        folio_unlock(folio);
                        goto out;
                }
        }

        netfs_stat(&netfs_n_rh_readpage);
        trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

        netfs_get_read_request(rreq);

        atomic_set(&rreq->nr_rd_ops, 1);
        do {
                if (!netfs_rreq_submit_slice(rreq, &debug_index))
                        break;

        } while (rreq->submitted < rreq->len);

        /* Keep nr_rd_ops incremented so that the ref always belongs to us, and
         * the service code isn't punted off to a random thread pool to
         * process.
         */
        do {
                wait_var_event(&rreq->nr_rd_ops, atomic_read(&rreq->nr_rd_ops) == 1);
                netfs_rreq_assess(rreq, false);
        } while (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags));

        ret = rreq->error;
        if (ret == 0 && rreq->submitted < rreq->len) {
                trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_readpage);
                ret = -EIO;
        }
out:
        netfs_put_read_request(rreq, false);
        return ret;
}
EXPORT_SYMBOL(netfs_readpage);

/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len)
{
        struct inode *inode = folio_inode(folio);
        loff_t i_size = i_size_read(inode);
        size_t offset = offset_in_folio(folio, pos);

        /* Full folio write */
        if (offset == 0 && len >= folio_size(folio))
                return true;

        /* pos beyond last folio in the file */
        if (pos - offset >= i_size)
                goto zero_out;

        /* Write that covers from the start of the folio to EOF or beyond */
        if (offset == 0 && (pos + len) >= i_size)
                goto zero_out;

        return false;
zero_out:
        zero_user_segments(&folio->page, 0, offset, offset + len, folio_size(folio));
        return true;
}

/**
 * netfs_write_begin - Helper to prepare for writing
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @aop_flags: AOP_* flags
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 * @ops: The network filesystem's operations for the helper to use
 * @netfs_priv: Private netfs data to be retained in the request
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together. If
 * necessary, the readahead window can be expanded in either direction to a
 * more convenient alignment for RPC efficiency or to make storage in the cache
 * feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_op, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked. It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end. It is permitted to sleep. It should return 0 if the request
 * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
 * be re-obtained; or return an error.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_write_begin(struct file *file, struct address_space *mapping,
                      loff_t pos, unsigned int len, unsigned int aop_flags,
                      struct folio **_folio, void **_fsdata,
                      const struct netfs_read_request_ops *ops,
                      void *netfs_priv)
{
        struct netfs_read_request *rreq;
        struct folio *folio;
        struct inode *inode = file_inode(file);
        unsigned int debug_index = 0, fgp_flags;
        pgoff_t index = pos >> PAGE_SHIFT;
        int ret;

        DEFINE_READAHEAD(ractl, file, NULL, mapping, index);

retry:
        fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
        if (aop_flags & AOP_FLAG_NOFS)
                fgp_flags |= FGP_NOFS;
        folio = __filemap_get_folio(mapping, index, fgp_flags,
                                    mapping_gfp_mask(mapping));
        if (!folio)
                return -ENOMEM;

        if (ops->check_write_begin) {
                /* Allow the netfs (eg. ceph) to flush conflicts. */
                ret = ops->check_write_begin(file, pos, len, folio, _fsdata);
                if (ret < 0) {
                        trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
                        if (ret == -EAGAIN)
                                goto retry;
                        goto error;
                }
        }

        if (folio_test_uptodate(folio))
                goto have_folio;

        /* If the page is beyond the EOF, we want to clear it - unless it's
         * within the cache granule containing the EOF, in which case we need
         * to preload the granule.
         */
        if (!ops->is_cache_enabled(inode) &&
            netfs_skip_folio_read(folio, pos, len)) {
                netfs_stat(&netfs_n_rh_write_zskip);
                goto have_folio_no_wait;
        }

        ret = -ENOMEM;
        rreq = netfs_alloc_read_request(ops, netfs_priv, file);
        if (!rreq)
                goto error;
        rreq->mapping = folio_file_mapping(folio);
        rreq->start = folio_file_pos(folio);
        rreq->len = folio_size(folio);
        rreq->no_unlock_folio = folio_index(folio);
        __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
        netfs_priv = NULL;

        if (ops->begin_cache_operation) {
                ret = ops->begin_cache_operation(rreq);
                if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
                        goto error_put;
        }

        netfs_stat(&netfs_n_rh_write_begin);
        trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

        /* Expand the request to meet caching requirements and download
         * preferences.
         */
        ractl._nr_pages = folio_nr_pages(folio);
        netfs_rreq_expand(rreq, &ractl);
        netfs_get_read_request(rreq);

        /* We hold the folio locks, so we can drop the references */
        folio_get(folio);
        while (readahead_folio(&ractl))
                ;

        atomic_set(&rreq->nr_rd_ops, 1);
        do {
                if (!netfs_rreq_submit_slice(rreq, &debug_index))
                        break;

        } while (rreq->submitted < rreq->len);

        /* Keep nr_rd_ops incremented so that the ref always belongs to us, and
         * the service code isn't punted off to a random thread pool to
         * process.
         */
        for (;;) {
                wait_var_event(&rreq->nr_rd_ops, atomic_read(&rreq->nr_rd_ops) == 1);
                netfs_rreq_assess(rreq, false);
                if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
                        break;
                cond_resched();
        }

        ret = rreq->error;
        if (ret == 0 && rreq->submitted < rreq->len) {
                trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_write_begin);
                ret = -EIO;
        }
        netfs_put_read_request(rreq, false);
        if (ret < 0)
                goto error;

have_folio:
        ret = folio_wait_fscache_killable(folio);
        if (ret < 0)
                goto error;
have_folio_no_wait:
        if (netfs_priv)
                ops->cleanup(mapping, netfs_priv);
        *_folio = folio;
        _leave(" = 0");
        return 0;

error_put:
        netfs_put_read_request(rreq, false);
error:
        folio_unlock(folio);
        folio_put(folio);
        if (netfs_priv)
                ops->cleanup(mapping, netfs_priv);
        _leave(" = %d", ret);
        return ret;
}
EXPORT_SYMBOL(netfs_write_begin);