// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem read subrequest result collection, assessment and
 * retrying.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/*
 * Clear the unread part of an I/O request.
 */
static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
{
        netfs_reset_iter(subreq);
        WARN_ON_ONCE(subreq->len - subreq->transferred != iov_iter_count(&subreq->io_iter));
        iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter);
        if (subreq->start + subreq->transferred >= subreq->rreq->i_size)
                __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
}

/*
 * Flush, mark and unlock a folio that's now completely read.  If we want to
 * cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it
 * dirty and let writeback handle it.
 */
static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
                                    struct netfs_io_request *rreq,
                                    struct folio_queue *folioq,
                                    int slot)
{
        struct netfs_folio *finfo;
        struct folio *folio = folioq_folio(folioq, slot);

        flush_dcache_folio(folio);
        folio_mark_uptodate(folio);

        if (!test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
                finfo = netfs_folio_info(folio);
                if (finfo) {
                        trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
                        if (finfo->netfs_group)
                                folio_change_private(folio, finfo->netfs_group);
                        else
                                folio_detach_private(folio);
                        kfree(finfo);
                }

                if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
                        if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
                                trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
                                folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
                                folio_mark_dirty(folio);
                        }
                } else {
                        trace_netfs_folio(folio, netfs_folio_trace_read_done);
                }

                folioq_clear(folioq, slot);
        } else {
                // TODO: Use of PG_private_2 is deprecated.
                if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
                        netfs_pgpriv2_mark_copy_to_cache(subreq, rreq, folioq, slot);
                else
                        folioq_clear(folioq, slot);
        }

        if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
                if (folio->index == rreq->no_unlock_folio &&
                    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
                        _debug("no unlock");
                } else {
                        trace_netfs_folio(folio, netfs_folio_trace_read_unlock);
                        folio_unlock(folio);
                }
        }
}

/*
 * Unlock any folios that are now completely read.  Returns true if the
 * subrequest is removed from the list.
 */
static bool netfs_consume_read_data(struct netfs_io_subrequest *subreq, bool was_async)
{
        struct netfs_io_subrequest *prev, *next;
        struct netfs_io_request *rreq = subreq->rreq;
        struct folio_queue *folioq = subreq->curr_folioq;
        size_t avail, prev_donated, next_donated, fsize, part, excess;
        loff_t fpos, start;
        loff_t fend;
        int slot = subreq->curr_folioq_slot;

        if (WARN(subreq->transferred > subreq->len,
                 "Subreq overread: R%x[%x] %zu > %zu",
                 rreq->debug_id, subreq->debug_index,
                 subreq->transferred, subreq->len))
                subreq->transferred = subreq->len;

next_folio:
        fsize = PAGE_SIZE << subreq->curr_folio_order;
        fpos = round_down(subreq->start + subreq->consumed, fsize);
        fend = fpos + fsize;

        if (WARN_ON_ONCE(!folioq) ||
            WARN_ON_ONCE(!folioq_folio(folioq, slot)) ||
            WARN_ON_ONCE(folioq_folio(folioq, slot)->index != fpos / PAGE_SIZE)) {
                pr_err("R=%08x[%x] s=%llx-%llx ctl=%zx/%zx/%zx sl=%u\n",
                       rreq->debug_id, subreq->debug_index,
                       subreq->start, subreq->start + subreq->transferred - 1,
                       subreq->consumed, subreq->transferred, subreq->len,
                       slot);
                if (folioq) {
                        struct folio *folio = folioq_folio(folioq, slot);

                        pr_err("folioq: orders=%02x%02x%02x%02x\n",
                               folioq->orders[0], folioq->orders[1],
                               folioq->orders[2], folioq->orders[3]);
                        if (folio)
                                pr_err("folio: %llx-%llx ix=%llx o=%u qo=%u\n",
                                       fpos, fend - 1, folio_pos(folio), folio_order(folio),
                                       folioq_folio_order(folioq, slot));
                }
        }

donation_changed:
        /* Try to consume the current folio if we've hit or passed the end of
         * it.  There's a possibility that this subreq doesn't start at the
         * beginning of the folio, in which case we need to donate to/from the
         * preceding subreq.
         *
         * We also need to include any potential donation back from the
         * following subreq.  (A worked example of the donation bookkeeping
         * follows this function.)
         */
        prev_donated = READ_ONCE(subreq->prev_donated);
        next_donated = READ_ONCE(subreq->next_donated);
        if (prev_donated || next_donated) {
                spin_lock_bh(&rreq->lock);
                prev_donated = subreq->prev_donated;
                next_donated = subreq->next_donated;
                subreq->start -= prev_donated;
                subreq->len += prev_donated;
                subreq->transferred += prev_donated;
                prev_donated = subreq->prev_donated = 0;
                if (subreq->transferred == subreq->len) {
                        subreq->len += next_donated;
                        subreq->transferred += next_donated;
                        next_donated = subreq->next_donated = 0;
                }
                trace_netfs_sreq(subreq, netfs_sreq_trace_add_donations);
                spin_unlock_bh(&rreq->lock);
        }

        avail = subreq->transferred;
        if (avail == subreq->len)
                avail += next_donated;
        start = subreq->start;
        if (subreq->consumed == 0) {
                start -= prev_donated;
                avail += prev_donated;
        } else {
                start += subreq->consumed;
                avail -= subreq->consumed;
        }
        part = umin(avail, fsize);

        trace_netfs_progress(subreq, start, avail, part);

        if (start + avail >= fend) {
                if (fpos == start) {
                        /* Flush, unlock and mark for caching any folio we've just read. */
                        subreq->consumed = fend - subreq->start;
                        netfs_unlock_read_folio(subreq, rreq, folioq, slot);
                        folioq_mark2(folioq, slot);
                        if (subreq->consumed >= subreq->len)
                                goto remove_subreq;
                } else if (fpos < start) {
                        excess = fend - subreq->start;

                        spin_lock_bh(&rreq->lock);
                        /* If we complete first on a folio split with the
                         * preceding subreq, donate to that subreq - otherwise
                         * we get the responsibility.
                         */
                        if (subreq->prev_donated != prev_donated) {
                                spin_unlock_bh(&rreq->lock);
                                goto donation_changed;
                        }

                        if (list_is_first(&subreq->rreq_link, &rreq->subrequests)) {
                                spin_unlock_bh(&rreq->lock);
                                pr_err("Can't donate prior to front\n");
                                goto bad;
                        }

                        prev = list_prev_entry(subreq, rreq_link);
                        WRITE_ONCE(prev->next_donated, prev->next_donated + excess);
                        subreq->start += excess;
                        subreq->len -= excess;
                        subreq->transferred -= excess;
                        trace_netfs_donate(rreq, subreq, prev, excess,
                                           netfs_trace_donate_tail_to_prev);
                        trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev);

                        if (subreq->consumed >= subreq->len)
                                goto remove_subreq_locked;
                        spin_unlock_bh(&rreq->lock);
                } else {
                        pr_err("fpos > start\n");
                        goto bad;
                }

                /* Advance the rolling buffer to the next folio. */
                slot++;
                if (slot >= folioq_nr_slots(folioq)) {
                        slot = 0;
                        folioq = folioq->next;
                        subreq->curr_folioq = folioq;
                }
                subreq->curr_folioq_slot = slot;
                if (folioq && folioq_folio(folioq, slot))
                        subreq->curr_folio_order = folioq->orders[slot];
                if (!was_async)
                        cond_resched();
                goto next_folio;
        }

        /* Deal with partial progress. */
        if (subreq->transferred < subreq->len)
                return false;

        /* Donate the remaining downloaded data to one of the neighbouring
         * subrequests.  Note that we may race with them doing the same thing.
         */
        spin_lock_bh(&rreq->lock);

        if (subreq->prev_donated != prev_donated ||
            subreq->next_donated != next_donated) {
                spin_unlock_bh(&rreq->lock);
                cond_resched();
                goto donation_changed;
        }

        /* Deal with the trickiest case: that this subreq is in the middle of a
         * folio, not touching either edge, but finishes first.  In such a
         * case, we donate to the previous subreq, if there is one and if it is
         * contiguous, so that the donation is only handled when that completes
         * - and remove this subreq from the list.
         *
         * If the previous subreq finished first, we will have acquired their
         * donation and should be able to unlock folios and/or donate nextwards.
         */
        prev = list_prev_entry(subreq, rreq_link);
        if (!subreq->consumed &&
            !prev_donated &&
            !list_is_first(&subreq->rreq_link, &rreq->subrequests) &&
            subreq->start == prev->start + prev->len) {
                WRITE_ONCE(prev->next_donated, prev->next_donated + subreq->len);
                subreq->start += subreq->len;
                subreq->len = 0;
                subreq->transferred = 0;
                trace_netfs_donate(rreq, subreq, prev, subreq->len,
                                   netfs_trace_donate_to_prev);
                trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev);
                goto remove_subreq_locked;
        }

        /* If we can't donate down the chain, donate up the chain instead. */
        excess = subreq->len - subreq->consumed + next_donated;

        if (!subreq->consumed)
                excess += prev_donated;

        if (list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
                rreq->prev_donated = excess;
                trace_netfs_donate(rreq, subreq, NULL, excess,
                                   netfs_trace_donate_to_deferred_next);
        } else {
                next = list_next_entry(subreq, rreq_link);
                WRITE_ONCE(next->prev_donated, excess);
                trace_netfs_donate(rreq, subreq, next, excess,
                                   netfs_trace_donate_to_next);
        }
        trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_next);
        subreq->len = subreq->consumed;
        subreq->transferred = subreq->consumed;
        goto remove_subreq_locked;

remove_subreq:
        spin_lock_bh(&rreq->lock);
remove_subreq_locked:
        subreq->consumed = subreq->len;
        list_del(&subreq->rreq_link);
        spin_unlock_bh(&rreq->lock);
        netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_consumed);
        return true;

bad:
        /* Errr... prev and next both donated to us, but insufficient to finish
         * the folio.
         */
        printk("R=%08x[%x] s=%llx-%llx %zx/%zx/%zx\n",
               rreq->debug_id, subreq->debug_index,
               subreq->start, subreq->start + subreq->transferred - 1,
               subreq->consumed, subreq->transferred, subreq->len);
        printk("folio: %llx-%llx\n", fpos, fend - 1);
        printk("donated: prev=%zx next=%zx\n", prev_donated, next_donated);
        printk("s=%llx av=%zx part=%zx\n", start, avail, part);
        BUG();
}

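/*
 * Worked example of the donation scheme above (editorial illustration, not
 * part of the original source): with 16KiB folios (fsize = 0x4000), suppose
 * folio F spans file offsets 0x0-0x3fff and is covered by subreq A
 * (0x0-0x2fff) and subreq B (0x3000-0x3fff).  If B completes first, it cannot
 * unlock F because it starts mid-folio (fpos < start), so it donates its
 * 0x1000 bytes backwards by adding to A->next_donated and is removed; when A
 * later completes, it absorbs that donation, sees start + avail >= fend and
 * unlocks F.  If instead A completes first, it stops mid-folio with nothing
 * consumed, so it donates everything it read forwards via B->prev_donated and
 * B does the unlocking once it finishes.
 */
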
/*
 * Do page flushing and suchlike after DIO.
 */
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
        struct netfs_io_subrequest *subreq;
        unsigned int i;

        /* Collect unbuffered reads and direct reads, adding up the transfer
         * sizes until we find the first short or failed subrequest.
         */
        list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
                rreq->transferred += subreq->transferred;

                if (subreq->transferred < subreq->len ||
                    test_bit(NETFS_SREQ_FAILED, &subreq->flags)) {
                        rreq->error = subreq->error;
                        break;
                }
        }

        if (rreq->origin == NETFS_DIO_READ) {
                for (i = 0; i < rreq->direct_bv_count; i++) {
                        flush_dcache_page(rreq->direct_bv[i].bv_page);
                        // TODO: cifs marks pages in the destination buffer
                        // dirty under some circumstances after a read.  Do we
                        // need to do that too?
                        set_page_dirty(rreq->direct_bv[i].bv_page);
                }
        }

        if (rreq->iocb) {
                rreq->iocb->ki_pos += rreq->transferred;
                if (rreq->iocb->ki_complete)
                        rreq->iocb->ki_complete(
                                rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
        }
        if (rreq->netfs_ops->done)
                rreq->netfs_ops->done(rreq);
        if (rreq->origin == NETFS_DIO_READ)
                inode_dio_end(rreq->inode);
}

/*
 * Assess the state of a read request and decide what to do next.
 *
 * Note that we're in normal kernel thread context at this point, possibly
 * running on a workqueue.
 */
static void netfs_rreq_assess(struct netfs_io_request *rreq)
{
        trace_netfs_rreq(rreq, netfs_rreq_trace_assess);

        //netfs_rreq_is_still_valid(rreq);

        if (test_and_clear_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags)) {
                netfs_retry_reads(rreq);
                return;
        }

        if (rreq->origin == NETFS_DIO_READ ||
            rreq->origin == NETFS_READ_GAPS)
                netfs_rreq_assess_dio(rreq);
        task_io_account_read(rreq->transferred);

        trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
        clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);

        trace_netfs_rreq(rreq, netfs_rreq_trace_done);
        netfs_clear_subrequests(rreq, false);
        netfs_unlock_abandoned_read_pages(rreq);
        if (unlikely(test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)))
                netfs_pgpriv2_write_to_the_cache(rreq);
}

void netfs_read_termination_worker(struct work_struct *work)
{
        struct netfs_io_request *rreq =
                container_of(work, struct netfs_io_request, work);

        netfs_see_request(rreq, netfs_rreq_trace_see_work);
        netfs_rreq_assess(rreq);
        netfs_put_request(rreq, false, netfs_rreq_trace_put_work_complete);
}

/*
 * Handle the completion of all outstanding I/O operations on a read request.
 * We inherit a ref from the caller.
 */
void netfs_rreq_terminated(struct netfs_io_request *rreq, bool was_async)
{
        if (!was_async)
                return netfs_rreq_assess(rreq);
        if (!work_pending(&rreq->work)) {
                netfs_get_request(rreq, netfs_rreq_trace_get_work);
                if (!queue_work(system_unbound_wq, &rreq->work))
                        netfs_put_request(rreq, was_async, netfs_rreq_trace_put_work_nq);
        }
}

/**
 * netfs_read_subreq_progress - Note progress of a read operation.
 * @subreq: The read request that has terminated.
 * @was_async: True if we're in an asynchronous context.
 *
 * This tells the read side of netfs lib that a contributory I/O operation has
 * made some progress and that it may be possible to unlock some folios.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 *
 * If @was_async is true, the caller might be running in softirq or interrupt
 * context and we can't sleep.
 */
void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq,
                                bool was_async)
{
        struct netfs_io_request *rreq = subreq->rreq;

        trace_netfs_sreq(subreq, netfs_sreq_trace_progress);

        if (subreq->transferred > subreq->consumed &&
            (rreq->origin == NETFS_READAHEAD ||
             rreq->origin == NETFS_READPAGE ||
             rreq->origin == NETFS_READ_FOR_WRITE)) {
                netfs_consume_read_data(subreq, was_async);
                __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
        }
}
EXPORT_SYMBOL(netfs_read_subreq_progress);

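/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * a filesystem that receives its read data in chunks might report interim
 * progress roughly like this, so that folios that are already completely
 * read can be unlocked before the whole subrequest finishes.  The
 * netfs_example_* name is hypothetical.
 */
static void __maybe_unused netfs_example_rx_chunk(struct netfs_io_subrequest *subreq,
                                                  size_t bytes_copied,
                                                  bool was_async)
{
        /* Account the data copied into the output buffer first, as the API
         * above requires, then let the core unlock whatever it can.
         */
        subreq->transferred += bytes_copied;
        netfs_read_subreq_progress(subreq, was_async);
}
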
/**
 * netfs_read_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O request that has terminated.
 * @error: Error code indicating type of completion.
 * @was_async: The termination was asynchronous
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates the outcome of the operation through @error, supplying
 * 0 to indicate a successful or retryable transfer (if NETFS_SREQ_NEED_RETRY
 * is set) or a negative error code.  The helper will look after reissuing I/O
 * operations as appropriate and writing downloaded data to the cache.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 *
 * If @was_async is true, the caller might be running in softirq or interrupt
 * context and we can't sleep.
 */
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
                                  int error, bool was_async)
{
        struct netfs_io_request *rreq = subreq->rreq;

        switch (subreq->source) {
        case NETFS_READ_FROM_CACHE:
                netfs_stat(&netfs_n_rh_read_done);
                break;
        case NETFS_DOWNLOAD_FROM_SERVER:
                netfs_stat(&netfs_n_rh_download_done);
                break;
        default:
                break;
        }

        if (rreq->origin != NETFS_DIO_READ) {
                /* Collect buffered reads.
                 *
                 * If the read completed validly short, then we can clear the
                 * tail before going on to unlock the folios.
                 */
                if (error == 0 && subreq->transferred < subreq->len &&
                    (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags) ||
                     test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags))) {
                        netfs_clear_unread(subreq);
                        subreq->transferred = subreq->len;
                        trace_netfs_sreq(subreq, netfs_sreq_trace_clear);
                }
                if (subreq->transferred > subreq->consumed &&
                    (rreq->origin == NETFS_READAHEAD ||
                     rreq->origin == NETFS_READPAGE ||
                     rreq->origin == NETFS_READ_FOR_WRITE)) {
                        netfs_consume_read_data(subreq, was_async);
                        __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
                }
                rreq->transferred += subreq->transferred;
        }

        /* Deal with retry requests, short reads and errors.  If we retry
         * but don't make progress, we abandon the attempt.
         */
        if (!error && subreq->transferred < subreq->len) {
                if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) {
                        trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof);
                } else {
                        trace_netfs_sreq(subreq, netfs_sreq_trace_short);
                        if (subreq->transferred > subreq->consumed) {
                                /* If we didn't read new data, abandon retry. */
                                if (subreq->retry_count &&
                                    test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
                                        __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
                                        set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
                                }
                        } else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
                                __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
                                set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
                        } else {
                                __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
                                error = -ENODATA;
                        }
                }
        }

        subreq->error = error;
        trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

        if (unlikely(error < 0)) {
                trace_netfs_failure(rreq, subreq, error, netfs_fail_read);
                if (subreq->source == NETFS_READ_FROM_CACHE) {
                        netfs_stat(&netfs_n_rh_read_failed);
                } else {
                        netfs_stat(&netfs_n_rh_download_failed);
                        set_bit(NETFS_RREQ_FAILED, &rreq->flags);
                        rreq->error = subreq->error;
                }
        }

        if (atomic_dec_and_test(&rreq->nr_outstanding))
                netfs_rreq_terminated(rreq, was_async);

        netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_read_subreq_terminated);
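
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * how a filesystem's asynchronous read completion path might map its own
 * result convention onto netfs_read_subreq_terminated().  The
 * netfs_example_* name and the signed-result convention are assumptions.
 */
static void __maybe_unused netfs_example_read_done(struct netfs_io_subrequest *subreq,
                                                   ssize_t result,
                                                   bool was_async)
{
        if (result > 0) {
                /* Record what actually landed in the output buffer before
                 * handing the subrequest back for collection.
                 */
                subreq->transferred += result;
        } else if (result == 0) {
                /* In this hypothetical convention, a zero-byte read means we
                 * ran into the end of file.
                 */
                __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
        }

        /* Pass 0 for a successful or retryable transfer, or a negative errno. */
        netfs_read_subreq_terminated(subreq, result < 0 ? (int)result : 0, was_async);
}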