// SPDX-License-Identifier: GPL-2.0
/*
 * Some low level IO code, and hacks for various block layer limitations
 *
 * Copyright 2010, 2011 Kent Overstreet <[email protected]>
 * Copyright 2012 Google, Inc.
 */

#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "btree_update.h"
#include "buckets.h"
#include "checksum.h"
#include "clock.h"
#include "compress.h"
#include "data_update.h"
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
#include "io_read.h"
#include "io_misc.h"
#include "io_write.h"
#include "reflink.h"
#include "subvolume.h"
#include "trace.h"

#include <linux/sched/mm.h>

#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT

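/*
 * Returns true, probabilistically, if reads from @target should currently be
 * avoided: each member device's congestion counter is decayed by the time
 * since it was last bumped, the decayed values are summed, and the sum is
 * compared against a random threshold.
 */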
static bool bch2_target_congested(struct bch_fs *c, u16 target)
{
        const struct bch_devs_mask *devs;
        unsigned d, nr = 0, total = 0;
        u64 now = local_clock(), last;
        s64 congested;
        struct bch_dev *ca;

        if (!target)
                return false;

        rcu_read_lock();
        devs = bch2_target_to_mask(c, target) ?:
                &c->rw_devs[BCH_DATA_user];

        for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) {
                ca = rcu_dereference(c->devs[d]);
                if (!ca)
                        continue;

                congested = atomic_read(&ca->congested);
                last = READ_ONCE(ca->congested_last);
                if (time_after64(now, last))
                        congested -= (now - last) >> 12;

                total += max(congested, 0LL);
                nr++;
        }
        rcu_read_unlock();

        return bch2_rand_range(nr * CONGESTED_MAX) < total;
}

#else

static bool bch2_target_congested(struct bch_fs *c, u16 target)
{
        return false;
}

#endif

/* Cache promotion on read */

struct promote_op {
        struct rcu_head         rcu;
        u64                     start_time;

        struct rhash_head       hash;
        struct bpos             pos;

        struct data_update      write;
        struct bio_vec          bi_inline_vecs[]; /* must be last */
};

static const struct rhashtable_params bch_promote_params = {
        .head_offset            = offsetof(struct promote_op, hash),
        .key_offset             = offsetof(struct promote_op, pos),
        .key_len                = sizeof(struct bpos),
        .automatic_shrinking    = true,
};

static inline bool have_io_error(struct bch_io_failures *failed)
{
        return failed && failed->nr;
}

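/*
 * Decide whether this read should trigger a promote (or, when @failed is
 * populated, an error-recovery rewrite): returns 0 to proceed, or a
 * -BCH_ERR_nopromote_* code explaining why not.
 */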
static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
                                  struct bpos pos,
                                  struct bch_io_opts opts,
                                  unsigned flags,
                                  struct bch_io_failures *failed)
{
        if (!have_io_error(failed)) {
                BUG_ON(!opts.promote_target);

                if (!(flags & BCH_READ_MAY_PROMOTE))
                        return -BCH_ERR_nopromote_may_not;

                if (bch2_bkey_has_target(c, k, opts.promote_target))
                        return -BCH_ERR_nopromote_already_promoted;

                if (bkey_extent_is_unwritten(k))
                        return -BCH_ERR_nopromote_unwritten;

                if (bch2_target_congested(c, opts.promote_target))
                        return -BCH_ERR_nopromote_congested;
        }

        if (rhashtable_lookup_fast(&c->promote_table, &pos,
                                   bch_promote_params))
                return -BCH_ERR_nopromote_in_flight;

        return 0;
}

static void promote_free(struct bch_fs *c, struct promote_op *op)
{
        int ret;

        bch2_data_update_exit(&op->write);

        ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
                                     bch_promote_params);
        BUG_ON(ret);
        bch2_write_ref_put(c, BCH_WRITE_REF_promote);
        kfree_rcu(op, rcu);
}

static void promote_done(struct bch_write_op *wop)
{
        struct promote_op *op =
                container_of(wop, struct promote_op, write.op);
        struct bch_fs *c = op->write.op.c;

        bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
                               op->start_time);
        promote_free(c, op);
}

static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
{
        struct bio *bio = &op->write.op.wbio.bio;

        trace_and_count(op->write.op.c, read_promote, &rbio->bio);

        /* we now own pages: */
        BUG_ON(!rbio->bounce);
        BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs);

        memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
               sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
        swap(bio->bi_vcnt, rbio->bio.bi_vcnt);

        bch2_data_update_read_done(&op->write, rbio->pick.crc);
}

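/*
 * Allocate and set up a promote operation: takes a write ref, allocates a
 * bounce read bio big enough for the (possibly compressed) extent, registers
 * the operation in the promote table so duplicate promotes aren't issued for
 * the same position, and initializes the data update that will rewrite the
 * extent to the promote target (or, on a failed read, away from the bad
 * pointers).
 */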
static struct promote_op *__promote_alloc(struct btree_trans *trans,
                                          enum btree_id btree_id,
                                          struct bkey_s_c k,
                                          struct bpos pos,
                                          struct extent_ptr_decoded *pick,
                                          struct bch_io_opts opts,
                                          unsigned sectors,
                                          struct bch_read_bio **rbio,
                                          struct bch_io_failures *failed)
{
        struct bch_fs *c = trans->c;
        struct promote_op *op = NULL;
        struct bio *bio;
        unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
        int ret;

        if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
                return ERR_PTR(-BCH_ERR_nopromote_no_writes);

        op = kzalloc(struct_size(op, bi_inline_vecs, pages), GFP_KERNEL);
        if (!op) {
                ret = -BCH_ERR_nopromote_enomem;
                goto err;
        }

        op->start_time = local_clock();
        op->pos = pos;

        /*
         * We don't use the mempool here because extents that aren't
         * checksummed or compressed can be too big for the mempool:
         */
        *rbio = kzalloc(sizeof(struct bch_read_bio) +
                        sizeof(struct bio_vec) * pages,
                        GFP_KERNEL);
        if (!*rbio) {
                ret = -BCH_ERR_nopromote_enomem;
                goto err;
        }

        rbio_init(&(*rbio)->bio, opts);
        bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0);

        if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, GFP_KERNEL)) {
                ret = -BCH_ERR_nopromote_enomem;
                goto err;
        }

        (*rbio)->bounce         = true;
        (*rbio)->split          = true;
        (*rbio)->kmalloc        = true;

        if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
                                          bch_promote_params)) {
                ret = -BCH_ERR_nopromote_in_flight;
                goto err;
        }

        bio = &op->write.op.wbio.bio;
        bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);

        struct data_update_opts update_opts = {};

        if (!have_io_error(failed)) {
                update_opts.target = opts.promote_target;
                update_opts.extra_replicas = 1;
                update_opts.write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED;
        } else {
                update_opts.target = opts.foreground_target;

                struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
                unsigned ptr_bit = 1;
                bkey_for_each_ptr(ptrs, ptr) {
                        if (bch2_dev_io_failures(failed, ptr->dev))
                                update_opts.rewrite_ptrs |= ptr_bit;
                        ptr_bit <<= 1;
                }
        }

        ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
                        writepoint_hashed((unsigned long) current),
                        opts,
                        update_opts,
                        btree_id, k);
        /*
         * possible errors: -BCH_ERR_nocow_lock_blocked,
         * -BCH_ERR_ENOSPC_disk_reservation:
         */
        if (ret) {
                BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
                                              bch_promote_params));
                goto err;
        }

        op->write.op.end_io = promote_done;

        return op;
err:
        if (*rbio)
                bio_free_pages(&(*rbio)->bio);
        kfree(*rbio);
        *rbio = NULL;
        /* We may have added to the rhashtable and thus need rcu freeing: */
        kfree_rcu(op, rcu);
        bch2_write_ref_put(c, BCH_WRITE_REF_promote);
        return ERR_PTR(ret);
}

noinline
static struct promote_op *promote_alloc(struct btree_trans *trans,
                                        struct bvec_iter iter,
                                        struct bkey_s_c k,
                                        struct extent_ptr_decoded *pick,
                                        struct bch_io_opts opts,
                                        unsigned flags,
                                        struct bch_read_bio **rbio,
                                        bool *bounce,
                                        bool *read_full,
                                        struct bch_io_failures *failed)
{
        struct bch_fs *c = trans->c;
        /*
         * if failed != NULL we're not actually doing a promote, we're
         * recovering from an io/checksum error
         */
        bool promote_full = (have_io_error(failed) ||
                             *read_full ||
                             READ_ONCE(c->opts.promote_whole_extents));
        /* data might have to be decompressed in the write path: */
        unsigned sectors = promote_full
                ? max(pick->crc.compressed_size, pick->crc.live_size)
                : bvec_iter_sectors(iter);
        struct bpos pos = promote_full
                ? bkey_start_pos(k.k)
                : POS(k.k->p.inode, iter.bi_sector);
        struct promote_op *promote;
        int ret;

        ret = should_promote(c, k, pos, opts, flags, failed);
        if (ret)
                goto nopromote;

        promote = __promote_alloc(trans,
                                  k.k->type == KEY_TYPE_reflink_v
                                  ? BTREE_ID_reflink
                                  : BTREE_ID_extents,
                                  k, pos, pick, opts, sectors, rbio, failed);
        ret = PTR_ERR_OR_ZERO(promote);
        if (ret)
                goto nopromote;

        *bounce         = true;
        *read_full      = promote_full;
        return promote;
nopromote:
        trace_read_nopromote(c, ret);
        return NULL;
}

/* Read */

static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
                                   struct bch_read_bio *rbio, struct bpos read_pos)
{
        return bch2_inum_offset_err_msg_trans(trans, out,
                (subvol_inum) { rbio->subvol, read_pos.inode },
                read_pos.offset << 9);
}

static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out,
                              struct bch_read_bio *rbio, struct bpos read_pos)
{
        bch2_trans_run(c, bch2_read_err_msg_trans(trans, out, rbio, read_pos));
}

#define READ_RETRY_AVOID        1
#define READ_RETRY              2
#define READ_ERR                3

enum rbio_context {
        RBIO_CONTEXT_NULL,
        RBIO_CONTEXT_HIGHPRI,
        RBIO_CONTEXT_UNBOUND,
};

static inline struct bch_read_bio *
bch2_rbio_parent(struct bch_read_bio *rbio)
{
        return rbio->split ? rbio->parent : rbio;
}

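/*
 * Run @fn immediately if we're already in a context at least as unrestricted
 * as @context, otherwise record @fn and @context in the rbio and punt the
 * work to @wq.
 */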
__always_inline
static void bch2_rbio_punt(struct bch_read_bio *rbio, work_func_t fn,
                           enum rbio_context context,
                           struct workqueue_struct *wq)
{
        if (context <= rbio->context) {
                fn(&rbio->work);
        } else {
                rbio->work.func         = fn;
                rbio->context           = context;
                queue_work(wq, &rbio->work);
        }
}

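/*
 * Tear down an rbio: drop any promote that was never started, return bounce
 * pages to the pool, and if this rbio was a split free it (or put its bio)
 * and hand back the parent for the caller to continue with.
 */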
static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
{
        BUG_ON(rbio->bounce && !rbio->split);

        if (rbio->promote)
                promote_free(rbio->c, rbio->promote);
        rbio->promote = NULL;

        if (rbio->bounce)
                bch2_bio_free_pages_pool(rbio->c, &rbio->bio);

        if (rbio->split) {
                struct bch_read_bio *parent = rbio->parent;

                if (rbio->kmalloc)
                        kfree(rbio);
                else
                        bio_put(&rbio->bio);

                rbio = parent;
        }

        return rbio;
}

/*
 * Only called on a top level bch_read_bio to complete an entire read request,
 * not a split:
 */
static void bch2_rbio_done(struct bch_read_bio *rbio)
{
        if (rbio->start_time)
                bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
                                       rbio->start_time);
        bio_endio(&rbio->bio);
}

static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
                                     struct bvec_iter bvec_iter,
                                     struct bch_io_failures *failed,
                                     unsigned flags)
{
        struct btree_trans *trans = bch2_trans_get(c);
        struct btree_iter iter;
        struct bkey_buf sk;
        struct bkey_s_c k;
        int ret;

        flags &= ~BCH_READ_LAST_FRAGMENT;
        flags |= BCH_READ_MUST_CLONE;

        bch2_bkey_buf_init(&sk);

        bch2_trans_iter_init(trans, &iter, rbio->data_btree,
                             rbio->read_pos, BTREE_ITER_slots);
retry:
        bch2_trans_begin(trans);
        rbio->bio.bi_status = 0;

        ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
        if (ret)
                goto err;

        bch2_bkey_buf_reassemble(&sk, c, k);
        k = bkey_i_to_s_c(sk.k);

        if (!bch2_bkey_matches_ptr(c, k,
                                   rbio->pick.ptr,
                                   rbio->data_pos.offset -
                                   rbio->pick.crc.offset)) {
                /* extent we wanted to read no longer exists: */
                rbio->hole = true;
                goto out;
        }

        ret = __bch2_read_extent(trans, rbio, bvec_iter,
                                 rbio->read_pos,
                                 rbio->data_btree,
                                 k, 0, failed, flags);
        if (ret == READ_RETRY)
                goto retry;
        if (ret)
                goto err;
out:
        bch2_rbio_done(rbio);
        bch2_trans_iter_exit(trans, &iter);
        bch2_trans_put(trans);
        bch2_bkey_buf_exit(&sk, c);
        return;
err:
        rbio->bio.bi_status = BLK_STS_IOERR;
        goto out;
}

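/*
 * Retry worker: reissues a failed read with promotion disabled, marking the
 * previously picked device as failed when the retry should avoid it.
 */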
static void bch2_rbio_retry(struct work_struct *work)
{
        struct bch_read_bio *rbio =
                container_of(work, struct bch_read_bio, work);
        struct bch_fs *c        = rbio->c;
        struct bvec_iter iter   = rbio->bvec_iter;
        unsigned flags          = rbio->flags;
        subvol_inum inum = {
                .subvol = rbio->subvol,
                .inum   = rbio->read_pos.inode,
        };
        struct bch_io_failures failed = { .nr = 0 };

        trace_and_count(c, read_retry, &rbio->bio);

        if (rbio->retry == READ_RETRY_AVOID)
                bch2_mark_io_failure(&failed, &rbio->pick);

        rbio->bio.bi_status = 0;

        rbio = bch2_rbio_free(rbio);

        flags |= BCH_READ_IN_RETRY;
        flags &= ~BCH_READ_MAY_PROMOTE;

        if (flags & BCH_READ_NODECODE) {
                bch2_read_retry_nodecode(c, rbio, iter, &failed, flags);
        } else {
                flags &= ~BCH_READ_LAST_FRAGMENT;
                flags |= BCH_READ_MUST_CLONE;

                __bch2_read(c, rbio, iter, inum, &failed, flags);
        }
}

static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
                            blk_status_t error)
{
        rbio->retry = retry;

        if (rbio->flags & BCH_READ_IN_RETRY)
                return;

        if (retry == READ_ERR) {
                rbio = bch2_rbio_free(rbio);

                rbio->bio.bi_status = error;
                bch2_rbio_done(rbio);
        } else {
                bch2_rbio_punt(rbio, bch2_rbio_retry,
                               RBIO_CONTEXT_UNBOUND, system_unbound_wq);
        }
}

static void bch2_read_io_err(struct work_struct *work)
{
        struct bch_read_bio *rbio =
                container_of(work, struct bch_read_bio, work);
        struct bio *bio = &rbio->bio;
        struct bch_fs *c        = rbio->c;
        struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
        struct printbuf buf = PRINTBUF;

        bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
        prt_printf(&buf, "data read error: %s", bch2_blk_status_to_str(bio->bi_status));

        if (ca) {
                bch2_io_error(ca, BCH_MEMBER_ERROR_read);
                bch_err_ratelimited(ca, "%s", buf.buf);
        } else {
                bch_err_ratelimited(c, "%s", buf.buf);
        }

        printbuf_exit(&buf);
        bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
}

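/*
 * Checksum narrowing: when we've read and verified more of an extent than the
 * key actually references, recompute a checksum covering just the live range
 * and update the key, so future reads of it don't have to read and checksum
 * the whole original extent.
 */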
static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
                                   struct bch_read_bio *rbio)
{
        struct bch_fs *c = rbio->c;
        u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset;
        struct bch_extent_crc_unpacked new_crc;
        struct btree_iter iter;
        struct bkey_i *new;
        struct bkey_s_c k;
        int ret = 0;

        if (crc_is_compressed(rbio->pick.crc))
                return 0;

        k = bch2_bkey_get_iter(trans, &iter, rbio->data_btree, rbio->data_pos,
                               BTREE_ITER_slots|BTREE_ITER_intent);
        if ((ret = bkey_err(k)))
                goto out;

        if (bversion_cmp(k.k->bversion, rbio->version) ||
            !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
                goto out;

        /* Extent was merged? */
        if (bkey_start_offset(k.k) < data_offset ||
            k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size)
                goto out;

        if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
                        rbio->pick.crc, NULL, &new_crc,
                        bkey_start_offset(k.k) - data_offset, k.k->size,
                        rbio->pick.crc.csum_type)) {
                bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
                ret = 0;
                goto out;
        }

        /*
         * going to be temporarily appending another checksum entry:
         */
        new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
                                 sizeof(struct bch_extent_crc128));
        if ((ret = PTR_ERR_OR_ZERO(new)))
                goto out;

        bkey_reassemble(new, k);

        if (!bch2_bkey_narrow_crcs(new, new_crc))
                goto out;

        ret = bch2_trans_update(trans, &iter, new,
                                BTREE_UPDATE_internal_snapshot_node);
out:
        bch2_trans_iter_exit(trans, &iter);
        return ret;
}

static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
{
        bch2_trans_commit_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
                             __bch2_rbio_narrow_crcs(trans, rbio));
}

static void bch2_read_csum_err(struct work_struct *work)
{
        struct bch_read_bio *rbio =
                container_of(work, struct bch_read_bio, work);
        struct bch_fs *c        = rbio->c;
        struct bio *src         = &rbio->bio;
        struct bch_extent_crc_unpacked crc = rbio->pick.crc;
        struct nonce nonce = extent_nonce(rbio->version, crc);
        struct bch_csum csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
        struct printbuf buf = PRINTBUF;

        bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
        prt_str(&buf, "data ");
        bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum);

        struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
        if (ca) {
                bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
                bch_err_ratelimited(ca, "%s", buf.buf);
        } else {
                bch_err_ratelimited(c, "%s", buf.buf);
        }

        bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
        printbuf_exit(&buf);
}

static void bch2_read_decompress_err(struct work_struct *work)
{
        struct bch_read_bio *rbio =
                container_of(work, struct bch_read_bio, work);
        struct bch_fs *c        = rbio->c;
        struct printbuf buf = PRINTBUF;

        bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
        prt_str(&buf, "decompression error");

        struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
        if (ca)
                bch_err_ratelimited(ca, "%s", buf.buf);
        else
                bch_err_ratelimited(c, "%s", buf.buf);

        bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
        printbuf_exit(&buf);
}

static void bch2_read_decrypt_err(struct work_struct *work)
{
        struct bch_read_bio *rbio =
                container_of(work, struct bch_read_bio, work);
        struct bch_fs *c        = rbio->c;
        struct printbuf buf = PRINTBUF;

        bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
        prt_str(&buf, "decrypt error");

        struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
        if (ca)
                bch_err_ratelimited(ca, "%s", buf.buf);
        else
                bch_err_ratelimited(c, "%s", buf.buf);

        bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
        printbuf_exit(&buf);
}

/* Inner part that may run in process context */
static void __bch2_read_endio(struct work_struct *work)
{
        struct bch_read_bio *rbio =
                container_of(work, struct bch_read_bio, work);
        struct bch_fs *c        = rbio->c;
        struct bio *src         = &rbio->bio;
        struct bio *dst         = &bch2_rbio_parent(rbio)->bio;
        struct bvec_iter dst_iter = rbio->bvec_iter;
        struct bch_extent_crc_unpacked crc = rbio->pick.crc;
        struct nonce nonce = extent_nonce(rbio->version, crc);
        unsigned nofs_flags;
        struct bch_csum csum;
        int ret;

        nofs_flags = memalloc_nofs_save();

        /* Reset iterator for checksumming and copying bounced data: */
        if (rbio->bounce) {
                src->bi_iter.bi_size            = crc.compressed_size << 9;
                src->bi_iter.bi_idx             = 0;
                src->bi_iter.bi_bvec_done       = 0;
        } else {
                src->bi_iter                    = rbio->bvec_iter;
        }

        csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
        if (bch2_crc_cmp(csum, rbio->pick.crc.csum) && !c->opts.no_data_io)
                goto csum_err;

        /*
         * XXX
         * We need to rework the narrow_crcs path to deliver the read completion
         * first, and then punt to a different workqueue, otherwise we're
         * holding up reads while doing btree updates which is bad for memory
         * reclaim.
         */
        if (unlikely(rbio->narrow_crcs))
                bch2_rbio_narrow_crcs(rbio);

        if (rbio->flags & BCH_READ_NODECODE)
                goto nodecode;

        /* Adjust crc to point to subset of data we want: */
        crc.offset     += rbio->offset_into_extent;
        crc.live_size   = bvec_iter_sectors(rbio->bvec_iter);

        if (crc_is_compressed(crc)) {
                ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
                if (ret)
                        goto decrypt_err;

                if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
                    !c->opts.no_data_io)
                        goto decompression_err;
        } else {
                /* don't need to decrypt the entire bio: */
                nonce = nonce_add(nonce, crc.offset << 9);
                bio_advance(src, crc.offset << 9);

                BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
                src->bi_iter.bi_size = dst_iter.bi_size;

                ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
                if (ret)
                        goto decrypt_err;

                if (rbio->bounce) {
                        struct bvec_iter src_iter = src->bi_iter;

                        bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
                }
        }

        if (rbio->promote) {
                /*
                 * Re encrypt data we decrypted, so it's consistent with
                 * rbio->crc:
                 */
                ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
                if (ret)
                        goto decrypt_err;

                promote_start(rbio->promote, rbio);
                rbio->promote = NULL;
        }
nodecode:
        if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) {
                rbio = bch2_rbio_free(rbio);
                bch2_rbio_done(rbio);
        }
out:
        memalloc_nofs_restore(nofs_flags);
        return;
csum_err:
        /*
         * Checksum error: if the bio wasn't bounced, we may have been
         * reading into buffers owned by userspace (that userspace can
         * scribble over) - retry the read, bouncing it this time:
         */
        if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) {
                rbio->flags |= BCH_READ_MUST_BOUNCE;
                bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR);
                goto out;
        }

        bch2_rbio_punt(rbio, bch2_read_csum_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
        goto out;
decompression_err:
        bch2_rbio_punt(rbio, bch2_read_decompress_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
        goto out;
decrypt_err:
        bch2_rbio_punt(rbio, bch2_read_decrypt_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
        goto out;
}

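/*
 * bio completion handler: accounts IO latency, drops the device ref, and
 * handles IO errors and races with cached pointers being reused; the rest of
 * completion (checksum verification, decrypt, decompress, promote) is punted
 * to an appropriate workqueue context via __bch2_read_endio().
 */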
static void bch2_read_endio(struct bio *bio)
{
        struct bch_read_bio *rbio =
                container_of(bio, struct bch_read_bio, bio);
        struct bch_fs *c        = rbio->c;
        struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
        struct workqueue_struct *wq = NULL;
        enum rbio_context context = RBIO_CONTEXT_NULL;

        if (rbio->have_ioref) {
                bch2_latency_acct(ca, rbio->submit_time, READ);
                percpu_ref_put(&ca->io_ref);
        }

        if (!rbio->split)
                rbio->bio.bi_end_io = rbio->end_io;

        if (unlikely(bio->bi_status)) {
                bch2_rbio_punt(rbio, bch2_read_io_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
                return;
        }

        if (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
            (ca && dev_ptr_stale(ca, &rbio->pick.ptr))) {
                trace_and_count(c, read_reuse_race, &rbio->bio);

                if (rbio->flags & BCH_READ_RETRY_IF_STALE)
                        bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN);
                else
                        bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN);
                return;
        }

        if (rbio->narrow_crcs ||
            rbio->promote ||
            crc_is_compressed(rbio->pick.crc) ||
            bch2_csum_type_is_encryption(rbio->pick.crc.csum_type))
                context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq;
        else if (rbio->pick.crc.csum_type)
                context = RBIO_CONTEXT_HIGHPRI, wq = system_highpri_wq;

        bch2_rbio_punt(rbio, __bch2_read_endio, context, wq);
}

static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
                                                   struct bch_dev *ca,
                                                   struct bkey_s_c k,
                                                   struct bch_extent_ptr ptr)
{
        struct bch_fs *c = trans->c;
        struct btree_iter iter;
        struct printbuf buf = PRINTBUF;
        int ret;

        bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
                             PTR_BUCKET_POS(ca, &ptr),
                             BTREE_ITER_cached);

        int gen = bucket_gen_get(ca, iter.pos.offset);
        if (gen >= 0) {
                prt_printf(&buf, "Attempting to read from stale dirty pointer:\n");
                printbuf_indent_add(&buf, 2);

                bch2_bkey_val_to_text(&buf, c, k);
                prt_newline(&buf);

                prt_printf(&buf, "memory gen: %u", gen);

                ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
                if (!ret) {
                        prt_newline(&buf);
                        bch2_bkey_val_to_text(&buf, c, k);
                }
        } else {
                prt_printf(&buf, "Attempting to read from invalid bucket %llu:%llu:\n",
                           iter.pos.inode, iter.pos.offset);
                printbuf_indent_add(&buf, 2);

                prt_printf(&buf, "first bucket %u nbuckets %llu\n",
                           ca->mi.first_bucket, ca->mi.nbuckets);

                bch2_bkey_val_to_text(&buf, c, k);
                prt_newline(&buf);
        }

        bch2_fs_inconsistent(c, "%s", buf.buf);

        bch2_trans_iter_exit(trans, &iter);
        printbuf_exit(&buf);
}

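/*
 * Read a single extent, or a fragment of one: pick a device, decide whether
 * to bounce and/or read the full extent (needed for checksummed, compressed
 * or encrypted data), optionally set up a promote, then submit the IO.
 * Returns 0 when the read was issued asynchronously, or a READ_* code in the
 * synchronous BCH_READ_IN_RETRY case.
 */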
int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
                       struct bvec_iter iter, struct bpos read_pos,
                       enum btree_id data_btree, struct bkey_s_c k,
                       unsigned offset_into_extent,
                       struct bch_io_failures *failed, unsigned flags)
{
        struct bch_fs *c = trans->c;
        struct extent_ptr_decoded pick;
        struct bch_read_bio *rbio = NULL;
        struct promote_op *promote = NULL;
        bool bounce = false, read_full = false, narrow_crcs = false;
        struct bpos data_pos = bkey_start_pos(k.k);
        int pick_ret;

        if (bkey_extent_is_inline_data(k.k)) {
                unsigned bytes = min_t(unsigned, iter.bi_size,
                                       bkey_inline_data_bytes(k.k));

                swap(iter.bi_size, bytes);
                memcpy_to_bio(&orig->bio, iter, bkey_inline_data_p(k));
                swap(iter.bi_size, bytes);
                bio_advance_iter(&orig->bio, &iter, bytes);
                zero_fill_bio_iter(&orig->bio, iter);
                goto out_read_done;
        }
retry_pick:
        pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);

        /* hole or reservation - just zero fill: */
        if (!pick_ret)
                goto hole;

        if (unlikely(pick_ret < 0)) {
                struct printbuf buf = PRINTBUF;
                bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
                prt_printf(&buf, "no device to read from: %s\n  ", bch2_err_str(pick_ret));
                bch2_bkey_val_to_text(&buf, c, k);

                bch_err_ratelimited(c, "%s", buf.buf);
                printbuf_exit(&buf);
                goto err;
        }

        if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && !c->chacha20) {
                struct printbuf buf = PRINTBUF;
                bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
                prt_printf(&buf, "attempting to read encrypted data without encryption key\n  ");
                bch2_bkey_val_to_text(&buf, c, k);

                bch_err_ratelimited(c, "%s", buf.buf);
                printbuf_exit(&buf);
                goto err;
        }

        struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);

        /*
         * Stale dirty pointers are treated as IO errors, but @failed isn't
         * allocated unless we're in the retry path - so if we're not in the
         * retry path, don't check here, it'll be caught in bch2_read_endio()
         * and we'll end up in the retry path:
         */
        if ((flags & BCH_READ_IN_RETRY) &&
            !pick.ptr.cached &&
            ca &&
            unlikely(dev_ptr_stale(ca, &pick.ptr))) {
                read_from_stale_dirty_pointer(trans, ca, k, pick.ptr);
                bch2_mark_io_failure(failed, &pick);
                percpu_ref_put(&ca->io_ref);
                goto retry_pick;
        }

        /*
         * Unlock the iterator while the btree node's lock is still in
         * cache, before doing the IO:
         */
        bch2_trans_unlock(trans);

        if (flags & BCH_READ_NODECODE) {
                /*
                 * can happen if we retry, and the extent we were going to read
                 * has been merged in the meantime:
                 */
                if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS) {
                        if (ca)
                                percpu_ref_put(&ca->io_ref);
                        goto hole;
                }

                iter.bi_size    = pick.crc.compressed_size << 9;
                goto get_bio;
        }

        if (!(flags & BCH_READ_LAST_FRAGMENT) ||
            bio_flagged(&orig->bio, BIO_CHAIN))
                flags |= BCH_READ_MUST_CLONE;

        narrow_crcs = !(flags & BCH_READ_IN_RETRY) &&
                bch2_can_narrow_extent_crcs(k, pick.crc);

        if (narrow_crcs && (flags & BCH_READ_USER_MAPPED))
                flags |= BCH_READ_MUST_BOUNCE;

        EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);

        if (crc_is_compressed(pick.crc) ||
            (pick.crc.csum_type != BCH_CSUM_none &&
             (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
              (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
               (flags & BCH_READ_USER_MAPPED)) ||
              (flags & BCH_READ_MUST_BOUNCE)))) {
                read_full = true;
                bounce = true;
        }

        if (orig->opts.promote_target || have_io_error(failed))
                promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
                                        &rbio, &bounce, &read_full, failed);

        if (!read_full) {
                EBUG_ON(crc_is_compressed(pick.crc));
                EBUG_ON(pick.crc.csum_type &&
                        (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
                         bvec_iter_sectors(iter) != pick.crc.live_size ||
                         pick.crc.offset ||
                         offset_into_extent));

                data_pos.offset += offset_into_extent;
                pick.ptr.offset += pick.crc.offset +
                        offset_into_extent;
                offset_into_extent              = 0;
                pick.crc.compressed_size        = bvec_iter_sectors(iter);
                pick.crc.uncompressed_size      = bvec_iter_sectors(iter);
                pick.crc.offset                 = 0;
                pick.crc.live_size              = bvec_iter_sectors(iter);
        }
get_bio:
        if (rbio) {
                /*
                 * promote already allocated bounce rbio:
                 * promote needs to allocate a bio big enough for uncompressing
                 * data in the write path, but we're not going to use it all
                 * here:
                 */
                EBUG_ON(rbio->bio.bi_iter.bi_size <
                       pick.crc.compressed_size << 9);
                rbio->bio.bi_iter.bi_size =
                        pick.crc.compressed_size << 9;
        } else if (bounce) {
                unsigned sectors = pick.crc.compressed_size;

                rbio = rbio_init(bio_alloc_bioset(NULL,
                                                  DIV_ROUND_UP(sectors, PAGE_SECTORS),
                                                  0,
                                                  GFP_NOFS,
                                                  &c->bio_read_split),
                                 orig->opts);

                bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
                rbio->bounce    = true;
                rbio->split     = true;
        } else if (flags & BCH_READ_MUST_CLONE) {
                /*
                 * Have to clone if there were any splits, due to error
                 * reporting issues (if a split errored, and retrying didn't
                 * work, when it reports the error to its parent (us) we don't
                 * know if the error was from our bio, and we should retry, or
                 * from the whole bio, in which case we don't want to retry and
                 * lose the error)
                 */
                rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS,
                                                 &c->bio_read_split),
                                 orig->opts);
                rbio->bio.bi_iter = iter;
                rbio->split     = true;
        } else {
                rbio = orig;
                rbio->bio.bi_iter = iter;
                EBUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN));
        }

        EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size);

        rbio->c                 = c;
        rbio->submit_time       = local_clock();
        if (rbio->split)
                rbio->parent    = orig;
        else
                rbio->end_io    = orig->bio.bi_end_io;
        rbio->bvec_iter         = iter;
        rbio->offset_into_extent= offset_into_extent;
        rbio->flags             = flags;
        rbio->have_ioref        = ca != NULL;
        rbio->narrow_crcs       = narrow_crcs;
        rbio->hole              = 0;
        rbio->retry             = 0;
        rbio->context           = 0;
        /* XXX: only initialize this if needed */
        rbio->devs_have         = bch2_bkey_devs(k);
        rbio->pick              = pick;
        rbio->subvol            = orig->subvol;
        rbio->read_pos          = read_pos;
        rbio->data_btree        = data_btree;
        rbio->data_pos          = data_pos;
        rbio->version           = k.k->bversion;
        rbio->promote           = promote;
        INIT_WORK(&rbio->work, NULL);

        if (flags & BCH_READ_NODECODE)
                orig->pick = pick;

        rbio->bio.bi_opf        = orig->bio.bi_opf;
        rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
        rbio->bio.bi_end_io     = bch2_read_endio;

        if (rbio->bounce)
                trace_and_count(c, read_bounce, &rbio->bio);

        this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio));
        bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);

        /*
         * If it's being moved internally, we don't want to flag it as a cache
         * hit:
         */
        if (ca && pick.ptr.cached && !(flags & BCH_READ_NODECODE))
                bch2_bucket_io_time_reset(trans, pick.ptr.dev,
                        PTR_BUCKET_NR(ca, &pick.ptr), READ);

        if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) {
                bio_inc_remaining(&orig->bio);
                trace_and_count(c, read_split, &orig->bio);
        }

        if (!rbio->pick.idx) {
                if (unlikely(!rbio->have_ioref)) {
                        struct printbuf buf = PRINTBUF;
                        bch2_read_err_msg_trans(trans, &buf, rbio, read_pos);
                        prt_printf(&buf, "no device to read from:\n  ");
                        bch2_bkey_val_to_text(&buf, c, k);

                        bch_err_ratelimited(c, "%s", buf.buf);
                        printbuf_exit(&buf);

                        bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
                        goto out;
                }

                this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_user],
                             bio_sectors(&rbio->bio));
                bio_set_dev(&rbio->bio, ca->disk_sb.bdev);

                if (unlikely(c->opts.no_data_io)) {
                        if (likely(!(flags & BCH_READ_IN_RETRY)))
                                bio_endio(&rbio->bio);
                } else {
                        if (likely(!(flags & BCH_READ_IN_RETRY)))
                                submit_bio(&rbio->bio);
                        else
                                submit_bio_wait(&rbio->bio);
                }

                /*
                 * We just submitted IO which may block, we expect relock fail
                 * events and shouldn't count them:
                 */
                trans->notrace_relock_fail = true;
        } else {
                /* Attempting reconstruct read: */
                if (bch2_ec_read_extent(trans, rbio, k)) {
                        bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
                        goto out;
                }

                if (likely(!(flags & BCH_READ_IN_RETRY)))
                        bio_endio(&rbio->bio);
        }
out:
        if (likely(!(flags & BCH_READ_IN_RETRY))) {
                return 0;
        } else {
                int ret;

                rbio->context = RBIO_CONTEXT_UNBOUND;
                bch2_read_endio(&rbio->bio);

                ret = rbio->retry;
                rbio = bch2_rbio_free(rbio);

                if (ret == READ_RETRY_AVOID) {
                        bch2_mark_io_failure(failed, &pick);
                        ret = READ_RETRY;
                }

                if (!ret)
                        goto out_read_done;

                return ret;
        }

err:
        if (flags & BCH_READ_IN_RETRY)
                return READ_ERR;

        orig->bio.bi_status = BLK_STS_IOERR;
        goto out_read_done;

hole:
        /*
         * won't normally happen in the BCH_READ_NODECODE
         * (bch2_move_extent()) path, but if we retry and the extent we wanted
         * to read no longer exists we have to signal that:
         */
        if (flags & BCH_READ_NODECODE)
                orig->hole = true;

        zero_fill_bio_iter(&orig->bio, iter);
out_read_done:
        if (flags & BCH_READ_LAST_FRAGMENT)
                bch2_rbio_done(orig);
        return 0;
}

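/*
 * Top level read path: walk the extents btree for the given inode, resolving
 * indirect (reflinked) extents, and issue a read for each extent fragment
 * that overlaps the request.
 */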
void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
                 struct bvec_iter bvec_iter, subvol_inum inum,
                 struct bch_io_failures *failed, unsigned flags)
{
        struct btree_trans *trans = bch2_trans_get(c);
        struct btree_iter iter;
        struct bkey_buf sk;
        struct bkey_s_c k;
        int ret;

        BUG_ON(flags & BCH_READ_NODECODE);

        bch2_bkey_buf_init(&sk);
        bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
                             POS(inum.inum, bvec_iter.bi_sector),
                             BTREE_ITER_slots);

        while (1) {
                enum btree_id data_btree = BTREE_ID_extents;

                bch2_trans_begin(trans);

                u32 snapshot;
                ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
                if (ret)
                        goto err;

                bch2_btree_iter_set_snapshot(&iter, snapshot);

                bch2_btree_iter_set_pos(&iter,
                                POS(inum.inum, bvec_iter.bi_sector));

                k = bch2_btree_iter_peek_slot(&iter);
                ret = bkey_err(k);
                if (ret)
                        goto err;

                s64 offset_into_extent = iter.pos.offset -
                        bkey_start_offset(k.k);
                unsigned sectors = k.k->size - offset_into_extent;

                bch2_bkey_buf_reassemble(&sk, c, k);

                ret = bch2_read_indirect_extent(trans, &data_btree,
                                        &offset_into_extent, &sk);
                if (ret)
                        goto err;

                k = bkey_i_to_s_c(sk.k);

                /*
                 * With indirect extents, the amount of data to read is the min
                 * of the original extent and the indirect extent:
                 */
                sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent);

                unsigned bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
                swap(bvec_iter.bi_size, bytes);

                if (bvec_iter.bi_size == bytes)
                        flags |= BCH_READ_LAST_FRAGMENT;

                ret = __bch2_read_extent(trans, rbio, bvec_iter, iter.pos,
                                         data_btree, k,
                                         offset_into_extent, failed, flags);
                if (ret)
                        goto err;

                if (flags & BCH_READ_LAST_FRAGMENT)
                        break;

                swap(bvec_iter.bi_size, bytes);
                bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
err:
                if (ret &&
                    !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
                    ret != READ_RETRY &&
                    ret != READ_RETRY_AVOID)
                        break;
        }

        bch2_trans_iter_exit(trans, &iter);

        if (ret) {
                struct printbuf buf = PRINTBUF;
                bch2_inum_offset_err_msg_trans(trans, &buf, inum, bvec_iter.bi_sector << 9);
                prt_printf(&buf, "read error %i from btree lookup", ret);
                bch_err_ratelimited(c, "%s", buf.buf);
                printbuf_exit(&buf);

                rbio->bio.bi_status = BLK_STS_IOERR;
                bch2_rbio_done(rbio);
        }

        bch2_trans_put(trans);
        bch2_bkey_buf_exit(&sk, c);
}

void bch2_fs_io_read_exit(struct bch_fs *c)
{
        if (c->promote_table.tbl)
                rhashtable_destroy(&c->promote_table);
        bioset_exit(&c->bio_read_split);
        bioset_exit(&c->bio_read);
}

int bch2_fs_io_read_init(struct bch_fs *c)
{
        if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
                        BIOSET_NEED_BVECS))
                return -BCH_ERR_ENOMEM_bio_read_init;

        if (bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
                        BIOSET_NEED_BVECS))
                return -BCH_ERR_ENOMEM_bio_read_split_init;

        if (rhashtable_init(&c->promote_table, &bch_promote_params))
                return -BCH_ERR_ENOMEM_promote_table_init;

        return 0;
}