fs/bcachefs/fs-io-pagecache.c (Linux 6.14-rc3)
// SPDX-License-Identifier: GPL-2.0
#ifndef NO_BCACHEFS_FS

#include "bcachefs.h"
#include "btree_iter.h"
#include "extents.h"
#include "fs-io.h"
#include "fs-io-pagecache.h"
#include "subvolume.h"

#include <linux/pagevec.h>
#include <linux/writeback.h>

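/*
 * Get a run of contiguous folios covering [start, end): folios are looked up
 * (and, with FGP_CREAT, created) and appended to @fs until we hit a gap in
 * the pagecache. Past the first 1MB, FGP_CREAT is dropped so only folios that
 * already exist are picked up. Returns 0 if at least one folio was obtained,
 * otherwise an error:
 */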
int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
                                     loff_t start, u64 end,
                                     fgf_t fgp_flags, gfp_t gfp,
                                     folios *fs)
{
        struct folio *f;
        u64 pos = start;
        int ret = 0;

        while (pos < end) {
                if ((u64) pos >= (u64) start + (1ULL << 20))
                        fgp_flags &= ~FGP_CREAT;

                ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL);
                if (ret)
                        break;

                f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp);
                if (IS_ERR(f))
                        break;

                BUG_ON(fs->nr && folio_pos(f) != pos);

                pos = folio_end_pos(f);
                darray_push(fs, f);
        }

        if (!fs->nr && !ret && (fgp_flags & FGP_CREAT))
                ret = -ENOMEM;

        return fs->nr ? 0 : ret;
}

/* pagecache_block must be held */
int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
                                            loff_t start, loff_t end)
{
        int ret;

        /*
         * XXX: the way this is currently implemented, we can spin if a process
         * is continually redirtying a specific page
         */
        do {
                if (!mapping->nrpages)
                        return 0;

                ret = filemap_write_and_wait_range(mapping, start, end);
                if (ret)
                        break;

                if (!mapping->nrpages)
                        return 0;

                ret = invalidate_inode_pages2_range(mapping,
                                start >> PAGE_SHIFT,
                                end >> PAGE_SHIFT);
        } while (ret == -EBUSY);

        return ret;
}

#if 0
/* Useful for debug tracing: */
static const char * const bch2_folio_sector_states[] = {
#define x(n)    #n,
        BCH_FOLIO_SECTOR_STATE()
#undef x
        NULL
};
#endif

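/*
 * Per-sector state transitions: dirty and reserved are tracked as orthogonal
 * properties, so dirtying a sector preserves its reservation and vice versa:
 */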
static inline enum bch_folio_sector_state
folio_sector_dirty(enum bch_folio_sector_state state)
{
        switch (state) {
        case SECTOR_unallocated:
                return SECTOR_dirty;
        case SECTOR_reserved:
                return SECTOR_dirty_reserved;
        default:
                return state;
        }
}

static inline enum bch_folio_sector_state
folio_sector_undirty(enum bch_folio_sector_state state)
{
        switch (state) {
        case SECTOR_dirty:
                return SECTOR_unallocated;
        case SECTOR_dirty_reserved:
                return SECTOR_reserved;
        default:
                return state;
        }
}

static inline enum bch_folio_sector_state
folio_sector_reserve(enum bch_folio_sector_state state)
{
        switch (state) {
        case SECTOR_unallocated:
                return SECTOR_reserved;
        case SECTOR_dirty:
                return SECTOR_dirty_reserved;
        default:
                return state;
        }
}

/* for newly allocated folios: */
struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp)
{
        struct bch_folio *s;

        s = kzalloc(sizeof(*s) +
                    sizeof(struct bch_folio_sector) *
                    folio_sectors(folio), gfp);
        if (!s)
                return NULL;

        spin_lock_init(&s->lock);
        folio_attach_private(folio, s);
        return s;
}

struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp)
{
        return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp);
}

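/* Map an extent key to the sector state it implies for the range it covers: */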
static unsigned bkey_to_sector_state(struct bkey_s_c k)
{
        if (bkey_extent_is_reservation(k))
                return SECTOR_reserved;
        if (bkey_extent_is_allocation(k.k))
                return SECTOR_allocated;
        return SECTOR_unallocated;
}

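/*
 * Set state for a range of sectors within a single folio; the bch_folio is
 * marked uptodate when the range being set runs to the end of the folio:
 */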
static void __bch2_folio_set(struct folio *folio,
                             unsigned pg_offset, unsigned pg_len,
                             unsigned nr_ptrs, unsigned state)
{
        struct bch_folio *s = bch2_folio(folio);
        unsigned i, sectors = folio_sectors(folio);

        BUG_ON(pg_offset >= sectors);
        BUG_ON(pg_offset + pg_len > sectors);

        spin_lock(&s->lock);

        for (i = pg_offset; i < pg_offset + pg_len; i++) {
                s->s[i].nr_replicas     = nr_ptrs;
                bch2_folio_sector_set(folio, s, i, state);
        }

        if (i == sectors)
                s->uptodate = true;

        spin_unlock(&s->lock);
}

/*
 * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the
 * extents btree:
 */
int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
                   struct folio **fs, unsigned nr_folios)
{
        u64 offset = folio_sector(fs[0]);
        bool need_set = false;

        for (unsigned folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
                struct bch_folio *s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
                if (!s)
                        return -ENOMEM;

                need_set |= !s->uptodate;
        }

        if (!need_set)
                return 0;

        unsigned folio_idx = 0;

        return bch2_trans_run(c,
                for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents,
                                   POS(inum.inum, offset),
                                   POS(inum.inum, U64_MAX),
                                   inum.subvol, BTREE_ITER_slots, k, ({
                        unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
                        unsigned state = bkey_to_sector_state(k);

                        while (folio_idx < nr_folios) {
                                struct folio *folio = fs[folio_idx];
                                u64 folio_start = folio_sector(folio);
                                u64 folio_end   = folio_end_sector(folio);
                                unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
                                        folio_start;
                                unsigned folio_len = min(k.k->p.offset, folio_end) -
                                        folio_offset - folio_start;

                                BUG_ON(k.k->p.offset < folio_start);
                                BUG_ON(bkey_start_offset(k.k) > folio_end);

                                if (!bch2_folio(folio)->uptodate)
                                        __bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state);

                                if (k.k->p.offset < folio_end)
                                        break;
                                folio_idx++;
                        }

                        if (folio_idx == nr_folios)
                                break;
                        0;
                })));
}

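/*
 * Initialize folio sector state for each folio in a bio from the extent key
 * the data was read from (replica count is left at zero for indirect
 * extents):
 */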
void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
{
        struct bvec_iter iter;
        struct folio_vec fv;
        unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
                ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
        unsigned state = bkey_to_sector_state(k);

        bio_for_each_folio(fv, bio, iter)
                __bch2_folio_set(fv.fv_folio,
                                 fv.fv_offset >> 9,
                                 fv.fv_len >> 9,
                                 nr_ptrs, state);
}

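/*
 * Walk the pagecache over [start, end) (in sectors) and clear the cached
 * nr_replicas for every sector:
 */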
void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
                                     u64 start, u64 end)
{
        pgoff_t index = start >> PAGE_SECTORS_SHIFT;
        pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
        struct folio_batch fbatch;
        unsigned i, j;

        if (end <= start)
                return;

        folio_batch_init(&fbatch);

        while (filemap_get_folios(inode->v.i_mapping,
                                  &index, end_index, &fbatch)) {
                for (i = 0; i < folio_batch_count(&fbatch); i++) {
                        struct folio *folio = fbatch.folios[i];
                        u64 folio_start = folio_sector(folio);
                        u64 folio_end = folio_end_sector(folio);
                        unsigned folio_offset = max(start, folio_start) - folio_start;
                        unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
                        struct bch_folio *s;

                        BUG_ON(end <= folio_start);

                        folio_lock(folio);
                        s = bch2_folio(folio);

                        if (s) {
                                spin_lock(&s->lock);
                                for (j = folio_offset; j < folio_offset + folio_len; j++)
                                        s->s[j].nr_replicas = 0;
                                spin_unlock(&s->lock);
                        }

                        folio_unlock(folio);
                }
                folio_batch_release(&fbatch);
                cond_resched();
        }
}

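/*
 * Walk the pagecache over [*start, end) (in sectors) and move every sector to
 * its reserved state; sectors that were SECTOR_dirty drop their dirty
 * accounting, since the reservation now covers them. *start is advanced as
 * folios are processed, so a nonblocking caller that gets -EAGAIN can resume
 * where it left off:
 */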
int bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
                                 u64 *start, u64 end,
                                 bool nonblocking)
{
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        pgoff_t index = *start >> PAGE_SECTORS_SHIFT;
        pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
        struct folio_batch fbatch;
        s64 i_sectors_delta = 0;
        int ret = 0;

        if (end <= *start)
                return 0;

        folio_batch_init(&fbatch);

        while (filemap_get_folios(inode->v.i_mapping,
                                  &index, end_index, &fbatch)) {
                for (unsigned i = 0; i < folio_batch_count(&fbatch); i++) {
                        struct folio *folio = fbatch.folios[i];

                        if (!nonblocking)
                                folio_lock(folio);
                        else if (!folio_trylock(folio)) {
                                folio_batch_release(&fbatch);
                                ret = -EAGAIN;
                                break;
                        }

                        u64 folio_start = folio_sector(folio);
                        u64 folio_end = folio_end_sector(folio);

                        BUG_ON(end <= folio_start);

                        struct bch_folio *s = bch2_folio(folio);
                        if (s) {
                                unsigned folio_offset = max(*start, folio_start) - folio_start;
                                unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;

                                spin_lock(&s->lock);
                                for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) {
                                        i_sectors_delta -= s->s[j].state == SECTOR_dirty;
                                        bch2_folio_sector_set(folio, s, j,
                                                folio_sector_reserve(s->s[j].state));
                                }
                                spin_unlock(&s->lock);
                        }

                        /*
                         * Advance *start only after using it to compute
                         * folio_offset above - updating it first would make
                         * folio_len zero and turn the loop into a no-op:
                         */
                        *start = min(end, folio_end);

                        folio_unlock(folio);
                }
                folio_batch_release(&fbatch);
                cond_resched();
        }

        bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
        return ret;
}

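/* Additional replicas this sector needs reserved to reach @nr_replicas: */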
static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
                                          unsigned nr_replicas)
{
        return max(0, (int) nr_replicas -
                   s->nr_replicas -
                   s->replicas_reserved);
}

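/*
 * Ensure every sector in @folio has a disk reservation at the inode's
 * replication level; when @check_enospc is false the reservation is taken
 * with BCH_DISK_RESERVATION_NOFAIL and cannot fail for lack of space:
 */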
int bch2_get_folio_disk_reservation(struct bch_fs *c,
                                struct bch_inode_info *inode,
                                struct folio *folio, bool check_enospc)
{
        struct bch_folio *s = bch2_folio_create(folio, 0);
        unsigned nr_replicas = inode_nr_replicas(c, inode);
        struct disk_reservation disk_res = { 0 };
        unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
        int ret;

        if (!s)
                return -ENOMEM;

        for (i = 0; i < sectors; i++)
                disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);

        if (!disk_res_sectors)
                return 0;

        ret = bch2_disk_reservation_get(c, &disk_res,
                                        disk_res_sectors, 1,
                                        !check_enospc
                                        ? BCH_DISK_RESERVATION_NOFAIL
                                        : 0);
        if (unlikely(ret))
                return ret;

        for (i = 0; i < sectors; i++)
                s->s[i].replicas_reserved +=
                        sectors_to_reserve(&s->s[i], nr_replicas);

        return 0;
}

void bch2_folio_reservation_put(struct bch_fs *c,
                        struct bch_inode_info *inode,
                        struct bch2_folio_reservation *res)
{
        bch2_disk_reservation_put(c, &res->disk);
        bch2_quota_reservation_put(c, inode, &res->quota);
}

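/*
 * Reserve disk space and quota for a write to [offset, offset + len) within
 * @folio. In @partial mode a short reservation is allowed: the reserved range
 * is truncated to the filesystem block size and its (possibly shortened)
 * length in bytes is returned; otherwise returns 0 or a negative error:
 */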
static int __bch2_folio_reservation_get(struct bch_fs *c,
                        struct bch_inode_info *inode,
                        struct folio *folio,
                        struct bch2_folio_reservation *res,
                        size_t offset, size_t len,
                        bool partial)
{
        struct bch_folio *s = bch2_folio_create(folio, 0);
        unsigned i, disk_sectors = 0, quota_sectors = 0;
        struct disk_reservation disk_res = {};
        size_t reserved = len;
        int ret;

        if (!s)
                return -ENOMEM;

        BUG_ON(!s->uptodate);

        for (i = round_down(offset, block_bytes(c)) >> 9;
             i < round_up(offset + len, block_bytes(c)) >> 9;
             i++) {
                disk_sectors += sectors_to_reserve(&s->s[i], res->disk.nr_replicas);
                quota_sectors += s->s[i].state == SECTOR_unallocated;
        }

        if (disk_sectors) {
                ret = bch2_disk_reservation_add(c, &disk_res, disk_sectors,
                                partial ? BCH_DISK_RESERVATION_PARTIAL : 0);
                if (unlikely(ret))
                        return ret;

                if (unlikely(disk_res.sectors != disk_sectors)) {
                        disk_sectors = quota_sectors = 0;

                        for (i = round_down(offset, block_bytes(c)) >> 9;
                             i < round_up(offset + len, block_bytes(c)) >> 9;
                             i++) {
                                disk_sectors += sectors_to_reserve(&s->s[i], res->disk.nr_replicas);
                                if (disk_sectors > disk_res.sectors) {
                                        /*
                                         * Make sure to get a reservation that's
                                         * aligned to the filesystem blocksize:
                                         */
                                        unsigned reserved_offset = round_down(i << 9, block_bytes(c));
                                        reserved = clamp(reserved_offset, offset, offset + len) - offset;

                                        if (!reserved) {
                                                bch2_disk_reservation_put(c, &disk_res);
                                                return -BCH_ERR_ENOSPC_disk_reservation;
                                        }
                                        break;
                                }
                                quota_sectors += s->s[i].state == SECTOR_unallocated;
                        }
                }
        }

        if (quota_sectors) {
                ret = bch2_quota_reservation_add(c, inode, &res->quota, quota_sectors, true);
                if (unlikely(ret)) {
                        bch2_disk_reservation_put(c, &disk_res);
                        return ret;
                }
        }

        res->disk.sectors += disk_res.sectors;
        return partial ? reserved : 0;
}

int bch2_folio_reservation_get(struct bch_fs *c,
                        struct bch_inode_info *inode,
                        struct folio *folio,
                        struct bch2_folio_reservation *res,
                        size_t offset, size_t len)
{
        return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, false);
}

ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
                        struct bch_inode_info *inode,
                        struct folio *folio,
                        struct bch2_folio_reservation *res,
                        size_t offset, size_t len)
{
        return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, true);
}

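/*
 * Tear down a folio's bch_folio state as it leaves the pagecache: give back
 * its disk reservations and drop the dirty accounting for its sectors:
 */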
static void bch2_clear_folio_bits(struct folio *folio)
{
        struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct bch_folio *s = bch2_folio(folio);
        struct disk_reservation disk_res = { 0 };
        int i, sectors = folio_sectors(folio), dirty_sectors = 0;

        if (!s)
                return;

        EBUG_ON(!folio_test_locked(folio));
        EBUG_ON(folio_test_writeback(folio));

        for (i = 0; i < sectors; i++) {
                disk_res.sectors += s->s[i].replicas_reserved;
                s->s[i].replicas_reserved = 0;

                dirty_sectors -= s->s[i].state == SECTOR_dirty;
                bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state));
        }

        bch2_disk_reservation_put(c, &disk_res);

        bch2_i_sectors_acct(c, inode, NULL, dirty_sectors);

        bch2_folio_release(folio);
}

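/*
 * Mark [offset, offset + len) within @folio dirty: moves disk reservation
 * from @res into the per-sector state, and accounts newly dirtied sectors in
 * i_sectors and against @res's quota reservation:
 */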
void bch2_set_folio_dirty(struct bch_fs *c,
                          struct bch_inode_info *inode,
                          struct folio *folio,
                          struct bch2_folio_reservation *res,
                          unsigned offset, unsigned len)
{
        struct bch_folio *s = bch2_folio(folio);
        unsigned i, dirty_sectors = 0;

        WARN_ON((u64) folio_pos(folio) + offset + len >
                round_up((u64) i_size_read(&inode->v), block_bytes(c)));

        BUG_ON(!s->uptodate);

        spin_lock(&s->lock);

        for (i = round_down(offset, block_bytes(c)) >> 9;
             i < round_up(offset + len, block_bytes(c)) >> 9;
             i++) {
                unsigned sectors = sectors_to_reserve(&s->s[i],
                                                res->disk.nr_replicas);

                /*
                 * This can happen if we race with the error path in
                 * bch2_writepage_io_done():
                 */
                sectors = min_t(unsigned, sectors, res->disk.sectors);

                s->s[i].replicas_reserved += sectors;
                res->disk.sectors -= sectors;

                dirty_sectors += s->s[i].state == SECTOR_unallocated;

                bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
        }

        spin_unlock(&s->lock);

        bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors);

        if (!folio_test_dirty(folio))
                filemap_dirty_folio(inode->v.i_mapping, folio);
}

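/*
 * Page faults can recurse into the filesystem if the faulting address is
 * mmapped from it: if that happens while we hold pagecache locks for a DIO to
 * the same mapping (faults_disabled_mapping()), taking them again would
 * deadlock, so fail the fault instead. For a different bcachefs mapping we
 * keep lock ordering by cycling the other inode's pagecache locks and telling
 * the DIO path its locks were dropped:
 */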
vm_fault_t bch2_page_fault(struct vm_fault *vmf)
{
        struct file *file = vmf->vma->vm_file;
        struct address_space *mapping = file->f_mapping;
        struct address_space *fdm = faults_disabled_mapping();
        struct bch_inode_info *inode = file_bch_inode(file);
        vm_fault_t ret;

        if (fdm == mapping)
                return VM_FAULT_SIGBUS;

        /* Lock ordering: */
        if (fdm > mapping) {
                struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);

                if (bch2_pagecache_add_tryget(inode))
                        goto got_lock;

                bch2_pagecache_block_put(fdm_host);

                bch2_pagecache_add_get(inode);
                bch2_pagecache_add_put(inode);

                bch2_pagecache_block_get(fdm_host);

                /* Signal that lock has been dropped: */
                set_fdm_dropped_locks();
                return VM_FAULT_SIGBUS;
        }

        bch2_pagecache_add_get(inode);
got_lock:
        ret = filemap_fault(vmf);
        bch2_pagecache_add_put(inode);

        return ret;
}

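/*
 * .page_mkwrite: a read-mapped folio is about to become writable, so reserve
 * space for it up front - this is how mmapped writes avoid failing with
 * -ENOSPC at writeback time:
 */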
vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
{
        struct folio *folio = page_folio(vmf->page);
        struct file *file = vmf->vma->vm_file;
        struct bch_inode_info *inode = file_bch_inode(file);
        struct address_space *mapping = file->f_mapping;
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct bch2_folio_reservation res;
        unsigned len;
        loff_t isize;
        vm_fault_t ret;

        bch2_folio_reservation_init(c, inode, &res);

        sb_start_pagefault(inode->v.i_sb);
        file_update_time(file);

        /*
         * Not strictly necessary, but helps avoid dio writes livelocking in
         * bch2_write_invalidate_inode_pages_range(); we can drop this if/when
         * that function no longer needs to drop the page lock before
         * invalidating a page.
         */
        bch2_pagecache_add_get(inode);

        folio_lock(folio);
        isize = i_size_read(&inode->v);

        if (folio->mapping != mapping || folio_pos(folio) >= isize) {
                folio_unlock(folio);
                ret = VM_FAULT_NOPAGE;
                goto out;
        }

        len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));

        if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
            bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
                folio_unlock(folio);
                ret = VM_FAULT_SIGBUS;
                goto out;
        }

        bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
        bch2_folio_reservation_put(c, inode, &res);

        folio_wait_stable(folio);
        ret = VM_FAULT_LOCKED;
out:
        bch2_pagecache_add_put(inode);
        sb_end_pagefault(inode->v.i_sb);

        return ret;
}

void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
        if (offset || length < folio_size(folio))
                return;

        bch2_clear_folio_bits(folio);
}

bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
{
        if (folio_test_dirty(folio) || folio_test_writeback(folio))
                return false;

        bch2_clear_folio_bits(folio);
        return true;
}

/* fseek: */

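/*
 * Return the byte offset within @folio of the first sector at or after @pos
 * holding data (state >= SECTOR_dirty, with at least @min_replicas replicas
 * plus reservations), or -1 if there is none:
 */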
static int folio_data_offset(struct folio *folio, loff_t pos,
                             unsigned min_replicas)
{
        struct bch_folio *s = bch2_folio(folio);
        unsigned i, sectors = folio_sectors(folio);

        if (s)
                for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
                        if (s->s[i].state >= SECTOR_dirty &&
                            s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
                                return i << SECTOR_SHIFT;

        return -1;
}

loff_t bch2_seek_pagecache_data(struct inode *vinode,
                                loff_t start_offset,
                                loff_t end_offset,
                                unsigned min_replicas,
                                bool nonblock)
{
        struct folio_batch fbatch;
        pgoff_t start_index     = start_offset >> PAGE_SHIFT;
        pgoff_t end_index       = end_offset >> PAGE_SHIFT;
        pgoff_t index           = start_index;
        unsigned i;
        loff_t ret;
        int offset;

        folio_batch_init(&fbatch);

        while (filemap_get_folios(vinode->i_mapping,
                                  &index, end_index, &fbatch)) {
                for (i = 0; i < folio_batch_count(&fbatch); i++) {
                        struct folio *folio = fbatch.folios[i];

                        if (!nonblock) {
                                folio_lock(folio);
                        } else if (!folio_trylock(folio)) {
                                folio_batch_release(&fbatch);
                                return -EAGAIN;
                        }

                        offset = folio_data_offset(folio,
                                        max(folio_pos(folio), start_offset),
                                        min_replicas);
                        if (offset >= 0) {
                                ret = clamp(folio_pos(folio) + offset,
                                            start_offset, end_offset);
                                folio_unlock(folio);
                                folio_batch_release(&fbatch);
                                return ret;
                        }
                        folio_unlock(folio);
                }
                folio_batch_release(&fbatch);
                cond_resched();
        }

        return end_offset;
}

/*
 * Search for a hole in a folio.
 *
 * The filemap layer returns -ENOENT if no folio exists, so reuse the same error
 * code to indicate a pagecache hole exists at the returned offset. Otherwise
 * return 0 if the folio is filled with data, or an error code. This function
 * can return -EAGAIN if nonblock is specified.
 */
static int folio_hole_offset(struct address_space *mapping, loff_t *offset,
                              unsigned min_replicas, bool nonblock)
{
        struct folio *folio;
        struct bch_folio *s;
        unsigned i, sectors;
        int ret = -ENOENT;

        folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
                                    FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0);
        if (IS_ERR(folio))
                return PTR_ERR(folio);

        s = bch2_folio(folio);
        if (!s)
                goto unlock;

        sectors = folio_sectors(folio);
        for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
                if (s->s[i].state < SECTOR_dirty ||
                    s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
                        *offset = max(*offset,
                                      folio_pos(folio) + (i << SECTOR_SHIFT));
                        goto unlock;
                }

        *offset = folio_end_pos(folio);
        ret = 0;
unlock:
        folio_unlock(folio);
        folio_put(folio);
        return ret;
}

loff_t bch2_seek_pagecache_hole(struct inode *vinode,
                                loff_t start_offset,
                                loff_t end_offset,
                                unsigned min_replicas,
                                bool nonblock)
{
        struct address_space *mapping = vinode->i_mapping;
        loff_t offset = start_offset;
        loff_t ret = 0;

        while (!ret && offset < end_offset)
                ret = folio_hole_offset(mapping, &offset, min_replicas, nonblock);

        if (ret && ret != -ENOENT)
                return ret;
        return min(offset, end_offset);
}

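/*
 * Shrink the hole [*hole_start, *hole_end) (in sectors) so it doesn't overlap
 * pagecache data: advance *hole_start past any data, then pull *hole_end back
 * to the start of the next data extent in the pagecache:
 */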
int bch2_clamp_data_hole(struct inode *inode,
                         u64 *hole_start,
                         u64 *hole_end,
                         unsigned min_replicas,
                         bool nonblock)
{
        loff_t ret;

        ret = bch2_seek_pagecache_hole(inode,
                *hole_start << 9, *hole_end << 9, min_replicas, nonblock);
        if (ret < 0)
                return ret;

        /* convert to sectors only after the error check, so errnos survive: */
        *hole_start = ret >> 9;

        if (*hole_start == *hole_end)
                return 0;

        ret = bch2_seek_pagecache_data(inode,
                *hole_start << 9, *hole_end << 9, min_replicas, nonblock);
        if (ret < 0)
                return ret;

        *hole_end = ret >> 9;
        return 0;
}

#endif /* NO_BCACHEFS_FS */