fs/f2fs/data.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * fs/f2fs/data.c
   4  *
   5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
   6  *             http://www.samsung.com/
   7  */
   8 #include <linux/fs.h>
   9 #include <linux/f2fs_fs.h>
  10 #include <linux/buffer_head.h>
  11 #include <linux/sched/mm.h>
  12 #include <linux/mpage.h>
  13 #include <linux/writeback.h>
  14 #include <linux/pagevec.h>
  15 #include <linux/blkdev.h>
  16 #include <linux/bio.h>
  17 #include <linux/blk-crypto.h>
  18 #include <linux/swap.h>
  19 #include <linux/prefetch.h>
  20 #include <linux/uio.h>
  21 #include <linux/sched/signal.h>
  22 #include <linux/fiemap.h>
  23 #include <linux/iomap.h>
  24
  25 #include "f2fs.h"
  26 #include "node.h"
  27 #include "segment.h"
  28 #include "iostat.h"
  29 #include <trace/events/f2fs.h>
  30
   /*
    * NOTE(review): presumably the number of bio_post_read_ctx objects held in
    * reserve by bio_post_read_ctx_pool; the pool is created outside this part
    * of the file -- confirm there.
    */
   31 #define NUM_PREALLOC_POST_READ_CTXS     128
   32
   /* NOTE(review): presumably the slab behind bio_post_read_ctx_pool -- confirm. */
   33 static struct kmem_cache *bio_post_read_ctx_cache;
   /* Slab for the struct bio_entry nodes linked on a write_io bio_list. */
   34 static struct kmem_cache *bio_entry_slab;
   /* Pool that struct bio_post_read_ctx objects are released to. */
   35 static mempool_t *bio_post_read_ctx_pool;
   /* bio_set that __bio_alloc() allocates bios from. */
   36 static struct bio_set f2fs_bioset;
   37
   /* Pool size passed to bioset_init() for f2fs_bioset. */
   38 #define F2FS_BIO_POOL_SIZE      NR_CURSEG_TYPE
  39
  40 int __init f2fs_init_bioset(void)
  41 {
  42         return bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
  43                                         0, BIOSET_NEED_BVECS);
  44 }
  45
  46 void f2fs_destroy_bioset(void)
  47 {
  48         bioset_exit(&f2fs_bioset);
  49 }
  50
  51 static bool __is_cp_guaranteed(struct page *page)
  52 {
  53         struct address_space *mapping = page->mapping;
  54         struct inode *inode;
  55         struct f2fs_sb_info *sbi;
  56
  57         if (!mapping)
  58                 return false;
  59
  60         inode = mapping->host;
  61         sbi = F2FS_I_SB(inode);
  62
  63         if (inode->i_ino == F2FS_META_INO(sbi) ||
  64                         inode->i_ino == F2FS_NODE_INO(sbi) ||
  65                         S_ISDIR(inode->i_mode))
  66                 return true;
  67
  68         if (f2fs_is_compressed_page(page))
  69                 return false;
  70         if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
  71                         page_private_gcing(page))
  72                 return true;
  73         return false;
  74 }
  75
  76 static enum count_type __read_io_type(struct page *page)
  77 {
  78         struct address_space *mapping = page_file_mapping(page);
  79
  80         if (mapping) {
  81                 struct inode *inode = mapping->host;
  82                 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  83
  84                 if (inode->i_ino == F2FS_META_INO(sbi))
  85                         return F2FS_RD_META;
  86
  87                 if (inode->i_ino == F2FS_NODE_INO(sbi))
  88                         return F2FS_RD_NODE;
  89         }
  90         return F2FS_RD_DATA;
  91 }
  92
  93 /* postprocessing steps for read bios */
  94 enum bio_post_read_step {
  95 #ifdef CONFIG_FS_ENCRYPTION
  96         STEP_DECRYPT    = 1 << 0,
  97 #else
  98         STEP_DECRYPT    = 0,    /* compile out the decryption-related code */
  99 #endif
 100 #ifdef CONFIG_F2FS_FS_COMPRESSION
 101         STEP_DECOMPRESS = 1 << 1,
 102 #else
 103         STEP_DECOMPRESS = 0,    /* compile out the decompression-related code */
 104 #endif
 105 #ifdef CONFIG_FS_VERITY
 106         STEP_VERITY     = 1 << 2,
 107 #else
 108         STEP_VERITY     = 0,    /* compile out the verity-related code */
 109 #endif
 110 };
 111
  /*
   * Per-bio state for read post-processing.  The read completion code in this
   * file obtains it from bio->bi_private.
   */
  112 struct bio_post_read_ctx {
  113         struct bio *bio;        /* bio being post-processed */
  114         struct f2fs_sb_info *sbi;       /* its post_read_wq runs f2fs_post_read_work() */
  115         struct work_struct work;        /* initialised for f2fs_post_read_work() or f2fs_verify_bio() */
  116         unsigned int enabled_steps;     /* mask of enum bio_post_read_step bits */
  117         /*
  118          * decompression_attempted keeps track of whether
  119          * f2fs_end_read_compressed_page() has been called on the pages in the
  120          * bio that belong to a compressed cluster yet.
  121          */
  122         bool decompression_attempted;
  123         block_t fs_blkaddr;     /* start address for f2fs_handle_step_decompress(), advanced once per page */
  124 };
 125
 126 /*
 127  * Update and unlock a bio's pages, and free the bio.
 128  *
 129  * This marks pages up-to-date only if there was no error in the bio (I/O error,
 130  * decryption error, or verity error), as indicated by bio->bi_status.
 131  *
 132  * "Compressed pages" (pagecache pages backed by a compressed cluster on-disk)
 133  * aren't marked up-to-date here, as decompression is done on a per-compression-
 134  * cluster basis rather than a per-bio basis.  Instead, we only must do two
 135  * things for each compressed page here: call f2fs_end_read_compressed_page()
 136  * with failed=true if an error occurred before it would have normally gotten
 137  * called (i.e., I/O error or decryption error, but *not* verity error), and
 138  * release the bio's reference to the decompress_io_ctx of the page's cluster.
 139  */
 140 static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
 141 {
 142         struct bio_vec *bv;
 143         struct bvec_iter_all iter_all;
 144         struct bio_post_read_ctx *ctx = bio->bi_private;
 145
 146         bio_for_each_segment_all(bv, bio, iter_all) {
 147                 struct page *page = bv->bv_page;
 148
 149                 if (f2fs_is_compressed_page(page)) {
 150                         if (ctx && !ctx->decompression_attempted)
 151                                 f2fs_end_read_compressed_page(page, true, 0,
 152                                                         in_task);
 153                         f2fs_put_page_dic(page, in_task);
 154                         continue;
 155                 }
 156
 157                 if (bio->bi_status)
 158                         ClearPageUptodate(page);
 159                 else
 160                         SetPageUptodate(page);
 161                 dec_page_count(F2FS_P_SB(page), __read_io_type(page));
 162                 unlock_page(page);
 163         }
 164
 165         if (ctx)
 166                 mempool_free(ctx, bio_post_read_ctx_pool);
 167         bio_put(bio);
 168 }
 169
 170 static void f2fs_verify_bio(struct work_struct *work)
 171 {
 172         struct bio_post_read_ctx *ctx =
 173                 container_of(work, struct bio_post_read_ctx, work);
 174         struct bio *bio = ctx->bio;
 175         bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);
 176
 177         /*
 178          * fsverity_verify_bio() may call readahead() again, and while verity
 179          * will be disabled for this, decryption and/or decompression may still
 180          * be needed, resulting in another bio_post_read_ctx being allocated.
 181          * So to prevent deadlocks we need to release the current ctx to the
 182          * mempool first.  This assumes that verity is the last post-read step.
 183          */
 184         mempool_free(ctx, bio_post_read_ctx_pool);
 185         bio->bi_private = NULL;
 186
 187         /*
 188          * Verify the bio's pages with fs-verity.  Exclude compressed pages,
 189          * as those were handled separately by f2fs_end_read_compressed_page().
 190          */
 191         if (may_have_compressed_pages) {
 192                 struct bio_vec *bv;
 193                 struct bvec_iter_all iter_all;
 194
 195                 bio_for_each_segment_all(bv, bio, iter_all) {
 196                         struct page *page = bv->bv_page;
 197
 198                         if (!f2fs_is_compressed_page(page) &&
 199                             !fsverity_verify_page(page)) {
 200                                 bio->bi_status = BLK_STS_IOERR;
 201                                 break;
 202                         }
 203                 }
 204         } else {
 205                 fsverity_verify_bio(bio);
 206         }
 207
 208         f2fs_finish_read_bio(bio, true);
 209 }
 210
 211 /*
 212  * If the bio's data needs to be verified with fs-verity, then enqueue the
 213  * verity work for the bio.  Otherwise finish the bio now.
 214  *
 215  * Note that to avoid deadlocks, the verity work can't be done on the
 216  * decryption/decompression workqueue.  This is because verifying the data pages
 217  * can involve reading verity metadata pages from the file, and these verity
 218  * metadata pages may be encrypted and/or compressed.
 219  */
 220 static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task)
 221 {
 222         struct bio_post_read_ctx *ctx = bio->bi_private;
 223
 224         if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
 225                 INIT_WORK(&ctx->work, f2fs_verify_bio);
 226                 fsverity_enqueue_verify_work(&ctx->work);
 227         } else {
 228                 f2fs_finish_read_bio(bio, in_task);
 229         }
 230 }
 231
 232 /*
 233  * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
 234  * remaining page was read by @ctx->bio.
 235  *
 236  * Note that a bio may span clusters (even a mix of compressed and uncompressed
 237  * clusters) or be for just part of a cluster.  STEP_DECOMPRESS just indicates
 238  * that the bio includes at least one compressed page.  The actual decompression
 239  * is done on a per-cluster basis, not a per-bio basis.
 240  */
 241 static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
 242                 bool in_task)
 243 {
 244         struct bio_vec *bv;
 245         struct bvec_iter_all iter_all;
 246         bool all_compressed = true;
 247         block_t blkaddr = ctx->fs_blkaddr;
 248
 249         bio_for_each_segment_all(bv, ctx->bio, iter_all) {
 250                 struct page *page = bv->bv_page;
 251
 252                 if (f2fs_is_compressed_page(page))
 253                         f2fs_end_read_compressed_page(page, false, blkaddr,
 254                                                       in_task);
 255                 else
 256                         all_compressed = false;
 257
 258                 blkaddr++;
 259         }
 260
 261         ctx->decompression_attempted = true;
 262
 263         /*
 264          * Optimization: if all the bio's pages are compressed, then scheduling
 265          * the per-bio verity work is unnecessary, as verity will be fully
 266          * handled at the compression cluster level.
 267          */
 268         if (all_compressed)
 269                 ctx->enabled_steps &= ~STEP_VERITY;
 270 }
 271
 272 static void f2fs_post_read_work(struct work_struct *work)
 273 {
 274         struct bio_post_read_ctx *ctx =
 275                 container_of(work, struct bio_post_read_ctx, work);
 276         struct bio *bio = ctx->bio;
 277
 278         if ((ctx->enabled_steps & STEP_DECRYPT) && !fscrypt_decrypt_bio(bio)) {
 279                 f2fs_finish_read_bio(bio, true);
 280                 return;
 281         }
 282
 283         if (ctx->enabled_steps & STEP_DECOMPRESS)
 284                 f2fs_handle_step_decompress(ctx, true);
 285
 286         f2fs_verify_and_finish_bio(bio, true);
 287 }
 288
 289 static void f2fs_read_end_io(struct bio *bio)
 290 {
 291         struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
 292         struct bio_post_read_ctx *ctx;
 293         bool intask = in_task();
 294
 295         iostat_update_and_unbind_ctx(bio);
 296         ctx = bio->bi_private;
 297
 298         if (time_to_inject(sbi, FAULT_READ_IO))
 299                 bio->bi_status = BLK_STS_IOERR;
 300
 301         if (bio->bi_status) {
 302                 f2fs_finish_read_bio(bio, intask);
 303                 return;
 304         }
 305
 306         if (ctx) {
 307                 unsigned int enabled_steps = ctx->enabled_steps &
 308                                         (STEP_DECRYPT | STEP_DECOMPRESS);
 309
 310                 /*
 311                  * If we have only decompression step between decompression and
 312                  * decrypt, we don't need post processing for this.
 313                  */
 314                 if (enabled_steps == STEP_DECOMPRESS &&
 315                                 !f2fs_low_mem_mode(sbi)) {
 316                         f2fs_handle_step_decompress(ctx, intask);
 317                 } else if (enabled_steps) {
 318                         INIT_WORK(&ctx->work, f2fs_post_read_work);
 319                         queue_work(ctx->sbi->post_read_wq, &ctx->work);
 320                         return;
 321                 }
 322         }
 323
 324         f2fs_verify_and_finish_bio(bio, intask);
 325 }
 326
 327 static void f2fs_write_end_io(struct bio *bio)
 328 {
 329         struct f2fs_sb_info *sbi;
 330         struct bio_vec *bvec;
 331         struct bvec_iter_all iter_all;
 332
 333         iostat_update_and_unbind_ctx(bio);
 334         sbi = bio->bi_private;
 335
 336         if (time_to_inject(sbi, FAULT_WRITE_IO))
 337                 bio->bi_status = BLK_STS_IOERR;
 338
 339         bio_for_each_segment_all(bvec, bio, iter_all) {
 340                 struct page *page = bvec->bv_page;
 341                 enum count_type type = WB_DATA_TYPE(page);
 342
 343                 if (page_private_dummy(page)) {
 344                         clear_page_private_dummy(page);
 345                         unlock_page(page);
 346                         mempool_free(page, sbi->write_io_dummy);
 347
 348                         if (unlikely(bio->bi_status))
 349                                 f2fs_stop_checkpoint(sbi, true,
 350                                                 STOP_CP_REASON_WRITE_FAIL);
 351                         continue;
 352                 }
 353
 354                 fscrypt_finalize_bounce_page(&page);
 355
 356 #ifdef CONFIG_F2FS_FS_COMPRESSION
 357                 if (f2fs_is_compressed_page(page)) {
 358                         f2fs_compress_write_end_io(bio, page);
 359                         continue;
 360                 }
 361 #endif
 362
 363                 if (unlikely(bio->bi_status)) {
 364                         mapping_set_error(page->mapping, -EIO);
 365                         if (type == F2FS_WB_CP_DATA)
 366                                 f2fs_stop_checkpoint(sbi, true,
 367                                                 STOP_CP_REASON_WRITE_FAIL);
 368                 }
 369
 370                 f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
 371                                         page->index != nid_of_node(page));
 372
 373                 dec_page_count(sbi, type);
 374                 if (f2fs_in_warm_node_list(sbi, page))
 375                         f2fs_del_fsync_node_entry(sbi, page);
 376                 clear_page_private_gcing(page);
 377                 end_page_writeback(page);
 378         }
 379         if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
 380                                 wq_has_sleeper(&sbi->cp_wait))
 381                 wake_up(&sbi->cp_wait);
 382
 383         bio_put(bio);
 384 }
 385
 386 struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
 387                 block_t blk_addr, sector_t *sector)
 388 {
 389         struct block_device *bdev = sbi->sb->s_bdev;
 390         int i;
 391
 392         if (f2fs_is_multi_device(sbi)) {
 393                 for (i = 0; i < sbi->s_ndevs; i++) {
 394                         if (FDEV(i).start_blk <= blk_addr &&
 395                             FDEV(i).end_blk >= blk_addr) {
 396                                 blk_addr -= FDEV(i).start_blk;
 397                                 bdev = FDEV(i).bdev;
 398                                 break;
 399                         }
 400                 }
 401         }
 402
 403         if (sector)
 404                 *sector = SECTOR_FROM_BLOCK(blk_addr);
 405         return bdev;
 406 }
 407
 408 int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
 409 {
 410         int i;
 411
 412         if (!f2fs_is_multi_device(sbi))
 413                 return 0;
 414
 415         for (i = 0; i < sbi->s_ndevs; i++)
 416                 if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
 417                         return i;
 418         return 0;
 419 }
 420
 421 static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
 422 {
 423         unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
 424         unsigned int fua_flag, meta_flag, io_flag;
 425         blk_opf_t op_flags = 0;
 426
 427         if (fio->op != REQ_OP_WRITE)
 428                 return 0;
 429         if (fio->type == DATA)
 430                 io_flag = fio->sbi->data_io_flag;
 431         else if (fio->type == NODE)
 432                 io_flag = fio->sbi->node_io_flag;
 433         else
 434                 return 0;
 435
 436         fua_flag = io_flag & temp_mask;
 437         meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
 438
 439         /*
 440          * data/node io flag bits per temp:
 441          *      REQ_META     |      REQ_FUA      |
 442          *    5 |    4 |   3 |    2 |    1 |   0 |
 443          * Cold | Warm | Hot | Cold | Warm | Hot |
 444          */
 445         if ((1 << fio->temp) & meta_flag)
 446                 op_flags |= REQ_META;
 447         if ((1 << fio->temp) & fua_flag)
 448                 op_flags |= REQ_FUA;
 449         return op_flags;
 450 }
 451
 452 static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
 453 {
 454         struct f2fs_sb_info *sbi = fio->sbi;
 455         struct block_device *bdev;
 456         sector_t sector;
 457         struct bio *bio;
 458
 459         bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
 460         bio = bio_alloc_bioset(bdev, npages,
 461                                 fio->op | fio->op_flags | f2fs_io_flags(fio),
 462                                 GFP_NOIO, &f2fs_bioset);
 463         bio->bi_iter.bi_sector = sector;
 464         if (is_read_io(fio->op)) {
 465                 bio->bi_end_io = f2fs_read_end_io;
 466                 bio->bi_private = NULL;
 467         } else {
 468                 bio->bi_end_io = f2fs_write_end_io;
 469                 bio->bi_private = sbi;
 470         }
 471         iostat_alloc_and_bind_ctx(sbi, bio, NULL);
 472
 473         if (fio->io_wbc)
 474                 wbc_init_bio(fio->io_wbc, bio);
 475
 476         return bio;
 477 }
 478
 479 static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
 480                                   pgoff_t first_idx,
 481                                   const struct f2fs_io_info *fio,
 482                                   gfp_t gfp_mask)
 483 {
 484         /*
 485          * The f2fs garbage collector sets ->encrypted_page when it wants to
 486          * read/write raw data without encryption.
 487          */
 488         if (!fio || !fio->encrypted_page)
 489                 fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
 490 }
 491
 492 static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
 493                                      pgoff_t next_idx,
 494                                      const struct f2fs_io_info *fio)
 495 {
 496         /*
 497          * The f2fs garbage collector sets ->encrypted_page when it wants to
 498          * read/write raw data without encryption.
 499          */
 500         if (fio && fio->encrypted_page)
 501                 return !bio_has_crypt_ctx(bio);
 502
 503         return fscrypt_mergeable_bio(bio, inode, next_idx);
 504 }
 505
 506 void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
 507                                  enum page_type type)
 508 {
 509         WARN_ON_ONCE(!is_read_io(bio_op(bio)));
 510         trace_f2fs_submit_read_bio(sbi->sb, type, bio);
 511
 512         iostat_update_submit_ctx(bio, type);
 513         submit_bio(bio);
 514 }
 515
 516 static void f2fs_align_write_bio(struct f2fs_sb_info *sbi, struct bio *bio)
 517 {
 518         unsigned int start =
 519                 (bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS) % F2FS_IO_SIZE(sbi);
 520
 521         if (start == 0)
 522                 return;
 523
 524         /* fill dummy pages */
 525         for (; start < F2FS_IO_SIZE(sbi); start++) {
 526                 struct page *page =
 527                         mempool_alloc(sbi->write_io_dummy,
 528                                       GFP_NOIO | __GFP_NOFAIL);
 529                 f2fs_bug_on(sbi, !page);
 530
 531                 lock_page(page);
 532
 533                 zero_user_segment(page, 0, PAGE_SIZE);
 534                 set_page_private_dummy(page);
 535
 536                 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
 537                         f2fs_bug_on(sbi, 1);
 538         }
 539 }
 540
 541 static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio,
 542                                   enum page_type type)
 543 {
 544         WARN_ON_ONCE(is_read_io(bio_op(bio)));
 545
 546         if (type == DATA || type == NODE) {
 547                 if (f2fs_lfs_mode(sbi) && current->plug)
 548                         blk_finish_plug(current->plug);
 549
 550                 if (F2FS_IO_ALIGNED(sbi)) {
 551                         f2fs_align_write_bio(sbi, bio);
 552                         /*
 553                          * In the NODE case, we lose next block address chain.
 554                          * So, we need to do checkpoint in f2fs_sync_file.
 555                          */
 556                         if (type == NODE)
 557                                 set_sbi_flag(sbi, SBI_NEED_CP);
 558                 }
 559         }
 560
 561         trace_f2fs_submit_write_bio(sbi->sb, type, bio);
 562         iostat_update_submit_ctx(bio, type);
 563         submit_bio(bio);
 564 }
 565
 566 static void __submit_merged_bio(struct f2fs_bio_info *io)
 567 {
 568         struct f2fs_io_info *fio = &io->fio;
 569
 570         if (!io->bio)
 571                 return;
 572
 573         if (is_read_io(fio->op)) {
 574                 trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
 575                 f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
 576         } else {
 577                 trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
 578                 f2fs_submit_write_bio(io->sbi, io->bio, fio->type);
 579         }
 580         io->bio = NULL;
 581 }
 582
 583 static bool __has_merged_page(struct bio *bio, struct inode *inode,
 584                                                 struct page *page, nid_t ino)
 585 {
 586         struct bio_vec *bvec;
 587         struct bvec_iter_all iter_all;
 588
 589         if (!bio)
 590                 return false;
 591
 592         if (!inode && !page && !ino)
 593                 return true;
 594
 595         bio_for_each_segment_all(bvec, bio, iter_all) {
 596                 struct page *target = bvec->bv_page;
 597
 598                 if (fscrypt_is_bounce_page(target)) {
 599                         target = fscrypt_pagecache_page(target);
 600                         if (IS_ERR(target))
 601                                 continue;
 602                 }
 603                 if (f2fs_is_compressed_page(target)) {
 604                         target = f2fs_compress_control_page(target);
 605                         if (IS_ERR(target))
 606                                 continue;
 607                 }
 608
 609                 if (inode && inode == target->mapping->host)
 610                         return true;
 611                 if (page && page == target)
 612                         return true;
 613                 if (ino && ino == ino_of_node(target))
 614                         return true;
 615         }
 616
 617         return false;
 618 }
 619
 620 int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
 621 {
 622         int i;
 623
 624         for (i = 0; i < NR_PAGE_TYPE; i++) {
 625                 int n = (i == META) ? 1 : NR_TEMP_TYPE;
 626                 int j;
 627
 628                 sbi->write_io[i] = f2fs_kmalloc(sbi,
 629                                 array_size(n, sizeof(struct f2fs_bio_info)),
 630                                 GFP_KERNEL);
 631                 if (!sbi->write_io[i])
 632                         return -ENOMEM;
 633
 634                 for (j = HOT; j < n; j++) {
 635                         init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem);
 636                         sbi->write_io[i][j].sbi = sbi;
 637                         sbi->write_io[i][j].bio = NULL;
 638                         spin_lock_init(&sbi->write_io[i][j].io_lock);
 639                         INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
 640                         INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
 641                         init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
 642                 }
 643         }
 644
 645         return 0;
 646 }
 647
 648 static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
 649                                 enum page_type type, enum temp_type temp)
 650 {
 651         enum page_type btype = PAGE_TYPE_OF_BIO(type);
 652         struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
 653
 654         f2fs_down_write(&io->io_rwsem);
 655
 656         if (!io->bio)
 657                 goto unlock_out;
 658
 659         /* change META to META_FLUSH in the checkpoint procedure */
 660         if (type >= META_FLUSH) {
 661                 io->fio.type = META_FLUSH;
 662                 io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC;
 663                 if (!test_opt(sbi, NOBARRIER))
 664                         io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA;
 665         }
 666         __submit_merged_bio(io);
 667 unlock_out:
 668         f2fs_up_write(&io->io_rwsem);
 669 }
 670
 671 static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
 672                                 struct inode *inode, struct page *page,
 673                                 nid_t ino, enum page_type type, bool force)
 674 {
 675         enum temp_type temp;
 676         bool ret = true;
 677
 678         for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
 679                 if (!force)     {
 680                         enum page_type btype = PAGE_TYPE_OF_BIO(type);
 681                         struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
 682
 683                         f2fs_down_read(&io->io_rwsem);
 684                         ret = __has_merged_page(io->bio, inode, page, ino);
 685                         f2fs_up_read(&io->io_rwsem);
 686                 }
 687                 if (ret)
 688                         __f2fs_submit_merged_write(sbi, type, temp);
 689
 690                 /* TODO: use HOT temp only for meta pages now. */
 691                 if (type >= META)
 692                         break;
 693         }
 694 }
 695
 696 void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
 697 {
 698         __submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
 699 }
 700
 701 void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
 702                                 struct inode *inode, struct page *page,
 703                                 nid_t ino, enum page_type type)
 704 {
 705         __submit_merged_write_cond(sbi, inode, page, ino, type, false);
 706 }
 707
 708 void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
 709 {
 710         f2fs_submit_merged_write(sbi, DATA);
 711         f2fs_submit_merged_write(sbi, NODE);
 712         f2fs_submit_merged_write(sbi, META);
 713 }
 714
 715 /*
 716  * Fill the locked page with data located in the block address.
 717  * A caller needs to unlock the page on failure.
 718  */
 719 int f2fs_submit_page_bio(struct f2fs_io_info *fio)
 720 {
 721         struct bio *bio;
 722         struct page *page = fio->encrypted_page ?
 723                         fio->encrypted_page : fio->page;
 724
 725         if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
 726                         fio->is_por ? META_POR : (__is_meta_io(fio) ?
 727                         META_GENERIC : DATA_GENERIC_ENHANCE))) {
 728                 f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
 729                 return -EFSCORRUPTED;
 730         }
 731
 732         trace_f2fs_submit_page_bio(page, fio);
 733
 734         /* Allocate a new bio */
 735         bio = __bio_alloc(fio, 1);
 736
 737         f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
 738                                fio->page->index, fio, GFP_NOIO);
 739
 740         if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
 741                 bio_put(bio);
 742                 return -EFAULT;
 743         }
 744
 745         if (fio->io_wbc && !is_read_io(fio->op))
 746                 wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
 747
 748         inc_page_count(fio->sbi, is_read_io(fio->op) ?
 749                         __read_io_type(page) : WB_DATA_TYPE(fio->page));
 750
 751         if (is_read_io(bio_op(bio)))
 752                 f2fs_submit_read_bio(fio->sbi, bio, fio->type);
 753         else
 754                 f2fs_submit_write_bio(fio->sbi, bio, fio->type);
 755         return 0;
 756 }
 757
 758 static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
 759                                 block_t last_blkaddr, block_t cur_blkaddr)
 760 {
 761         if (unlikely(sbi->max_io_bytes &&
 762                         bio->bi_iter.bi_size >= sbi->max_io_bytes))
 763                 return false;
 764         if (last_blkaddr + 1 != cur_blkaddr)
 765                 return false;
 766         return bio->bi_bdev == f2fs_target_device(sbi, cur_blkaddr, NULL);
 767 }
 768
 769 static bool io_type_is_mergeable(struct f2fs_bio_info *io,
 770                                                 struct f2fs_io_info *fio)
 771 {
 772         if (io->fio.op != fio->op)
 773                 return false;
 774         return io->fio.op_flags == fio->op_flags;
 775 }
 776
 777 static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
 778                                         struct f2fs_bio_info *io,
 779                                         struct f2fs_io_info *fio,
 780                                         block_t last_blkaddr,
 781                                         block_t cur_blkaddr)
 782 {
 783         if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
 784                 unsigned int filled_blocks =
 785                                 F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
 786                 unsigned int io_size = F2FS_IO_SIZE(sbi);
 787                 unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;
 788
 789                 /* IOs in bio is aligned and left space of vectors is not enough */
 790                 if (!(filled_blocks % io_size) && left_vecs < io_size)
 791                         return false;
 792         }
 793         if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
 794                 return false;
 795         return io_type_is_mergeable(io, fio);
 796 }
 797
 798 static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
 799                                 struct page *page, enum temp_type temp)
 800 {
 801         struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
 802         struct bio_entry *be;
 803
 804         be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL);
 805         be->bio = bio;
 806         bio_get(bio);
 807
 808         if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
 809                 f2fs_bug_on(sbi, 1);
 810
 811         f2fs_down_write(&io->bio_list_lock);
 812         list_add_tail(&be->list, &io->bio_list);
 813         f2fs_up_write(&io->bio_list_lock);
 814 }
 815
 816 static void del_bio_entry(struct bio_entry *be)
 817 {
 818         list_del(&be->list);
 819         kmem_cache_free(bio_entry_slab, be);
 820 }
 821
/*
 * Try to append @page to the tracked bio *@bio.
 *
 * The DATA bio lists of every temperature are searched for the entry that
 * tracks *@bio.  Returns 0 if the page was added to the bio.  Otherwise
 * returns -EAGAIN after dropping one reference on *@bio and setting *@bio to
 * NULL; when the bio was found but could not accept the page, its entry is
 * removed from the list and the bio is submitted first.
 */
static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
                                                        struct page *page)
{
        struct f2fs_sb_info *sbi = fio->sbi;
        enum temp_type temp;
        bool found = false;
        int ret = -EAGAIN;

        /* stop scanning further temperatures once the bio has been located */
        for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
                struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
                struct list_head *head = &io->bio_list;
                struct bio_entry *be;

                f2fs_down_write(&io->bio_list_lock);
                list_for_each_entry(be, head, list) {
                        if (be->bio != *bio)
                                continue;

                        found = true;

                        /* the caller is expected to have checked block adjacency */
                        f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
                                                            *fio->last_block,
                                                            fio->new_blkaddr));
                        /* bio_add_page() is only attempted if the crypt check passes */
                        if (f2fs_crypt_mergeable_bio(*bio,
                                        fio->page->mapping->host,
                                        fio->page->index, fio) &&
                            bio_add_page(*bio, page, PAGE_SIZE, 0) ==
                                        PAGE_SIZE) {
                                ret = 0;
                                break;
                        }

                        /* page can't be merged into bio; submit the bio */
                        del_bio_entry(be);
                        f2fs_submit_write_bio(sbi, *bio, DATA);
                        break;
                }
                f2fs_up_write(&io->bio_list_lock);
        }

        /* on failure the caller must not keep using the bio */
        if (ret) {
                bio_put(*bio);
                *bio = NULL;
        }

        return ret;
}
 869
/*
 * Submit one bio tracked on the DATA bio lists.
 *
 * If @bio is non-NULL and *@bio is set, that exact bio is looked up;
 * otherwise the first tracked bio for which __has_merged_page() matches
 * @page is used.  A matching entry is removed from its list and the bio is
 * submitted.  Whether or not anything was submitted, a non-NULL *@bio has
 * one reference dropped and is reset to NULL before returning.
 */
void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
                                        struct bio **bio, struct page *page)
{
        enum temp_type temp;
        bool found = false;
        struct bio *target = bio ? *bio : NULL;

        for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
                struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
                struct list_head *head = &io->bio_list;
                struct bio_entry *be;

                /* unlocked peek: skip temperatures with nothing queued */
                if (list_empty(head))
                        continue;

                /* first pass: probe for a match while holding only the read lock */
                f2fs_down_read(&io->bio_list_lock);
                list_for_each_entry(be, head, list) {
                        if (target)
                                found = (target == be->bio);
                        else
                                found = __has_merged_page(be->bio, NULL,
                                                                page, 0);
                        if (found)
                                break;
                }
                f2fs_up_read(&io->bio_list_lock);

                if (!found)
                        continue;

                /* the list may have changed after the read lock was released */
                found = false;

                /* second pass: re-check under the write lock and unlink the entry */
                f2fs_down_write(&io->bio_list_lock);
                list_for_each_entry(be, head, list) {
                        if (target)
                                found = (target == be->bio);
                        else
                                found = __has_merged_page(be->bio, NULL,
                                                                page, 0);
                        if (found) {
                                target = be->bio;
                                del_bio_entry(be);
                                break;
                        }
                }
                f2fs_up_write(&io->bio_list_lock);
        }

        if (found)
                f2fs_submit_write_bio(sbi, target, DATA);
        /* release the caller's handle in every case */
        if (bio && *bio) {
                bio_put(*bio);
                *bio = NULL;
        }
}
 925
/*
 * Add the page described by @fio to the caller-held bio *@fio->bio,
 * allocating and tracking a new bio when there is none or when the current
 * one cannot take the page.
 *
 * The encrypted page is used when present, otherwise fio->page.
 * Returns 0 on success, with *@fio->bio and *@fio->last_block updated, or
 * -EFSCORRUPTED if fio->new_blkaddr fails the block address check.
 */
int f2fs_merge_page_bio(struct f2fs_io_info *fio)
{
        struct bio *bio = *fio->bio;
        struct page *page = fio->encrypted_page ?
                        fio->encrypted_page : fio->page;

        if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
                        __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) {
                f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
                return -EFSCORRUPTED;
        }

        trace_f2fs_submit_page_bio(page, fio);

        /*
         * The new block cannot be merged behind the last one: flush the
         * current bio.  The callee also resets the local 'bio' to NULL.
         */
        if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
                                                fio->new_blkaddr))
                f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
alloc_new:
        if (!bio) {
                bio = __bio_alloc(fio, BIO_MAX_VECS);
                f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
                                       fio->page->index, fio, GFP_NOIO);

                /* add_bio_entry() also places the page into the new bio */
                add_bio_entry(fio->sbi, bio, page, fio->temp);
        } else {
                /* on failure add_ipu_page() has set 'bio' to NULL; retry */
                if (add_ipu_page(fio, &bio, page))
                        goto alloc_new;
        }

        if (fio->io_wbc)
                wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);

        inc_page_count(fio->sbi, WB_DATA_TYPE(page));

        /* hand the (possibly new) bio and last block back to the caller */
        *fio->last_block = fio->new_blkaddr;
        *fio->bio = bio;

        return 0;
}
 965
/*
 * Queue the page described by @fio into the per-type, per-temperature merged
 * write bio, submitting the current bio and starting a new one whenever the
 * page cannot be merged or the bio is full.
 *
 * When fio->in_list is set, the passed-in @fio is replaced by entries taken
 * from io->io_list, one per iteration, until that list is empty.  The whole
 * operation runs with io->io_rwsem held for write.  fio->submitted is set
 * for each fio processed, and fio->retry is set when the page had to be
 * skipped in the IO-aligned case.
 */
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
        struct f2fs_sb_info *sbi = fio->sbi;
        enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
        struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
        struct page *bio_page;

        f2fs_bug_on(sbi, is_read_io(fio->op));

        f2fs_down_write(&io->io_rwsem);
next:
        if (fio->in_list) {
                /* take the next queued fio; io_lock protects io_list */
                spin_lock(&io->io_lock);
                if (list_empty(&io->io_list)) {
                        spin_unlock(&io->io_lock);
                        goto out;
                }
                fio = list_first_entry(&io->io_list,
                                                struct f2fs_io_info, list);
                list_del(&fio->list);
                spin_unlock(&io->io_lock);
        }

        verify_fio_blkaddr(fio);

        /* page that actually goes to disk: encrypted, else compressed, else plain */
        if (fio->encrypted_page)
                bio_page = fio->encrypted_page;
        else if (fio->compressed_page)
                bio_page = fio->compressed_page;
        else
                bio_page = fio->page;

        /* set submitted = true as a return value */
        fio->submitted = 1;

        inc_page_count(sbi, WB_DATA_TYPE(bio_page));

        /* flush the pending bio if this page cannot be merged into it */
        if (io->bio &&
            (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
                              fio->new_blkaddr) ||
             !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
                                       bio_page->index, fio)))
                __submit_merged_bio(io);
alloc_new:
        if (io->bio == NULL) {
                /*
                 * In IO-aligned mode a new DATA/NODE bio is not started at a
                 * block address with F2FS_IO_SIZE_MASK bits set: undo the
                 * page accounting and flag the fio for retry instead.
                 */
                if (F2FS_IO_ALIGNED(sbi) &&
                                (fio->type == DATA || fio->type == NODE) &&
                                fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
                        dec_page_count(sbi, WB_DATA_TYPE(bio_page));
                        fio->retry = 1;
                        goto skip;
                }
                io->bio = __bio_alloc(fio, BIO_MAX_VECS);
                f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
                                       bio_page->index, fio, GFP_NOIO);
                /* keep a copy of the fio that opened this bio */
                io->fio = *fio;
        }

        /* bio is full: submit it and start a fresh one for this page */
        if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
                __submit_merged_bio(io);
                goto alloc_new;
        }

        if (fio->io_wbc)
                wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);

        io->last_block_in_bio = fio->new_blkaddr;

        trace_f2fs_submit_page_write(fio->page, fio);
skip:
        if (fio->in_list)
                goto next;
out:
        /* do not leave a merged bio pending on shutdown or when checkpoint is not ready */
        if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
                                !f2fs_is_checkpoint_ready(sbi))
                __submit_merged_bio(io);
        f2fs_up_write(&io->io_rwsem);
}
1044
1045 static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
1046                                       unsigned nr_pages, blk_opf_t op_flag,
1047                                       pgoff_t first_idx, bool for_write)
1048 {
1049         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1050         struct bio *bio;
1051         struct bio_post_read_ctx *ctx = NULL;
1052         unsigned int post_read_steps = 0;
1053         sector_t sector;
1054         struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector);
1055
1056         bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages),
1057                                REQ_OP_READ | op_flag,
1058                                for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
1059         if (!bio)
1060                 return ERR_PTR(-ENOMEM);
1061         bio->bi_iter.bi_sector = sector;
1062         f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
1063         bio->bi_end_io = f2fs_read_end_io;
1064
1065         if (fscrypt_inode_uses_fs_layer_crypto(inode))
1066                 post_read_steps |= STEP_DECRYPT;
1067
1068         if (f2fs_need_verity(inode, first_idx))
1069                 post_read_steps |= STEP_VERITY;
1070
1071         /*
1072          * STEP_DECOMPRESS is handled specially, since a compressed file might
1073          * contain both compressed and uncompressed clusters.  We'll allocate a
1074          * bio_post_read_ctx if the file is compressed, but the caller is
1075          * responsible for enabling STEP_DECOMPRESS if it's actually needed.
1076          */
1077
1078         if (post_read_steps || f2fs_compressed_file(inode)) {
1079                 /* Due to the mempool, this never fails. */
1080                 ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
1081                 ctx->bio = bio;
1082                 ctx->sbi = sbi;
1083                 ctx->enabled_steps = post_read_steps;
1084                 ctx->fs_blkaddr = blkaddr;
1085                 ctx->decompression_attempted = false;
1086                 bio->bi_private = ctx;
1087         }
1088         iostat_alloc_and_bind_ctx(sbi, bio, ctx);
1089
1090         return bio;
1091 }
1092
1093 /* This can handle encryption stuffs */
1094 static int f2fs_submit_page_read(struct inode *inode, struct page *page,
1095                                  block_t blkaddr, blk_opf_t op_flags,
1096                                  bool for_write)
1097 {
1098         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1099         struct bio *bio;
1100
1101         bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
1102                                         page->index, for_write);
1103         if (IS_ERR(bio))
1104                 return PTR_ERR(bio);
1105
1106         /* wait for GCed page writeback via META_MAPPING */
1107         f2fs_wait_on_block_writeback(inode, blkaddr);
1108
1109         if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
1110                 bio_put(bio);
1111                 return -EFAULT;
1112         }
1113         inc_page_count(sbi, F2FS_RD_DATA);
1114         f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE);
1115         f2fs_submit_read_bio(sbi, bio, DATA);
1116         return 0;
1117 }
1118
1119 static void __set_data_blkaddr(struct dnode_of_data *dn)
1120 {
1121         struct f2fs_node *rn = F2FS_NODE(dn->node_page);
1122         __le32 *addr_array;
1123         int base = 0;
1124
1125         if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
1126                 base = get_extra_isize(dn->inode);
1127
1128         /* Get physical address of data block */
1129         addr_array = blkaddr_in_node(rn);
1130         addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
1131 }
1132
1133 /*
1134  * Lock ordering for the change of data block address:
1135  * ->data_page
1136  *  ->node_page
1137  *    update block addresses in the node page
1138  */
1139 void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
1140 {
1141         f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1142         __set_data_blkaddr(dn);
1143         if (set_page_dirty(dn->node_page))
1144                 dn->node_changed = true;
1145 }
1146
/*
 * Record @blkaddr as the data block address of @dn: store it in the dnode,
 * write it into the node page via f2fs_set_data_blkaddr(), then refresh the
 * read extent cache for this dnode.
 */
void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
        dn->data_blkaddr = blkaddr;
        f2fs_set_data_blkaddr(dn);
        f2fs_update_read_extent_cache(dn);
}
1153
1154 /* dn->ofs_in_node will be returned with up-to-date last block pointer */
1155 int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
1156 {
1157         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1158         int err;
1159
1160         if (!count)
1161                 return 0;
1162
1163         if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1164                 return -EPERM;
1165         if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
1166                 return err;
1167
1168         trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
1169                                                 dn->ofs_in_node, count);
1170
1171         f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1172
1173         for (; count > 0; dn->ofs_in_node++) {
1174                 block_t blkaddr = f2fs_data_blkaddr(dn);
1175
1176                 if (blkaddr == NULL_ADDR) {
1177                         dn->data_blkaddr = NEW_ADDR;
1178                         __set_data_blkaddr(dn);
1179                         count--;
1180                 }
1181         }
1182
1183         if (set_page_dirty(dn->node_page))
1184                 dn->node_changed = true;
1185         return 0;
1186 }
1187
1188 /* Should keep dn->ofs_in_node unchanged */
1189 int f2fs_reserve_new_block(struct dnode_of_data *dn)
1190 {
1191         unsigned int ofs_in_node = dn->ofs_in_node;
1192         int ret;
1193
1194         ret = f2fs_reserve_new_blocks(dn, 1);
1195         dn->ofs_in_node = ofs_in_node;
1196         return ret;
1197 }
1198
1199 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
1200 {
1201         bool need_put = dn->inode_page ? false : true;
1202         int err;
1203
1204         err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
1205         if (err)
1206                 return err;
1207
1208         if (dn->data_blkaddr == NULL_ADDR)
1209                 err = f2fs_reserve_new_block(dn);
1210         if (err || need_put)
1211                 f2fs_put_dnode(dn);
1212         return err;
1213 }
1214
1215 struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
1216                                      blk_opf_t op_flags, bool for_write,
1217                                      pgoff_t *next_pgofs)
1218 {
1219         struct address_space *mapping = inode->i_mapping;
1220         struct dnode_of_data dn;
1221         struct page *page;
1222         int err;
1223
1224         page = f2fs_grab_cache_page(mapping, index, for_write);
1225         if (!page)
1226                 return ERR_PTR(-ENOMEM);
1227
1228         if (f2fs_lookup_read_extent_cache_block(inode, index,
1229                                                 &dn.data_blkaddr)) {
1230                 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
1231                                                 DATA_GENERIC_ENHANCE_READ)) {
1232                         err = -EFSCORRUPTED;
1233                         f2fs_handle_error(F2FS_I_SB(inode),
1234                                                 ERROR_INVALID_BLKADDR);
1235                         goto put_err;
1236                 }
1237                 goto got_it;
1238         }
1239
1240         set_new_dnode(&dn, inode, NULL, NULL, 0);
1241         err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
1242         if (err) {
1243                 if (err == -ENOENT && next_pgofs)
1244                         *next_pgofs = f2fs_get_next_page_offset(&dn, index);
1245                 goto put_err;
1246         }
1247         f2fs_put_dnode(&dn);
1248
1249         if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
1250                 err = -ENOENT;
1251                 if (next_pgofs)
1252                         *next_pgofs = index + 1;
1253                 goto put_err;
1254         }
1255         if (dn.data_blkaddr != NEW_ADDR &&
1256                         !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
1257                                                 dn.data_blkaddr,
1258                                                 DATA_GENERIC_ENHANCE)) {
1259                 err = -EFSCORRUPTED;
1260                 f2fs_handle_error(F2FS_I_SB(inode),
1261                                         ERROR_INVALID_BLKADDR);
1262                 goto put_err;
1263         }
1264 got_it:
1265         if (PageUptodate(page)) {
1266                 unlock_page(page);
1267                 return page;
1268         }
1269
1270         /*
1271          * A new dentry page is allocated but not able to be written, since its
1272          * new inode page couldn't be allocated due to -ENOSPC.
1273          * In such the case, its blkaddr can be remained as NEW_ADDR.
1274          * see, f2fs_add_link -> f2fs_get_new_data_page ->
1275          * f2fs_init_inode_metadata.
1276          */
1277         if (dn.data_blkaddr == NEW_ADDR) {
1278                 zero_user_segment(page, 0, PAGE_SIZE);
1279                 if (!PageUptodate(page))
1280                         SetPageUptodate(page);
1281                 unlock_page(page);
1282                 return page;
1283         }
1284
1285         err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
1286                                                 op_flags, for_write);
1287         if (err)
1288                 goto put_err;
1289         return page;
1290
1291 put_err:
1292         f2fs_put_page(page, 1);
1293         return ERR_PTR(err);
1294 }
1295
1296 struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index,
1297                                         pgoff_t *next_pgofs)
1298 {
1299         struct address_space *mapping = inode->i_mapping;
1300         struct page *page;
1301
1302         page = find_get_page(mapping, index);
1303         if (page && PageUptodate(page))
1304                 return page;
1305         f2fs_put_page(page, 0);
1306
1307         page = f2fs_get_read_data_page(inode, index, 0, false, next_pgofs);
1308         if (IS_ERR(page))
1309                 return page;
1310
1311         if (PageUptodate(page))
1312                 return page;
1313
1314         wait_on_page_locked(page);
1315         if (unlikely(!PageUptodate(page))) {
1316                 f2fs_put_page(page, 0);
1317                 return ERR_PTR(-EIO);
1318         }
1319         return page;
1320 }
1321
1322 /*
1323  * If it tries to access a hole, return an error.
1324  * Because, the callers, functions in dir.c and GC, should be able to know
1325  * whether this page exists or not.
1326  */
1327 struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
1328                                                         bool for_write)
1329 {
1330         struct address_space *mapping = inode->i_mapping;
1331         struct page *page;
1332 repeat:
1333         page = f2fs_get_read_data_page(inode, index, 0, for_write, NULL);
1334         if (IS_ERR(page))
1335                 return page;
1336
1337         /* wait for read completion */
1338         lock_page(page);
1339         if (unlikely(page->mapping != mapping)) {
1340                 f2fs_put_page(page, 1);
1341                 goto repeat;
1342         }
1343         if (unlikely(!PageUptodate(page))) {
1344                 f2fs_put_page(page, 1);
1345                 return ERR_PTR(-EIO);
1346         }
1347         return page;
1348 }
1349
1350 /*
1351  * Caller ensures that this data page is never allocated.
1352  * A new zero-filled data page is allocated in the page cache.
1353  *
1354  * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
1355  * f2fs_unlock_op().
1356  * Note that, ipage is set only by make_empty_dir, and if any error occur,
1357  * ipage should be released by this function.
1358  */
1359 struct page *f2fs_get_new_data_page(struct inode *inode,
1360                 struct page *ipage, pgoff_t index, bool new_i_size)
1361 {
1362         struct address_space *mapping = inode->i_mapping;
1363         struct page *page;
1364         struct dnode_of_data dn;
1365         int err;
1366
1367         page = f2fs_grab_cache_page(mapping, index, true);
1368         if (!page) {
1369                 /*
1370                  * before exiting, we should make sure ipage will be released
1371                  * if any error occur.
1372                  */
1373                 f2fs_put_page(ipage, 1);
1374                 return ERR_PTR(-ENOMEM);
1375         }
1376
1377         set_new_dnode(&dn, inode, ipage, NULL, 0);
1378         err = f2fs_reserve_block(&dn, index);
1379         if (err) {
1380                 f2fs_put_page(page, 1);
1381                 return ERR_PTR(err);
1382         }
1383         if (!ipage)
1384                 f2fs_put_dnode(&dn);
1385
1386         if (PageUptodate(page))
1387                 goto got_it;
1388
1389         if (dn.data_blkaddr == NEW_ADDR) {
1390                 zero_user_segment(page, 0, PAGE_SIZE);
1391                 if (!PageUptodate(page))
1392                         SetPageUptodate(page);
1393         } else {
1394                 f2fs_put_page(page, 1);
1395
1396                 /* if ipage exists, blkaddr should be NEW_ADDR */
1397                 f2fs_bug_on(F2FS_I_SB(inode), ipage);
1398                 page = f2fs_get_lock_data_page(inode, index, true);
1399                 if (IS_ERR(page))
1400                         return page;
1401         }
1402 got_it:
1403         if (new_i_size && i_size_read(inode) <
1404                                 ((loff_t)(index + 1) << PAGE_SHIFT))
1405                 f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
1406         return page;
1407 }
1408
1409 static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
1410 {
1411         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1412         struct f2fs_summary sum;
1413         struct node_info ni;
1414         block_t old_blkaddr;
1415         blkcnt_t count = 1;
1416         int err;
1417
1418         if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1419                 return -EPERM;
1420
1421         err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
1422         if (err)
1423                 return err;
1424
1425         dn->data_blkaddr = f2fs_data_blkaddr(dn);
1426         if (dn->data_blkaddr == NULL_ADDR) {
1427                 err = inc_valid_block_count(sbi, dn->inode, &count);
1428                 if (unlikely(err))
1429                         return err;
1430         }
1431
1432         set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1433         old_blkaddr = dn->data_blkaddr;
1434         f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
1435                                 &sum, seg_type, NULL);
1436         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
1437                 invalidate_mapping_pages(META_MAPPING(sbi),
1438                                         old_blkaddr, old_blkaddr);
1439                 f2fs_invalidate_compress_page(sbi, old_blkaddr);
1440         }
1441         f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
1442         return 0;
1443 }
1444
1445 static void f2fs_map_lock(struct f2fs_sb_info *sbi, int flag)
1446 {
1447         if (flag == F2FS_GET_BLOCK_PRE_AIO)
1448                 f2fs_down_read(&sbi->node_change);
1449         else
1450                 f2fs_lock_op(sbi);
1451 }
1452
1453 static void f2fs_map_unlock(struct f2fs_sb_info *sbi, int flag)
1454 {
1455         if (flag == F2FS_GET_BLOCK_PRE_AIO)
1456                 f2fs_up_read(&sbi->node_change);
1457         else
1458                 f2fs_unlock_op(sbi);
1459 }
1460
1461 int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index)
1462 {
1463         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1464         int err = 0;
1465
1466         f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
1467         if (!f2fs_lookup_read_extent_cache_block(dn->inode, index,
1468                                                 &dn->data_blkaddr))
1469                 err = f2fs_reserve_block(dn, index);
1470         f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO);
1471
1472         return err;
1473 }
1474
1475 static int f2fs_map_no_dnode(struct inode *inode,
1476                 struct f2fs_map_blocks *map, struct dnode_of_data *dn,
1477                 pgoff_t pgoff)
1478 {
1479         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1480
1481         /*
1482          * There is one exceptional case that read_node_page() may return
1483          * -ENOENT due to filesystem has been shutdown or cp_error, return
1484          * -EIO in that case.
1485          */
1486         if (map->m_may_create &&
1487             (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || f2fs_cp_error(sbi)))
1488                 return -EIO;
1489
1490         if (map->m_next_pgofs)
1491                 *map->m_next_pgofs = f2fs_get_next_page_offset(dn, pgoff);
1492         if (map->m_next_extent)
1493                 *map->m_next_extent = f2fs_get_next_page_offset(dn, pgoff);
1494         return 0;
1495 }
1496
1497 static bool f2fs_map_blocks_cached(struct inode *inode,
1498                 struct f2fs_map_blocks *map, int flag)
1499 {
1500         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1501         unsigned int maxblocks = map->m_len;
1502         pgoff_t pgoff = (pgoff_t)map->m_lblk;
1503         struct extent_info ei = {};
1504
1505         if (!f2fs_lookup_read_extent_cache(inode, pgoff, &ei))
1506                 return false;
1507
1508         map->m_pblk = ei.blk + pgoff - ei.fofs;
1509         map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgoff);
1510         map->m_flags = F2FS_MAP_MAPPED;
1511         if (map->m_next_extent)
1512                 *map->m_next_extent = pgoff + map->m_len;
1513
1514         /* for hardware encryption, but to avoid potential issue in future */
1515         if (flag == F2FS_GET_BLOCK_DIO)
1516                 f2fs_wait_on_block_writeback_range(inode,
1517                                         map->m_pblk, map->m_len);
1518
1519         if (f2fs_allow_multi_device_dio(sbi, flag)) {
1520                 int bidx = f2fs_target_device_index(sbi, map->m_pblk);
1521                 struct f2fs_dev_info *dev = &sbi->devs[bidx];
1522
1523                 map->m_bdev = dev->bdev;
1524                 map->m_pblk -= dev->start_blk;
1525                 map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk);
1526         } else {
1527                 map->m_bdev = inode->i_sb->s_bdev;
1528         }
1529         return true;
1530 }
1531
1532 /*
1533  * f2fs_map_blocks() tries to find or build mapping relationship which
1534  * maps continuous logical blocks to physical blocks, and return such
1535  * info via f2fs_map_blocks structure.
1536  */
1537 int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
1538 {
1539         unsigned int maxblocks = map->m_len;
1540         struct dnode_of_data dn;
1541         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1542         int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
1543         pgoff_t pgofs, end_offset, end;
1544         int err = 0, ofs = 1;
1545         unsigned int ofs_in_node, last_ofs_in_node;
1546         blkcnt_t prealloc;
1547         block_t blkaddr;
1548         unsigned int start_pgofs;
1549         int bidx = 0;
1550         bool is_hole;
1551
1552         if (!maxblocks)
1553                 return 0;
1554
1555         if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag))
1556                 goto out;
1557
1558         map->m_bdev = inode->i_sb->s_bdev;
1559         map->m_multidev_dio =
1560                 f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
1561
1562         map->m_len = 0;
1563         map->m_flags = 0;
1564
1565         /* it only supports block size == page size */
1566         pgofs = (pgoff_t)map->m_lblk;
1567         end = pgofs + maxblocks;
1568
1569 next_dnode:
1570         if (map->m_may_create)
1571                 f2fs_map_lock(sbi, flag);
1572
1573         /* When reading holes, we need its node page */
1574         set_new_dnode(&dn, inode, NULL, NULL, 0);
1575         err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
1576         if (err) {
1577                 if (flag == F2FS_GET_BLOCK_BMAP)
1578                         map->m_pblk = 0;
1579                 if (err == -ENOENT)
1580                         err = f2fs_map_no_dnode(inode, map, &dn, pgofs);
1581                 goto unlock_out;
1582         }
1583
1584         start_pgofs = pgofs;
1585         prealloc = 0;
1586         last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
1587         end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
1588
1589 next_block:
1590         blkaddr = f2fs_data_blkaddr(&dn);
1591         is_hole = !__is_valid_data_blkaddr(blkaddr);
1592         if (!is_hole &&
1593             !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
1594                 err = -EFSCORRUPTED;
1595                 f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
1596                 goto sync_out;
1597         }
1598
1599         /* use out-place-update for direct IO under LFS mode */
1600         if (map->m_may_create &&
1601             (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) {
1602                 if (unlikely(f2fs_cp_error(sbi))) {
1603                         err = -EIO;
1604                         goto sync_out;
1605                 }
1606
1607                 switch (flag) {
1608                 case F2FS_GET_BLOCK_PRE_AIO:
1609                         if (blkaddr == NULL_ADDR) {
1610                                 prealloc++;
1611                                 last_ofs_in_node = dn.ofs_in_node;
1612                         }
1613                         break;
1614                 case F2FS_GET_BLOCK_PRE_DIO:
1615                 case F2FS_GET_BLOCK_DIO:
1616                         err = __allocate_data_block(&dn, map->m_seg_type);
1617                         if (err)
1618                                 goto sync_out;
1619                         if (flag == F2FS_GET_BLOCK_PRE_DIO)
1620                                 file_need_truncate(inode);
1621                         set_inode_flag(inode, FI_APPEND_WRITE);
1622                         break;
1623                 default:
1624                         WARN_ON_ONCE(1);
1625                         err = -EIO;
1626                         goto sync_out;
1627                 }
1628
1629                 blkaddr = dn.data_blkaddr;
1630                 if (is_hole)
1631                         map->m_flags |= F2FS_MAP_NEW;
1632         } else if (is_hole) {
1633                 if (f2fs_compressed_file(inode) &&
1634                     f2fs_sanity_check_cluster(&dn) &&
1635                     (flag != F2FS_GET_BLOCK_FIEMAP ||
1636                      IS_ENABLED(CONFIG_F2FS_CHECK_FS))) {
1637                         err = -EFSCORRUPTED;
1638                         f2fs_handle_error(sbi,
1639                                         ERROR_CORRUPTED_CLUSTER);
1640                         goto sync_out;
1641                 }
1642
1643                 switch (flag) {
1644                 case F2FS_GET_BLOCK_PRECACHE:
1645                         goto sync_out;
1646                 case F2FS_GET_BLOCK_BMAP:
1647                         map->m_pblk = 0;
1648                         goto sync_out;
1649                 case F2FS_GET_BLOCK_FIEMAP:
1650                         if (blkaddr == NULL_ADDR) {
1651                                 if (map->m_next_pgofs)
1652                                         *map->m_next_pgofs = pgofs + 1;
1653                                 goto sync_out;
1654                         }
1655                         break;
1656                 default:
1657                         /* for defragment case */
1658                         if (map->m_next_pgofs)
1659                                 *map->m_next_pgofs = pgofs + 1;
1660                         goto sync_out;
1661                 }
1662         }
1663
1664         if (flag == F2FS_GET_BLOCK_PRE_AIO)
1665                 goto skip;
1666
1667         if (map->m_multidev_dio)
1668                 bidx = f2fs_target_device_index(sbi, blkaddr);
1669
1670         if (map->m_len == 0) {
1671                 /* reserved delalloc block should be mapped for fiemap. */
1672                 if (blkaddr == NEW_ADDR)
1673                         map->m_flags |= F2FS_MAP_DELALLOC;
1674                 map->m_flags |= F2FS_MAP_MAPPED;
1675
1676                 map->m_pblk = blkaddr;
1677                 map->m_len = 1;
1678
1679                 if (map->m_multidev_dio)
1680                         map->m_bdev = FDEV(bidx).bdev;
1681         } else if ((map->m_pblk != NEW_ADDR &&
1682                         blkaddr == (map->m_pblk + ofs)) ||
1683                         (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
1684                         flag == F2FS_GET_BLOCK_PRE_DIO) {
1685                 if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
1686                         goto sync_out;
1687                 ofs++;
1688                 map->m_len++;
1689         } else {
1690                 goto sync_out;
1691         }
1692
1693 skip:
1694         dn.ofs_in_node++;
1695         pgofs++;
1696
1697         /* preallocate blocks in batch for one dnode page */
1698         if (flag == F2FS_GET_BLOCK_PRE_AIO &&
1699                         (pgofs == end || dn.ofs_in_node == end_offset)) {
1700
1701                 dn.ofs_in_node = ofs_in_node;
1702                 err = f2fs_reserve_new_blocks(&dn, prealloc);
1703                 if (err)
1704                         goto sync_out;
1705
1706                 map->m_len += dn.ofs_in_node - ofs_in_node;
1707                 if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
1708                         err = -ENOSPC;
1709                         goto sync_out;
1710                 }
1711                 dn.ofs_in_node = end_offset;
1712         }
1713
1714         if (pgofs >= end)
1715                 goto sync_out;
1716         else if (dn.ofs_in_node < end_offset)
1717                 goto next_block;
1718
1719         if (flag == F2FS_GET_BLOCK_PRECACHE) {
1720                 if (map->m_flags & F2FS_MAP_MAPPED) {
1721                         unsigned int ofs = start_pgofs - map->m_lblk;
1722
1723                         f2fs_update_read_extent_cache_range(&dn,
1724                                 start_pgofs, map->m_pblk + ofs,
1725                                 map->m_len - ofs);
1726                 }
1727         }
1728
1729         f2fs_put_dnode(&dn);
1730
1731         if (map->m_may_create) {
1732                 f2fs_map_unlock(sbi, flag);
1733                 f2fs_balance_fs(sbi, dn.node_changed);
1734         }
1735         goto next_dnode;
1736
1737 sync_out:
1738
1739         if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
1740                 /*
1741                  * for hardware encryption, but to avoid potential issue
1742                  * in future
1743                  */
1744                 f2fs_wait_on_block_writeback_range(inode,
1745                                                 map->m_pblk, map->m_len);
1746
1747                 if (map->m_multidev_dio) {
1748                         block_t blk_addr = map->m_pblk;
1749
1750                         bidx = f2fs_target_device_index(sbi, map->m_pblk);
1751
1752                         map->m_bdev = FDEV(bidx).bdev;
1753                         map->m_pblk -= FDEV(bidx).start_blk;
1754
1755                         if (map->m_may_create)
1756                                 f2fs_update_device_state(sbi, inode->i_ino,
1757                                                         blk_addr, map->m_len);
1758
1759                         f2fs_bug_on(sbi, blk_addr + map->m_len >
1760                                                 FDEV(bidx).end_blk + 1);
1761                 }
1762         }
1763
1764         if (flag == F2FS_GET_BLOCK_PRECACHE) {
1765                 if (map->m_flags & F2FS_MAP_MAPPED) {
1766                         unsigned int ofs = start_pgofs - map->m_lblk;
1767
1768                         f2fs_update_read_extent_cache_range(&dn,
1769                                 start_pgofs, map->m_pblk + ofs,
1770                                 map->m_len - ofs);
1771                 }
1772                 if (map->m_next_extent)
1773                         *map->m_next_extent = pgofs + 1;
1774         }
1775         f2fs_put_dnode(&dn);
1776 unlock_out:
1777         if (map->m_may_create) {
1778                 f2fs_map_unlock(sbi, flag);
1779                 f2fs_balance_fs(sbi, dn.node_changed);
1780         }
1781 out:
1782         trace_f2fs_map_blocks(inode, map, flag, err);
1783         return err;
1784 }
1785
1786 bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
1787 {
1788         struct f2fs_map_blocks map;
1789         block_t last_lblk;
1790         int err;
1791
1792         if (pos + len > i_size_read(inode))
1793                 return false;
1794
1795         map.m_lblk = F2FS_BYTES_TO_BLK(pos);
1796         map.m_next_pgofs = NULL;
1797         map.m_next_extent = NULL;
1798         map.m_seg_type = NO_CHECK_TYPE;
1799         map.m_may_create = false;
1800         last_lblk = F2FS_BLK_ALIGN(pos + len);
1801
1802         while (map.m_lblk < last_lblk) {
1803                 map.m_len = last_lblk - map.m_lblk;
1804                 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
1805                 if (err || map.m_len == 0)
1806                         return false;
1807                 map.m_lblk += map.m_len;
1808         }
1809         return true;
1810 }
1811
1812 static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
1813 {
1814         return (bytes >> inode->i_blkbits);
1815 }
1816
1817 static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
1818 {
1819         return (blks << inode->i_blkbits);
1820 }
1821
1822 static int f2fs_xattr_fiemap(struct inode *inode,
1823                                 struct fiemap_extent_info *fieinfo)
1824 {
1825         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1826         struct page *page;
1827         struct node_info ni;
1828         __u64 phys = 0, len;
1829         __u32 flags;
1830         nid_t xnid = F2FS_I(inode)->i_xattr_nid;
1831         int err = 0;
1832
1833         if (f2fs_has_inline_xattr(inode)) {
1834                 int offset;
1835
1836                 page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
1837                                                 inode->i_ino, false);
1838                 if (!page)
1839                         return -ENOMEM;
1840
1841                 err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
1842                 if (err) {
1843                         f2fs_put_page(page, 1);
1844                         return err;
1845                 }
1846
1847                 phys = blks_to_bytes(inode, ni.blk_addr);
1848                 offset = offsetof(struct f2fs_inode, i_addr) +
1849                                         sizeof(__le32) * (DEF_ADDRS_PER_INODE -
1850                                         get_inline_xattr_addrs(inode));
1851
1852                 phys += offset;
1853                 len = inline_xattr_size(inode);
1854
1855                 f2fs_put_page(page, 1);
1856
1857                 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
1858
1859                 if (!xnid)
1860                         flags |= FIEMAP_EXTENT_LAST;
1861
1862                 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1863                 trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1864                 if (err)
1865                         return err;
1866         }
1867
1868         if (xnid) {
1869                 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
1870                 if (!page)
1871                         return -ENOMEM;
1872
1873                 err = f2fs_get_node_info(sbi, xnid, &ni, false);
1874                 if (err) {
1875                         f2fs_put_page(page, 1);
1876                         return err;
1877                 }
1878
1879                 phys = blks_to_bytes(inode, ni.blk_addr);
1880                 len = inode->i_sb->s_blocksize;
1881
1882                 f2fs_put_page(page, 1);
1883
1884                 flags = FIEMAP_EXTENT_LAST;
1885         }
1886
1887         if (phys) {
1888                 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1889                 trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1890         }
1891
1892         return (err < 0 ? err : 0);
1893 }
1894
1895 static loff_t max_inode_blocks(struct inode *inode)
1896 {
1897         loff_t result = ADDRS_PER_INODE(inode);
1898         loff_t leaf_count = ADDRS_PER_BLOCK(inode);
1899
1900         /* two direct node blocks */
1901         result += (leaf_count * 2);
1902
1903         /* two indirect node blocks */
1904         leaf_count *= NIDS_PER_BLOCK;
1905         result += (leaf_count * 2);
1906
1907         /* one double indirect node block */
1908         leaf_count *= NIDS_PER_BLOCK;
1909         result += leaf_count;
1910
1911         return result;
1912 }
1913
/*
 * f2fs_fiemap() - report the extent mapping of a byte range of @inode.
 * @inode:   inode to describe
 * @fieinfo: fiemap request/result descriptor
 * @start:   first byte of the range
 * @len:     length of the range in bytes
 *
 * FIEMAP_FLAG_XATTR requests are answered by f2fs_xattr_fiemap() and inline
 * data/dentry inodes by f2fs_inline_data_fiemap(); everything else is walked
 * block by block with f2fs_map_blocks(F2FS_GET_BLOCK_FIEMAP).
 *
 * An extent found in one round of the loop is only recorded in
 * logical/phys/size/flags; it is passed to fiemap_fill_next_extent() in a
 * later round.  That delay lets the code OR in FIEMAP_EXTENT_LAST and grow
 * the extent by the unmapped tail of a compressed cluster before emitting it.
 *
 * The whole walk runs under inode_lock().
 *
 * Return: 0 on success or a negative errno (-EFBIG when @start is beyond the
 * maximum file size, -EINTR on a fatal signal, or an error from a helper).
 */
1914 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1915                 u64 start, u64 len)
1916 {
1917         struct f2fs_map_blocks map;
1918         sector_t start_blk, last_blk;
1919         pgoff_t next_pgofs;
             /* pending extent, not yet handed to fiemap_fill_next_extent() */
1920         u64 logical = 0, phys = 0, size = 0;
1921         u32 flags = 0;
1922         int ret = 0;
             /* compr_cluster: currently inside a cluster that began with COMPRESS_ADDR */
1923         bool compr_cluster = false, compr_appended;
1924         unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
             /* blocks of the current compressed cluster accounted for so far */
1925         unsigned int count_in_cluster = 0;
1926         loff_t maxbytes;
1927
1928         if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
1929                 ret = f2fs_precache_extents(inode);
1930                 if (ret)
1931                         return ret;
1932         }
1933
1934         ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
1935         if (ret)
1936                 return ret;
1937
1938         inode_lock(inode);
1939
1940         maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;
1941         if (start > maxbytes) {
1942                 ret = -EFBIG;
1943                 goto out;
1944         }
1945
             /* clamp the request so that start + len does not exceed maxbytes */
1946         if (len > maxbytes || (maxbytes - len) < start)
1947                 len = maxbytes - start;
1948
1949         if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
1950                 ret = f2fs_xattr_fiemap(inode, fieinfo);
1951                 goto out;
1952         }
1953
1954         if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
1955                 ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
                     /* -EAGAIN falls through to the block-mapping walk below */
1956                 if (ret != -EAGAIN)
1957                         goto out;
1958         }
1959
             /* a sub-block request still has to cover one whole block */
1960         if (bytes_to_blks(inode, len) == 0)
1961                 len = blks_to_bytes(inode, 1);
1962
1963         start_blk = bytes_to_blks(inode, start);
1964         last_blk = bytes_to_blks(inode, start + len - 1);
1965
1966 next:
1967         memset(&map, 0, sizeof(map));
1968         map.m_lblk = start_blk;
1969         map.m_len = bytes_to_blks(inode, len);
1970         map.m_next_pgofs = &next_pgofs;
1971         map.m_seg_type = NO_CHECK_TYPE;
1972
             /*
              * Inside a compressed cluster start_blk has not been advanced
              * yet: skip one block past it and ask only for what is left of
              * the cluster.
              */
1973         if (compr_cluster) {
1974                 map.m_lblk += 1;
1975                 map.m_len = cluster_size - count_in_cluster;
1976         }
1977
1978         ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
1979         if (ret)
1980                 goto out;
1981
1982         /* HOLE */
1983         if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
1984                 start_blk = next_pgofs;
1985
                     /* keep scanning while the next offset is still addressable */
1986                 if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
1987                                                 max_inode_blocks(inode)))
1988                         goto prep_next;
1989
                     /* nothing can follow: the pending extent is the last one */
1990                 flags |= FIEMAP_EXTENT_LAST;
1991         }
1992
1993         compr_appended = false;
1994         /* In a case of compressed cluster, append this to the last extent */
1995         if (compr_cluster && ((map.m_flags & F2FS_MAP_DELALLOC) ||
1996                         !(map.m_flags & F2FS_MAP_FLAGS))) {
1997                 compr_appended = true;
1998                 goto skip_fill;
1999         }
2000
             /* emit the extent recorded in an earlier round, if there is one */
2001         if (size) {
2002                 flags |= FIEMAP_EXTENT_MERGED;
2003                 if (IS_ENCRYPTED(inode))
2004                         flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
2005
2006                 ret = fiemap_fill_next_extent(fieinfo, logical,
2007                                 phys, size, flags);
2008                 trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
2009                 if (ret)
2010                         goto out;
2011                 size = 0;
2012         }
2013
2014         if (start_blk > last_blk)
2015                 goto out;
2016
2017 skip_fill:
2018         if (map.m_pblk == COMPRESS_ADDR) {
                     /* first block of a compressed cluster: start counting */
2019                 compr_cluster = true;
2020                 count_in_cluster = 1;
2021         } else if (compr_appended) {
                     /*
                      * Rest of the cluster is not mapped (or delalloc): grow
                      * the pending extent by it and leave the cluster.
                      */
2022                 unsigned int appended_blks = cluster_size -
2023                                                 count_in_cluster + 1;
2024                 size += blks_to_bytes(inode, appended_blks);
2025                 start_blk += appended_blks;
2026                 compr_cluster = false;
2027         } else {
                     /* record a new pending extent from this mapping */
2028                 logical = blks_to_bytes(inode, start_blk);
2029                 phys = __is_valid_data_blkaddr(map.m_pblk) ?
2030                         blks_to_bytes(inode, map.m_pblk) : 0;
2031                 size = blks_to_bytes(inode, map.m_len);
2032                 flags = 0;
2033
2034                 if (compr_cluster) {
2035                         flags = FIEMAP_EXTENT_ENCODED;
2036                         count_in_cluster += map.m_len;
2037                         if (count_in_cluster == cluster_size) {
2038                                 compr_cluster = false;
                                     /* one extra block is added to the extent size here */
2039                                 size += blks_to_bytes(inode, 1);
2040                         }
2041                 } else if (map.m_flags & F2FS_MAP_DELALLOC) {
2042                         flags = FIEMAP_EXTENT_UNWRITTEN;
2043                 }
2044
2045                 start_blk += bytes_to_blks(inode, size);
2046         }
2047
2048 prep_next:
2049         cond_resched();
2050         if (fatal_signal_pending(current))
2051                 ret = -EINTR;
2052         else
2053                 goto next;
2054 out:
             /*
              * A return of 1 is mapped to success.  NOTE(review): presumably
              * this is fiemap_fill_next_extent()'s "stop iterating" value;
              * confirm against its documentation.
              */
2055         if (ret == 1)
2056                 ret = 0;
2057
2058         inode_unlock(inode);
2059         return ret;
2060 }
2061
2062 static inline loff_t f2fs_readpage_limit(struct inode *inode)
2063 {
2064         if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode))
2065                 return inode->i_sb->s_maxbytes;
2066
2067         return i_size_read(inode);
2068 }
2069
/*
 * f2fs_read_single_page() - queue one page for reading, or zero-fill it.
 * @inode:             inode the page belongs to
 * @page:              locked page to fill
 * @nr_pages:          bounds how far ahead blocks are mapped; also passed
 *                     to f2fs_grab_read_bio() when a new bio is allocated
 * @map:               mapping result kept across calls so that later pages
 *                     can reuse it
 * @bio_ret:           in/out: read bio under construction (may be NULL)
 * @last_block_in_bio: in/out: block address of the last page added to the bio
 * @is_readahead:      true adds REQ_RAHEAD to a newly allocated bio
 *
 * A page that maps to a block is added to the bio.  A page that is a hole or
 * lies at/after the read limit is zeroed, marked up to date and unlocked
 * here.
 *
 * Return: 0 on success or a negative errno.  On error the page is not
 * unlocked by this function.
 */
2070 static int f2fs_read_single_page(struct inode *inode, struct page *page,
2071                                         unsigned nr_pages,
2072                                         struct f2fs_map_blocks *map,
2073                                         struct bio **bio_ret,
2074                                         sector_t *last_block_in_bio,
2075                                         bool is_readahead)
2076 {
2077         struct bio *bio = *bio_ret;
2078         const unsigned blocksize = blks_to_bytes(inode, 1);
2079         sector_t block_in_file;
2080         sector_t last_block;
2081         sector_t last_block_in_file;
2082         sector_t block_nr;
2083         int ret = 0;
2084
2085         block_in_file = (sector_t)page_index(page);
2086         last_block = block_in_file + nr_pages;
             /* read limit rounded up to whole blocks */
2087         last_block_in_file = bytes_to_blks(inode,
2088                         f2fs_readpage_limit(inode) + blocksize - 1);
2089         if (last_block > last_block_in_file)
2090                 last_block = last_block_in_file;
2091
2092         /* just zeroing out page which is beyond EOF */
2093         if (block_in_file >= last_block)
2094                 goto zero_out;
2095         /*
2096          * Map blocks using the previous result first.
2097          */
             /*
              * NOTE(review): the lower bound is strict, so a page at exactly
              * map->m_lblk is looked up again rather than served from @map.
              */
2098         if ((map->m_flags & F2FS_MAP_MAPPED) &&
2099                         block_in_file > map->m_lblk &&
2100                         block_in_file < (map->m_lblk + map->m_len))
2101                 goto got_it;
2102
2103         /*
2104          * Then do more f2fs_map_blocks() calls until we are
2105          * done with this page.
2106          */
2107         map->m_lblk = block_in_file;
2108         map->m_len = last_block - block_in_file;
2109
2110         ret = f2fs_map_blocks(inode, map, F2FS_GET_BLOCK_DEFAULT);
2111         if (ret)
2112                 goto out;
2113 got_it:
2114         if ((map->m_flags & F2FS_MAP_MAPPED)) {
                     /* physical block = extent start + offset inside the extent */
2115                 block_nr = map->m_pblk + block_in_file - map->m_lblk;
2116                 SetPageMappedToDisk(page);
2117
2118                 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
2119                                                 DATA_GENERIC_ENHANCE_READ)) {
2120                         ret = -EFSCORRUPTED;
2121                         f2fs_handle_error(F2FS_I_SB(inode),
2122                                                 ERROR_INVALID_BLKADDR);
2123                         goto out;
2124                 }
2125         } else {
                     /* zero_out is also the jump target for pages past the read limit */
2126 zero_out:
2127                 zero_user_segment(page, 0, PAGE_SIZE);
2128                 if (f2fs_need_verity(inode, page->index) &&
2129                     !fsverity_verify_page(page)) {
2130                         ret = -EIO;
2131                         goto out;
2132                 }
2133                 if (!PageUptodate(page))
2134                         SetPageUptodate(page);
2135                 unlock_page(page);
2136                 goto out;
2137         }
2138
2139         /*
2140          * This page will go to BIO.  Do we need to send this
2141          * BIO off first?
2142          */
2143         if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
2144                                        *last_block_in_bio, block_nr) ||
2145                     !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
                     /* also reached from below when bio_add_page() cannot take the page */
2146 submit_and_realloc:
2147                 f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
2148                 bio = NULL;
2149         }
2150         if (bio == NULL) {
2151                 bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
2152                                 is_readahead ? REQ_RAHEAD : 0, page->index,
2153                                 false);
2154                 if (IS_ERR(bio)) {
2155                         ret = PTR_ERR(bio);
                             /* hand NULL, not the ERR_PTR, back through *bio_ret */
2156                         bio = NULL;
2157                         goto out;
2158                 }
2159         }
2160
2161         /*
2162          * If the page is under writeback, we need to wait for
2163          * its completion to see the correct decrypted data.
2164          */
2165         f2fs_wait_on_block_writeback(inode, block_nr);
2166
2167         if (bio_add_page(bio, page, blocksize, 0) < blocksize)
2168                 goto submit_and_realloc;
2169
2170         inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
2171         f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
2172                                                         F2FS_BLKSIZE);
2173         *last_block_in_bio = block_nr;
2174         goto out;
2175 out:
2176         *bio_ret = bio;
2177         return ret;
2178 }
2179
2180 #ifdef CONFIG_F2FS_FS_COMPRESSION
2181 int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
2182                                 unsigned nr_pages, sector_t *last_block_in_bio,
2183                                 bool is_readahead, bool for_write)
2184 {
2185         struct dnode_of_data dn;
2186         struct inode *inode = cc->inode;
2187         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2188         struct bio *bio = *bio_ret;
2189         unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
2190         sector_t last_block_in_file;
2191         const unsigned blocksize = blks_to_bytes(inode, 1);
2192         struct decompress_io_ctx *dic = NULL;
2193         struct extent_info ei = {};
2194         bool from_dnode = true;
2195         int i;
2196         int ret = 0;
2197
2198         f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));
2199
2200         last_block_in_file = bytes_to_blks(inode,
2201                         f2fs_readpage_limit(inode) + blocksize - 1);
2202
2203         /* get rid of pages beyond EOF */
2204         for (i = 0; i < cc->cluster_size; i++) {
2205                 struct page *page = cc->rpages[i];
2206
2207                 if (!page)
2208                         continue;
2209                 if ((sector_t)page->index >= last_block_in_file) {
2210                         zero_user_segment(page, 0, PAGE_SIZE);
2211                         if (!PageUptodate(page))
2212                                 SetPageUptodate(page);
2213                 } else if (!PageUptodate(page)) {
2214                         continue;
2215                 }
2216                 unlock_page(page);
2217                 if (for_write)
2218                         put_page(page);
2219                 cc->rpages[i] = NULL;
2220                 cc->nr_rpages--;
2221         }
2222
2223         /* we are done since all pages are beyond EOF */
2224         if (f2fs_cluster_is_empty(cc))
2225                 goto out;
2226
2227         if (f2fs_lookup_read_extent_cache(inode, start_idx, &ei))
2228                 from_dnode = false;
2229
2230         if (!from_dnode)
2231                 goto skip_reading_dnode;
2232
2233         set_new_dnode(&dn, inode, NULL, NULL, 0);
2234         ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
2235         if (ret)
2236                 goto out;
2237
2238         f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
2239
2240 skip_reading_dnode:
2241         for (i = 1; i < cc->cluster_size; i++) {
2242                 block_t blkaddr;
2243
2244                 blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
2245                                         dn.ofs_in_node + i) :
2246                                         ei.blk + i - 1;
2247
2248                 if (!__is_valid_data_blkaddr(blkaddr))
2249                         break;
2250
2251                 if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
2252                         ret = -EFAULT;
2253                         goto out_put_dnode;
2254                 }
2255                 cc->nr_cpages++;
2256
2257                 if (!from_dnode && i >= ei.c_len)
2258                         break;
2259         }
2260
2261         /* nothing to decompress */
2262         if (cc->nr_cpages == 0) {
2263                 ret = 0;
2264                 goto out_put_dnode;
2265         }
2266
2267         dic = f2fs_alloc_dic(cc);
2268         if (IS_ERR(dic)) {
2269                 ret = PTR_ERR(dic);
2270                 goto out_put_dnode;
2271         }
2272
2273         for (i = 0; i < cc->nr_cpages; i++) {
2274                 struct page *page = dic->cpages[i];
2275                 block_t blkaddr;
2276                 struct bio_post_read_ctx *ctx;
2277
2278                 blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
2279                                         dn.ofs_in_node + i + 1) :
2280                                         ei.blk + i;
2281
2282                 f2fs_wait_on_block_writeback(inode, blkaddr);
2283
2284                 if (f2fs_load_compressed_page(sbi, page, blkaddr)) {
2285                         if (atomic_dec_and_test(&dic->remaining_pages))
2286                                 f2fs_decompress_cluster(dic, true);
2287                         continue;
2288                 }
2289
2290                 if (bio && (!page_is_mergeable(sbi, bio,
2291                                         *last_block_in_bio, blkaddr) ||
2292                     !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
2293 submit_and_realloc:
2294                         f2fs_submit_read_bio(sbi, bio, DATA);
2295                         bio = NULL;
2296                 }
2297
2298                 if (!bio) {
2299                         bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
2300                                         is_readahead ? REQ_RAHEAD : 0,
2301                                         page->index, for_write);
2302                         if (IS_ERR(bio)) {
2303                                 ret = PTR_ERR(bio);
2304                                 f2fs_decompress_end_io(dic, ret, true);
2305                                 f2fs_put_dnode(&dn);
2306                                 *bio_ret = NULL;
2307                                 return ret;
2308                         }
2309                 }
2310
2311                 if (bio_add_page(bio, page, blocksize, 0) < blocksize)
2312                         goto submit_and_realloc;
2313
2314                 ctx = get_post_read_ctx(bio);
2315                 ctx->enabled_steps |= STEP_DECOMPRESS;
2316                 refcount_inc(&dic->refcnt);
2317
2318                 inc_page_count(sbi, F2FS_RD_DATA);
2319                 f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE);
2320                 *last_block_in_bio = blkaddr;
2321         }
2322
2323         if (from_dnode)
2324                 f2fs_put_dnode(&dn);
2325
2326         *bio_ret = bio;
2327         return 0;
2328
2329 out_put_dnode:
2330         if (from_dnode)
2331                 f2fs_put_dnode(&dn);
2332 out:
2333         for (i = 0; i < cc->cluster_size; i++) {
2334                 if (cc->rpages[i]) {
2335                         ClearPageUptodate(cc->rpages[i]);
2336                         unlock_page(cc->rpages[i]);
2337                 }
2338         }
2339         *bio_ret = bio;
2340         return ret;
2341 }
2342 #endif
2343
2344 /*
2345  * This function was originally taken from fs/mpage.c, and customized for f2fs.
2346  * Major change was from block_size == page_size in f2fs by default.
      *
      * @inode: inode being read
      * @rac:   readahead control, or NULL for a single-page read
      * @page:  page to read when @rac is NULL; ignored otherwise
      *
      * With @rac set, readahead_count(rac) pages are taken one by one with
      * readahead_page() and their reference is dropped once handled.  Without
      * @rac exactly one page, @page, is processed.
      *
      * With CONFIG_F2FS_FS_COMPRESSION and a compressed file, pages of one
      * cluster are collected in a compress context and submitted together
      * through f2fs_read_multi_pages(); pages of clusters that turn out not to
      * be compressed go through f2fs_read_single_page() like any other file.
      *
      * Any bio left at the end is submitted.  The return value is whatever the
      * most recent helper call left in ret (0 or a negative errno).
2347  */
2348 static int f2fs_mpage_readpages(struct inode *inode,
2349                 struct readahead_control *rac, struct page *page)
2350 {
2351         struct bio *bio = NULL;
2352         sector_t last_block_in_bio = 0;
2353         struct f2fs_map_blocks map;
2354 #ifdef CONFIG_F2FS_FS_COMPRESSION
2355         struct compress_ctx cc = {
2356                 .inode = inode,
2357                 .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
2358                 .cluster_size = F2FS_I(inode)->i_cluster_size,
2359                 .cluster_idx = NULL_CLUSTER,
2360                 .rpages = NULL,
2361                 .cpages = NULL,
2362                 .nr_rpages = 0,
2363                 .nr_cpages = 0,
2364         };
             /* index of the last cluster found to be not compressed */
2365         pgoff_t nc_cluster_idx = NULL_CLUSTER;
2366 #endif
2367         unsigned nr_pages = rac ? readahead_count(rac) : 1;
             /* nr_pages counts down in the loop; the helpers get the initial total */
2368         unsigned max_nr_pages = nr_pages;
2369         int ret = 0;
2370
2371         map.m_pblk = 0;
2372         map.m_lblk = 0;
2373         map.m_len = 0;
2374         map.m_flags = 0;
2375         map.m_next_pgofs = NULL;
2376         map.m_next_extent = NULL;
2377         map.m_seg_type = NO_CHECK_TYPE;
2378         map.m_may_create = false;
2379
2380         for (; nr_pages; nr_pages--) {
2381                 if (rac) {
2382                         page = readahead_page(rac);
2383                         prefetchw(&page->flags);
2384                 }
2385
2386 #ifdef CONFIG_F2FS_FS_COMPRESSION
2387                 if (f2fs_compressed_file(inode)) {
2388                         /* there are remained compressed pages, submit them */
2389                         if (!f2fs_cluster_can_merge_page(&cc, page->index)) {
2390                                 ret = f2fs_read_multi_pages(&cc, &bio,
2391                                                         max_nr_pages,
2392                                                         &last_block_in_bio,
2393                                                         rac != NULL, false);
2394                                 f2fs_destroy_compress_ctx(&cc, false);
2395                                 if (ret)
2396                                         goto set_error_page;
2397                         }
2398                         if (cc.cluster_idx == NULL_CLUSTER) {
                                     /* same cluster as the last non-compressed one: skip the lookup */
2399                                 if (nc_cluster_idx ==
2400                                         page->index >> cc.log_cluster_size) {
2401                                         goto read_single_page;
2402                                 }
2403
2404                                 ret = f2fs_is_compressed_cluster(inode, page->index);
2405                                 if (ret < 0)
2406                                         goto set_error_page;
2407                                 else if (!ret) {
2408                                         nc_cluster_idx =
2409                                                 page->index >> cc.log_cluster_size;
2410                                         goto read_single_page;
2411                                 }
2412
2413                                 nc_cluster_idx = NULL_CLUSTER;
2414                         }
2415                         ret = f2fs_init_compress_ctx(&cc);
2416                         if (ret)
2417                                 goto set_error_page;
2418
2419                         f2fs_compress_ctx_add_page(&cc, page);
2420
2421                         goto next_page;
2422                 }
2423 read_single_page:
2424 #endif
2425
2426                 ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
2427                                         &bio, &last_block_in_bio, rac);
2428                 if (ret) {
2429 #ifdef CONFIG_F2FS_FS_COMPRESSION
2430 set_error_page:
2431 #endif
                             /* failed page: hand it back zeroed and unlocked */
2432                         zero_user_segment(page, 0, PAGE_SIZE);
2433                         unlock_page(page);
2434                 }
2435 #ifdef CONFIG_F2FS_FS_COMPRESSION
2436 next_page:
2437 #endif
2438                 if (rac)
2439                         put_page(page);
2440
2441 #ifdef CONFIG_F2FS_FS_COMPRESSION
2442                 if (f2fs_compressed_file(inode)) {
2443                         /* last page */
2444                         if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
2445                                 ret = f2fs_read_multi_pages(&cc, &bio,
2446                                                         max_nr_pages,
2447                                                         &last_block_in_bio,
2448                                                         rac != NULL, false);
2449                                 f2fs_destroy_compress_ctx(&cc, false);
2450                         }
2451                 }
2452 #endif
2453         }
2454         if (bio)
2455                 f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
2456         return ret;
2457 }
2458
2459 static int f2fs_read_data_folio(struct file *file, struct folio *folio)
2460 {
2461         struct page *page = &folio->page;
2462         struct inode *inode = page_file_mapping(page)->host;
2463         int ret = -EAGAIN;
2464
2465         trace_f2fs_readpage(page, DATA);
2466
2467         if (!f2fs_is_compress_backend_ready(inode)) {
2468                 unlock_page(page);
2469                 return -EOPNOTSUPP;
2470         }
2471
2472         /* If the file has inline data, try to read it directly */
2473         if (f2fs_has_inline_data(inode))
2474                 ret = f2fs_read_inline_data(inode, page);
2475         if (ret == -EAGAIN)
2476                 ret = f2fs_mpage_readpages(inode, NULL, page);
2477         return ret;
2478 }
2479
2480 static void f2fs_readahead(struct readahead_control *rac)
2481 {
2482         struct inode *inode = rac->mapping->host;
2483
2484         trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac));
2485
2486         if (!f2fs_is_compress_backend_ready(inode))
2487                 return;
2488
2489         /* If the file has inline data, skip readahead */
2490         if (f2fs_has_inline_data(inode))
2491                 return;
2492
2493         f2fs_mpage_readpages(inode, rac, NULL);
2494 }
2495
2496 int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
2497 {
2498         struct inode *inode = fio->page->mapping->host;
2499         struct page *mpage, *page;
2500         gfp_t gfp_flags = GFP_NOFS;
2501
2502         if (!f2fs_encrypted_file(inode))
2503                 return 0;
2504
2505         page = fio->compressed_page ? fio->compressed_page : fio->page;
2506
2507         /* wait for GCed page writeback via META_MAPPING */
2508         f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
2509
2510         if (fscrypt_inode_uses_inline_crypto(inode))
2511                 return 0;
2512
2513 retry_encrypt:
2514         fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
2515                                         PAGE_SIZE, 0, gfp_flags);
2516         if (IS_ERR(fio->encrypted_page)) {
2517                 /* flush pending IOs and wait for a while in the ENOMEM case */
2518                 if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
2519                         f2fs_flush_merged_writes(fio->sbi);
2520                         memalloc_retry_wait(GFP_NOFS);
2521                         gfp_flags |= __GFP_NOFAIL;
2522                         goto retry_encrypt;
2523                 }
2524                 return PTR_ERR(fio->encrypted_page);
2525         }
2526
2527         mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
2528         if (mpage) {
2529                 if (PageUptodate(mpage))
2530                         memcpy(page_address(mpage),
2531                                 page_address(fio->encrypted_page), PAGE_SIZE);
2532                 f2fs_put_page(mpage, 1);
2533         }
2534         return 0;
2535 }
2536
2537 static inline bool check_inplace_update_policy(struct inode *inode,
2538                                 struct f2fs_io_info *fio)
2539 {
2540         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2541
2542         if (IS_F2FS_IPU_HONOR_OPU_WRITE(sbi) &&
2543             is_inode_flag_set(inode, FI_OPU_WRITE))
2544                 return false;
2545         if (IS_F2FS_IPU_FORCE(sbi))
2546                 return true;
2547         if (IS_F2FS_IPU_SSR(sbi) && f2fs_need_SSR(sbi))
2548                 return true;
2549         if (IS_F2FS_IPU_UTIL(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util)
2550                 return true;
2551         if (IS_F2FS_IPU_SSR_UTIL(sbi) && f2fs_need_SSR(sbi) &&
2552             utilization(sbi) > SM_I(sbi)->min_ipu_util)
2553                 return true;
2554
2555         /*
2556          * IPU for rewrite async pages
2557          */
2558         if (IS_F2FS_IPU_ASYNC(sbi) && fio && fio->op == REQ_OP_WRITE &&
2559             !(fio->op_flags & REQ_SYNC) && !IS_ENCRYPTED(inode))
2560                 return true;
2561
2562         /* this is only set during fdatasync */
2563         if (IS_F2FS_IPU_FSYNC(sbi) && is_inode_flag_set(inode, FI_NEED_IPU))
2564                 return true;
2565
2566         if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2567                         !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2568                 return true;
2569
2570         return false;
2571 }
2572
2573 bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
2574 {
2575         /* swap file is migrating in aligned write mode */
2576         if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2577                 return false;
2578
2579         if (f2fs_is_pinned_file(inode))
2580                 return true;
2581
2582         /* if this is cold file, we should overwrite to avoid fragmentation */
2583         if (file_is_cold(inode) && !is_inode_flag_set(inode, FI_OPU_WRITE))
2584                 return true;
2585
2586         return check_inplace_update_policy(inode, fio);
2587 }
2588
2589 bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
2590 {
2591         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2592
2593         /* The below cases were checked when setting it. */
2594         if (f2fs_is_pinned_file(inode))
2595                 return false;
2596         if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
2597                 return true;
2598         if (f2fs_lfs_mode(sbi))
2599                 return true;
2600         if (S_ISDIR(inode->i_mode))
2601                 return true;
2602         if (IS_NOQUOTA(inode))
2603                 return true;
2604         if (f2fs_is_atomic_file(inode))
2605                 return true;
2606
2607         /* swap file is migrating in aligned write mode */
2608         if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2609                 return true;
2610
2611         if (is_inode_flag_set(inode, FI_OPU_WRITE))
2612                 return true;
2613
2614         if (fio) {
2615                 if (page_private_gcing(fio->page))
2616                         return true;
2617                 if (page_private_dummy(fio->page))
2618                         return true;
2619                 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2620                         f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2621                         return true;
2622         }
2623         return false;
2624 }
2625
2626 static inline bool need_inplace_update(struct f2fs_io_info *fio)
2627 {
2628         struct inode *inode = fio->page->mapping->host;
2629
2630         if (f2fs_should_update_outplace(inode, fio))
2631                 return false;
2632
2633         return f2fs_should_update_inplace(inode, fio);
2634 }
2635
/*
 * Write the data page described by @fio, either in place (IPU) or out of
 * place (OPU).
 *
 * An in-place write is chosen in two situations:
 *  - need_inplace_update() holds and the old block address is found in the
 *    read extent cache; the dnode lookup and the op lock are skipped;
 *  - after the dnode lookup, the old block address is a valid data address
 *    and need_inplace_update() holds.
 * Otherwise the page is written with f2fs_outplace_write_data().
 *
 * fio->need_lock selects the locking: LOCK_REQ try-locks the op lock before
 * the dnode lookup, LOCK_RETRY try-locks it only right before an OPU (and
 * is then switched to LOCK_REQ so the exit path unlocks), LOCK_DONE takes
 * no lock here.
 *
 * Returns 0 on success (also when the block is NULL_ADDR, i.e. the page was
 * already truncated), -EAGAIN when a trylock failed, -EFSCORRUPTED for an
 * invalid block address, or the error of f2fs_get_dnode_of_data(),
 * f2fs_get_node_info(), f2fs_encrypt_one_page() or
 * f2fs_inplace_write_data().
 */
2636 int f2fs_do_write_data_page(struct f2fs_io_info *fio)
2637 {
2638         struct page *page = fio->page;
2639         struct inode *inode = page->mapping->host;
2640         struct dnode_of_data dn;
2641         struct node_info ni;
2642         bool ipu_force = false;
2643         int err = 0;
2644
2645         /* Use COW inode to make dnode_of_data for atomic write */
2646         if (f2fs_is_atomic_file(inode))
2647                 set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0);
2648         else
2649                 set_new_dnode(&dn, inode, NULL, NULL, 0);
2650
             /*
              * Fast path: IPU is wanted and the old block address comes from
              * the read extent cache, so jump to got_it without a dnode
              * lookup. need_lock is set to LOCK_DONE so no unlock happens on
              * the way out.
              */
2651         if (need_inplace_update(fio) &&
2652             f2fs_lookup_read_extent_cache_block(inode, page->index,
2653                                                 &fio->old_blkaddr)) {
2654                 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
2655                                                 DATA_GENERIC_ENHANCE)) {
2656                         f2fs_handle_error(fio->sbi,
2657                                                 ERROR_INVALID_BLKADDR);
2658                         return -EFSCORRUPTED;
2659                 }
2660
2661                 ipu_force = true;
2662                 fio->need_lock = LOCK_DONE;
2663                 goto got_it;
2664         }
2665
2666         /* Only trylock: avoid a deadlock between page->lock and f2fs_lock_op */
2667         if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
2668                 return -EAGAIN;
2669
2670         err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
2671         if (err)
2672                 goto out;
2673
2674         fio->old_blkaddr = dn.data_blkaddr;
2675
2676         /* This page is already truncated */
2677         if (fio->old_blkaddr == NULL_ADDR) {
2678                 ClearPageUptodate(page);
2679                 clear_page_private_gcing(page);
2680                 goto out_writepage;
2681         }
2682 got_it:
2683         if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
2684                 !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
2685                                                 DATA_GENERIC_ENHANCE)) {
2686                 err = -EFSCORRUPTED;
2687                 f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
2688                 goto out_writepage;
2689         }
2690
2691         /*
2692          * If current allocation needs SSR,
2693          * it had better in-place writes for updated data.
2694          */
2695         if (ipu_force ||
2696                 (__is_valid_data_blkaddr(fio->old_blkaddr) &&
2697                                         need_inplace_update(fio))) {
2698                 err = f2fs_encrypt_one_page(fio);
2699                 if (err)
2700                         goto out_writepage;
2701
2702                 set_page_writeback(page);
                     /* drop the dnode and the op lock before the in-place write */
2703                 f2fs_put_dnode(&dn);
2704                 if (fio->need_lock == LOCK_REQ)
2705                         f2fs_unlock_op(fio->sbi);
2706                 err = f2fs_inplace_write_data(fio);
2707                 if (err) {
                             /* undo: release the bounce page, end the writeback set above */
2708                         if (fscrypt_inode_uses_fs_layer_crypto(inode))
2709                                 fscrypt_finalize_bounce_page(&fio->encrypted_page);
2710                         if (PageWriteback(page))
2711                                 end_page_writeback(page);
2712                 } else {
2713                         set_inode_flag(inode, FI_UPDATE_WRITE);
2714                 }
2715                 trace_f2fs_do_write_data_page(fio->page, IPU);
2716                 return err;
2717         }
2718
             /*
              * OPU from here on. A LOCK_RETRY caller has not taken the op lock
              * yet; try it now and record LOCK_REQ so that "out" unlocks it.
              */
2719         if (fio->need_lock == LOCK_RETRY) {
2720                 if (!f2fs_trylock_op(fio->sbi)) {
2721                         err = -EAGAIN;
2722                         goto out_writepage;
2723                 }
2724                 fio->need_lock = LOCK_REQ;
2725         }
2726
2727         err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
2728         if (err)
2729                 goto out_writepage;
2730
2731         fio->version = ni.version;
2732
2733         err = f2fs_encrypt_one_page(fio);
2734         if (err)
2735                 goto out_writepage;
2736
2737         set_page_writeback(page);
2738
2739         if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
2740                 f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
2741
2742         /* LFS mode write path */
2743         f2fs_outplace_write_data(&dn, fio);
2744         trace_f2fs_do_write_data_page(page, OPU);
2745         set_inode_flag(inode, FI_APPEND_WRITE);
2746         if (page->index == 0)
2747                 set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
     /* common exit: release the dnode, then the op lock if this call holds it */
2748 out_writepage:
2749         f2fs_put_dnode(&dn);
2750 out:
2751         if (fio->need_lock == LOCK_REQ)
2752                 f2fs_unlock_op(fio->sbi);
2753         return err;
2754 }
2755
/*
 * Write back one data page.
 *
 * @page:          page to write. It is unlocked here on every return path
 *                 except AOP_WRITEPAGE_ACTIVATE, so the caller is expected
 *                 to pass it locked.
 * @submitted:     if non-NULL, receives fio.submitted on the return-0 path,
 *                 unless that path ran with wbc->for_reclaim or hit a
 *                 checkpoint error (the local pointer is cleared there).
 * @bio:           stored in the f2fs_io_info handed to
 *                 f2fs_do_write_data_page(); also flushed through
 *                 f2fs_submit_merged_ipu_write() on checkpoint error.
 * @last_block:    stored in the f2fs_io_info as well.
 * @wbc:           writeback control; supplies the write flags and
 *                 for_reclaim.
 * @io_type:       iostat type recorded in the f2fs_io_info.
 * @compr_blocks:  recorded in the f2fs_io_info; a non-zero value also skips
 *                 the end-of-file handling.
 * @allow_balance: when false, f2fs_balance_fs() is not called.
 *
 * Returns 0 when the page was written or deliberately dropped,
 * AOP_WRITEPAGE_ACTIVATE when it was redirtied and is left locked, or a
 * negative errno when it was redirtied and unlocked.
 */
2756 int f2fs_write_single_data_page(struct page *page, int *submitted,
2757                                 struct bio **bio,
2758                                 sector_t *last_block,
2759                                 struct writeback_control *wbc,
2760                                 enum iostat_type io_type,
2761                                 int compr_blocks,
2762                                 bool allow_balance)
2763 {
2764         struct inode *inode = page->mapping->host;
2765         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2766         loff_t i_size = i_size_read(inode);
2767         const pgoff_t end_index = ((unsigned long long)i_size)
2768                                                         >> PAGE_SHIFT;
2769         loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
2770         unsigned offset = 0;
2771         bool need_balance_fs = false;
2772         int err = 0;
2773         struct f2fs_io_info fio = {
2774                 .sbi = sbi,
2775                 .ino = inode->i_ino,
2776                 .type = DATA,
2777                 .op = REQ_OP_WRITE,
2778                 .op_flags = wbc_to_write_flags(wbc),
2779                 .old_blkaddr = NULL_ADDR,
2780                 .page = page,
2781                 .encrypted_page = NULL,
2782                 .submitted = 0,
2783                 .compr_blocks = compr_blocks,
2784                 .need_lock = LOCK_RETRY,
2785                 .post_read = f2fs_post_read_required(inode) ? 1 : 0,
2786                 .io_type = io_type,
2787                 .io_wbc = wbc,
2788                 .bio = bio,
2789                 .last_block = last_block,
2790         };
2791
2792         trace_f2fs_writepage(page, DATA);
2793
2794         /* we should bypass data pages to proceed the kworker jobs */
2795         if (unlikely(f2fs_cp_error(sbi))) {
2796                 mapping_set_error(page->mapping, -EIO);
2797                 /*
2798                  * don't drop any dirty dentry pages for keeping latest
2799                  * directory structure.
2800                  */
2801                 if (S_ISDIR(inode->i_mode))
2802                         goto redirty_out;
2803                 goto out;
2804         }
2805
2806         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
2807                 goto redirty_out;
2808
             /*
              * Pages entirely below i_size, inodes with verity in progress and
              * writes with compr_blocks set skip the end-of-file handling.
              */
2809         if (page->index < end_index ||
2810                         f2fs_verity_in_progress(inode) ||
2811                         compr_blocks)
2812                 goto write;
2813
2814         /*
2815          * If the offset is out-of-range of file size,
2816          * this page does not have to be written to disk.
2817          */
2818         offset = i_size & (PAGE_SIZE - 1);
2819         if ((page->index >= end_index + 1) || !offset)
2820                 goto out;
2821
             /* page straddles i_size: zero the part beyond it before writing */
2822         zero_user_segment(page, offset, PAGE_SIZE);
2823 write:
2824         if (f2fs_is_drop_cache(inode))
2825                 goto out;
2826
2827         /* Dentry/quota blocks are controlled by checkpoint */
2828         if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
2829                 /*
2830                  * We need to wait for node_write to avoid block allocation during
2831                  * checkpoint. This can only happen to quota writes which can cause
2832                  * the below discard race condition.
2833                  */
2834                 if (IS_NOQUOTA(inode))
2835                         f2fs_down_read(&sbi->node_write);
2836
2837                 fio.need_lock = LOCK_DONE;
2838                 err = f2fs_do_write_data_page(&fio);
2839
2840                 if (IS_NOQUOTA(inode))
2841                         f2fs_up_read(&sbi->node_write);
2842
2843                 goto done;
2844         }
2845
2846         if (!wbc->for_reclaim)
2847                 need_balance_fs = true;
2848         else if (has_not_enough_free_secs(sbi, 0, 0))
2849                 goto redirty_out;
2850         else
2851                 set_inode_flag(inode, FI_HOT_DATA);
2852
             /* -EAGAIN here means "not written as inline data, use the block path" */
2853         err = -EAGAIN;
2854         if (f2fs_has_inline_data(inode)) {
2855                 err = f2fs_write_inline_data(inode, page);
2856                 if (!err)
2857                         goto out;
2858         }
2859
             /*
              * First attempt runs with need_lock == LOCK_RETRY (set in the fio
              * initializer above); if it reports -EAGAIN, retry once with
              * LOCK_REQ.
              */
2860         if (err == -EAGAIN) {
2861                 err = f2fs_do_write_data_page(&fio);
2862                 if (err == -EAGAIN) {
2863                         fio.need_lock = LOCK_REQ;
2864                         err = f2fs_do_write_data_page(&fio);
2865                 }
2866         }
2867
2868         if (err) {
2869                 file_set_keep_isize(inode);
2870         } else {
                     /* advance last_disk_size to the end of this page if it grew */
2871                 spin_lock(&F2FS_I(inode)->i_size_lock);
2872                 if (F2FS_I(inode)->last_disk_size < psize)
2873                         F2FS_I(inode)->last_disk_size = psize;
2874                 spin_unlock(&F2FS_I(inode)->i_size_lock);
2875         }
2876
2877 done:
             /* -ENOENT is not redirtied: it falls through to "out" with err set */
2878         if (err && err != -ENOENT)
2879                 goto redirty_out;
2880
2881 out:
2882         inode_dec_dirty_pages(inode);
2883         if (err) {
2884                 ClearPageUptodate(page);
2885                 clear_page_private_gcing(page);
2886         }
2887
2888         if (wbc->for_reclaim) {
2889                 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
2890                 clear_inode_flag(inode, FI_HOT_DATA);
2891                 f2fs_remove_dirty_inode(inode);
                     /* clears only the local pointer: *submitted is left untouched */
2892                 submitted = NULL;
2893         }
2894         unlock_page(page);
2895         if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
2896                         !F2FS_I(inode)->wb_task && allow_balance)
2897                 f2fs_balance_fs(sbi, need_balance_fs);
2898
2899         if (unlikely(f2fs_cp_error(sbi))) {
2900                 f2fs_submit_merged_write(sbi, DATA);
2901                 f2fs_submit_merged_ipu_write(sbi, bio, NULL);
2902                 submitted = NULL;
2903         }
2904
2905         if (submitted)
2906                 *submitted = fio.submitted;
2907
2908         return 0;
2909
2910 redirty_out:
2911         redirty_page_for_writepage(wbc, page);
2912         /*
2913          * pageout() in MM translates EAGAIN, so calls handle_write_error()
2914          * -> mapping_set_error() -> set_bit(AS_EIO, ...).
2915          * file_write_and_wait_range() will see EIO error, which is critical
2916          * to return value of fsync() followed by atomic_write failure to user.
2917          */
             /* AOP_WRITEPAGE_ACTIVATE is returned with the page still locked */
2918         if (!err || wbc->for_reclaim)
2919                 return AOP_WRITEPAGE_ACTIVATE;
2920         unlock_page(page);
2921         return err;
2922 }
2923
2924 static int f2fs_write_data_page(struct page *page,
2925                                         struct writeback_control *wbc)
2926 {
2927 #ifdef CONFIG_F2FS_FS_COMPRESSION
2928         struct inode *inode = page->mapping->host;
2929
2930         if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
2931                 goto out;
2932
2933         if (f2fs_compressed_file(inode)) {
2934                 if (f2fs_is_compressed_cluster(inode, page->index)) {
2935                         redirty_page_for_writepage(wbc, page);
2936                         return AOP_WRITEPAGE_ACTIVATE;
2937                 }
2938         }
2939 out:
2940 #endif
2941
2942         return f2fs_write_single_data_page(page, NULL, NULL, NULL,
2943                                                 wbc, FS_DATA_IO, 0, true);
2944 }
2945
2946 /*
2947  * This function was copied from write_cache_pages from mm/page-writeback.c.
2948  * The major change is making write step of cold data page separately from
2949  * warm/hot data page.
      *
      * Each pass of the outer loop collects up to F2FS_ONSTACK_PAGES pages from
      * the tagged folios in [index, end] into pages[] (taking one folio
      * reference per page), then writes them one at a time through
      * f2fs_write_single_data_page(). With CONFIG_F2FS_FS_COMPRESSION and a
      * compressed file, pages are instead added to a compress context and
      * written per cluster by f2fs_write_multi_pages().
      *
      * AOP_WRITEPAGE_ACTIVATE and -EAGAIN from a page write are folded to 0;
      * any other error stops the walk. Returns ret, i.e. 0 or the error that
      * stopped the walk.
2950  */
2951 static int f2fs_write_cache_pages(struct address_space *mapping,
2952                                         struct writeback_control *wbc,
2953                                         enum iostat_type io_type)
2954 {
2955         int ret = 0;
2956         int done = 0, retry = 0;
2957         struct page *pages[F2FS_ONSTACK_PAGES];
2958         struct folio_batch fbatch;
2959         struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
2960         struct bio *bio = NULL;
2961         sector_t last_block;
2962 #ifdef CONFIG_F2FS_FS_COMPRESSION
2963         struct inode *inode = mapping->host;
2964         struct compress_ctx cc = {
2965                 .inode = inode,
2966                 .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
2967                 .cluster_size = F2FS_I(inode)->i_cluster_size,
2968                 .cluster_idx = NULL_CLUSTER,
2969                 .rpages = NULL,
2970                 .nr_rpages = 0,
2971                 .cpages = NULL,
2972                 .valid_nr_cpages = 0,
2973                 .rbuf = NULL,
2974                 .cbuf = NULL,
2975                 .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
2976                 .private = NULL,
2977         };
2978 #endif
2979         int nr_folios, p, idx;
2980         int nr_pages;
2981         pgoff_t index;
2982         pgoff_t end;            /* Inclusive */
2983         pgoff_t done_index;
2984         int range_whole = 0;
2985         xa_mark_t tag;
2986         int nwritten = 0;
2987         int submitted = 0;
2988         int i;
2989
2990         folio_batch_init(&fbatch);
2991
             /* FI_HOT_DATA is set only while the inode has few dirty pages */
2992         if (get_dirty_pages(mapping->host) <=
2993                                 SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
2994                 set_inode_flag(mapping->host, FI_HOT_DATA);
2995         else
2996                 clear_inode_flag(mapping->host, FI_HOT_DATA);
2997
2998         if (wbc->range_cyclic) {
2999                 index = mapping->writeback_index; /* prev offset */
3000                 end = -1;
3001         } else {
3002                 index = wbc->range_start >> PAGE_SHIFT;
3003                 end = wbc->range_end >> PAGE_SHIFT;
3004                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
3005                         range_whole = 1;
3006         }
3007         if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
3008                 tag = PAGECACHE_TAG_TOWRITE;
3009         else
3010                 tag = PAGECACHE_TAG_DIRTY;
3011 retry:
3012         retry = 0;
3013         if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
3014                 tag_pages_for_writeback(mapping, index, end);
3015         done_index = index;
3016         while (!done && !retry && (index <= end)) {
3017                 nr_pages = 0;
                     /*
                      * Gather phase: keep pulling folio batches until pages[] is
                      * full or no tagged folio is left in the range.
                      */
3018 again:
3019                 nr_folios = filemap_get_folios_tag(mapping, &index, end,
3020                                 tag, &fbatch);
3021                 if (nr_folios == 0) {
3022                         if (nr_pages)
3023                                 goto write;
3024                         break;
3025                 }
3026
3027                 for (i = 0; i < nr_folios; i++) {
3028                         struct folio *folio = fbatch.folios[i];
3029
3030                         idx = 0;
3031                         p = folio_nr_pages(folio);
3032 add_more:
                             /* one folio reference per stored page; dropped by release_pages() below */
3033                         pages[nr_pages] = folio_page(folio, idx);
3034                         folio_get(folio);
3035                         if (++nr_pages == F2FS_ONSTACK_PAGES) {
                                     /* pages[] is full: the next lookup resumes right after this page */
3036                                 index = folio->index + idx + 1;
3037                                 folio_batch_release(&fbatch);
3038                                 goto write;
3039                         }
3040                         if (++idx < p)
3041                                 goto add_more;
3042                 }
3043                 folio_batch_release(&fbatch);
3044                 goto again;
3045 write:
3046                 for (i = 0; i < nr_pages; i++) {
3047                         struct page *page = pages[i];
3048                         struct folio *folio = page_folio(page);
3049                         bool need_readd;
                             /*
                              * readd is re-entered (via "next") after a full cluster was
                              * flushed successfully, so that the current page, which could
                              * not be merged into that cluster, is processed again.
                              */
3050 readd:
3051                         need_readd = false;
3052 #ifdef CONFIG_F2FS_FS_COMPRESSION
3053                         if (f2fs_compressed_file(inode)) {
3054                                 void *fsdata = NULL;
3055                                 struct page *pagep;
3056                                 int ret2;
3057
3058                                 ret = f2fs_init_compress_ctx(&cc);
3059                                 if (ret) {
3060                                         done = 1;
3061                                         break;
3062                                 }
3063
                                     /* page cannot join the pending cluster: write that cluster out first */
3064                                 if (!f2fs_cluster_can_merge_page(&cc,
3065                                                                 folio->index)) {
3066                                         ret = f2fs_write_multi_pages(&cc,
3067                                                 &submitted, wbc, io_type);
3068                                         if (!ret)
3069                                                 need_readd = true;
3070                                         goto result;
3071                                 }
3072
3073                                 if (unlikely(f2fs_cp_error(sbi)))
3074                                         goto lock_folio;
3075
3076                                 if (!f2fs_cluster_is_empty(&cc))
3077                                         goto lock_folio;
3078
3079                                 if (f2fs_all_cluster_page_ready(&cc,
3080                                         pages, i, nr_pages, true))
3081                                         goto lock_folio;
3082
3083                                 ret2 = f2fs_prepare_compress_overwrite(
3084                                                         inode, &pagep,
3085                                                         folio->index, &fsdata);
3086                                 if (ret2 < 0) {
3087                                         ret = ret2;
3088                                         done = 1;
3089                                         break;
3090                                 } else if (ret2 &&
3091                                         (!f2fs_compress_write_end(inode,
3092                                                 fsdata, folio->index, 1) ||
3093                                          !f2fs_all_cluster_page_ready(&cc,
3094                                                 pages, i, nr_pages,
3095                                                 false))) {
                                             /* leave the loop and rescan the mapping from index 0 */
3096                                         retry = 1;
3097                                         break;
3098                                 }
3099                         }
3100 #endif
3101                         /* give a priority to WB_SYNC threads */
3102                         if (atomic_read(&sbi->wb_sync_req[DATA]) &&
3103                                         wbc->sync_mode == WB_SYNC_NONE) {
3104                                 done = 1;
3105                                 break;
3106                         }
3107 #ifdef CONFIG_F2FS_FS_COMPRESSION
3108 lock_folio:
3109 #endif
3110                         done_index = folio->index;
3111 retry_write:
3112                         folio_lock(folio);
3113
3114                         if (unlikely(folio->mapping != mapping)) {
3115 continue_unlock:
3116                                 folio_unlock(folio);
3117                                 continue;
3118                         }
3119
3120                         if (!folio_test_dirty(folio)) {
3121                                 /* someone wrote it for us */
3122                                 goto continue_unlock;
3123                         }
3124
3125                         if (folio_test_writeback(folio)) {
3126                                 if (wbc->sync_mode != WB_SYNC_NONE)
3127                                         f2fs_wait_on_page_writeback(
3128                                                         &folio->page,
3129                                                         DATA, true, true);
3130                                 else
3131                                         goto continue_unlock;
3132                         }
3133
3134                         if (!folio_clear_dirty_for_io(folio))
3135                                 goto continue_unlock;
3136
3137 #ifdef CONFIG_F2FS_FS_COMPRESSION
3138                         if (f2fs_compressed_file(inode)) {
                                     /*
                                      * The page joins the compress context with an extra
                                      * reference and is not unlocked here.
                                      */
3139                                 folio_get(folio);
3140                                 f2fs_compress_ctx_add_page(&cc, &folio->page);
3141                                 continue;
3142                         }
3143 #endif
3144                         ret = f2fs_write_single_data_page(&folio->page,
3145                                         &submitted, &bio, &last_block,
3146                                         wbc, io_type, 0, true);
                             /* AOP_WRITEPAGE_ACTIVATE comes back with the page still locked */
3147                         if (ret == AOP_WRITEPAGE_ACTIVATE)
3148                                 folio_unlock(folio);
3149 #ifdef CONFIG_F2FS_FS_COMPRESSION
3150 result:
3151 #endif
3152                         nwritten += submitted;
3153                         wbc->nr_to_write -= submitted;
3154
3155                         if (unlikely(ret)) {
3156                                 /*
3157                                  * keep nr_to_write, since vfs uses this to
3158                                  * get # of written pages.
3159                                  */
3160                                 if (ret == AOP_WRITEPAGE_ACTIVATE) {
3161                                         ret = 0;
3162                                         goto next;
3163                                 } else if (ret == -EAGAIN) {
                                             /* WB_SYNC_ALL waits and retries this folio; other modes skip it */
3164                                         ret = 0;
3165                                         if (wbc->sync_mode == WB_SYNC_ALL) {
3166                                                 f2fs_io_schedule_timeout(
3167                                                         DEFAULT_IO_TIMEOUT);
3168                                                 goto retry_write;
3169                                         }
3170                                         goto next;
3171                                 }
3172                                 done_index = folio->index +
3173                                         folio_nr_pages(folio);
3174                                 done = 1;
3175                                 break;
3176                         }
3177
3178                         if (wbc->nr_to_write <= 0 &&
3179                                         wbc->sync_mode == WB_SYNC_NONE) {
3180                                 done = 1;
3181                                 break;
3182                         }
3183 next:
3184                         if (need_readd)
3185                                 goto readd;
3186                 }
                     /* drop the references taken while filling pages[] */
3187                 release_pages(pages, nr_pages);
3188                 cond_resched();
3189         }
3190 #ifdef CONFIG_F2FS_FS_COMPRESSION
3191         /* flush remaining pages in compress cluster */
3192         if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
3193                 ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
3194                 nwritten += submitted;
3195                 wbc->nr_to_write -= submitted;
3196                 if (ret) {
3197                         done = 1;
3198                         retry = 0;
3199                 }
3200         }
3201         if (f2fs_compressed_file(inode))
3202                 f2fs_destroy_compress_ctx(&cc, false);
3203 #endif
             /* a requested retry rescans the whole mapping, not just the original range */
3204         if (retry) {
3205                 index = 0;
3206                 end = -1;
3207                 goto retry;
3208         }
3209         if (wbc->range_cyclic && !done)
3210                 done_index = 0;
3211         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
3212                 mapping->writeback_index = done_index;
3213
3214         if (nwritten)
3215                 f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
3216                                                                 NULL, 0, DATA);
3217         /* submit cached bio of IPU write */
3218         if (bio)
3219                 f2fs_submit_merged_ipu_write(sbi, &bio, NULL);
3220
3221         return ret;
3222 }
3223
3224 static inline bool __should_serialize_io(struct inode *inode,
3225                                         struct writeback_control *wbc)
3226 {
3227         /* to avoid deadlock in path of data flush */
3228         if (F2FS_I(inode)->wb_task)
3229                 return false;
3230
3231         if (!S_ISREG(inode->i_mode))
3232                 return false;
3233         if (IS_NOQUOTA(inode))
3234                 return false;
3235
3236         if (f2fs_need_compress_data(inode))
3237                 return true;
3238         if (wbc->sync_mode != WB_SYNC_ALL)
3239                 return true;
3240         if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
3241                 return true;
3242         return false;
3243 }
3244
3245 static int __f2fs_write_data_pages(struct address_space *mapping,
3246                                                 struct writeback_control *wbc,
3247                                                 enum iostat_type io_type)
3248 {
3249         struct inode *inode = mapping->host;
3250         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3251         struct blk_plug plug;
3252         int ret;
3253         bool locked = false;
3254
3255         /* deal with chardevs and other special file */
3256         if (!mapping->a_ops->writepage)
3257                 return 0;
3258
3259         /* skip writing if there is no dirty page in this inode */
3260         if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
3261                 return 0;
3262
3263         /* during POR, we don't need to trigger writepage at all. */
3264         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
3265                 goto skip_write;
3266
3267         if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
3268                         wbc->sync_mode == WB_SYNC_NONE &&
3269                         get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
3270                         f2fs_available_free_memory(sbi, DIRTY_DENTS))
3271                 goto skip_write;
3272
3273         /* skip writing in file defragment preparing stage */
3274         if (is_inode_flag_set(inode, FI_SKIP_WRITES))
3275                 goto skip_write;
3276
3277         trace_f2fs_writepages(mapping->host, wbc, DATA);
3278
3279         /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
3280         if (wbc->sync_mode == WB_SYNC_ALL)
3281                 atomic_inc(&sbi->wb_sync_req[DATA]);
3282         else if (atomic_read(&sbi->wb_sync_req[DATA])) {
3283                 /* to avoid potential deadlock */
3284                 if (current->plug)
3285                         blk_finish_plug(current->plug);
3286                 goto skip_write;
3287         }
3288
3289         if (__should_serialize_io(inode, wbc)) {
3290                 mutex_lock(&sbi->writepages);
3291                 locked = true;
3292         }
3293
3294         blk_start_plug(&plug);
3295         ret = f2fs_write_cache_pages(mapping, wbc, io_type);
3296         blk_finish_plug(&plug);
3297
3298         if (locked)
3299                 mutex_unlock(&sbi->writepages);
3300
3301         if (wbc->sync_mode == WB_SYNC_ALL)
3302                 atomic_dec(&sbi->wb_sync_req[DATA]);
3303         /*
3304          * if some pages were truncated, we cannot guarantee its mapping->host
3305          * to detect pending bios.
3306          */
3307
3308         f2fs_remove_dirty_inode(inode);
3309         return ret;
3310
3311 skip_write:
3312         wbc->pages_skipped += get_dirty_pages(inode);
3313         trace_f2fs_writepages(mapping->host, wbc, DATA);
3314         return 0;
3315 }
3316
3317 static int f2fs_write_data_pages(struct address_space *mapping,
3318                             struct writeback_control *wbc)
3319 {
3320         struct inode *inode = mapping->host;
3321
3322         return __f2fs_write_data_pages(mapping, wbc,
3323                         F2FS_I(inode)->cp_task == current ?
3324                         FS_CP_DATA_IO : FS_DATA_IO);
3325 }
3326
3327 void f2fs_write_failed(struct inode *inode, loff_t to)
3328 {
3329         loff_t i_size = i_size_read(inode);
3330
3331         if (IS_NOQUOTA(inode))
3332                 return;
3333
3334         /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
3335         if (to > i_size && !f2fs_verity_in_progress(inode)) {
3336                 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3337                 filemap_invalidate_lock(inode->i_mapping);
3338
3339                 truncate_pagecache(inode, i_size);
3340                 f2fs_truncate_blocks(inode, i_size, true);
3341
3342                 filemap_invalidate_unlock(inode->i_mapping);
3343                 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3344         }
3345 }
3346
3347 static int prepare_write_begin(struct f2fs_sb_info *sbi,
3348                         struct page *page, loff_t pos, unsigned len,
3349                         block_t *blk_addr, bool *node_changed)
3350 {
3351         struct inode *inode = page->mapping->host;
3352         pgoff_t index = page->index;
3353         struct dnode_of_data dn;
3354         struct page *ipage;
3355         bool locked = false;
3356         int flag = F2FS_GET_BLOCK_PRE_AIO;
3357         int err = 0;
3358
3359         /*
3360          * If a whole page is being written and we already preallocated all the
3361          * blocks, then there is no need to get a block address now.
3362          */
3363         if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
3364                 return 0;
3365
3366         /* f2fs_lock_op avoids race between write CP and convert_inline_page */
3367         if (f2fs_has_inline_data(inode)) {
3368                 if (pos + len > MAX_INLINE_DATA(inode))
3369                         flag = F2FS_GET_BLOCK_DEFAULT;
3370                 f2fs_map_lock(sbi, flag);
3371                 locked = true;
3372         } else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
3373                 f2fs_map_lock(sbi, flag);
3374                 locked = true;
3375         }
3376
3377 restart:
3378         /* check inline_data */
3379         ipage = f2fs_get_node_page(sbi, inode->i_ino);
3380         if (IS_ERR(ipage)) {
3381                 err = PTR_ERR(ipage);
3382                 goto unlock_out;
3383         }
3384
3385         set_new_dnode(&dn, inode, ipage, ipage, 0);
3386
3387         if (f2fs_has_inline_data(inode)) {
3388                 if (pos + len <= MAX_INLINE_DATA(inode)) {
3389                         f2fs_do_read_inline_data(page, ipage);
3390                         set_inode_flag(inode, FI_DATA_EXIST);
3391                         if (inode->i_nlink)
3392                                 set_page_private_inline(ipage);
3393                         goto out;
3394                 }
3395                 err = f2fs_convert_inline_page(&dn, page);
3396                 if (err || dn.data_blkaddr != NULL_ADDR)
3397                         goto out;
3398         }
3399
3400         if (!f2fs_lookup_read_extent_cache_block(inode, index,
3401                                                  &dn.data_blkaddr)) {
3402                 if (locked) {
3403                         err = f2fs_reserve_block(&dn, index);
3404                         goto out;
3405                 }
3406
3407                 /* hole case */
3408                 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
3409                 if (!err && dn.data_blkaddr != NULL_ADDR)
3410                         goto out;
3411                 f2fs_put_dnode(&dn);
3412                 f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
3413                 WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
3414                 locked = true;
3415                 goto restart;
3416         }
3417 out:
3418         if (!err) {
3419                 /* convert_inline_page can make node_changed */
3420                 *blk_addr = dn.data_blkaddr;
3421                 *node_changed = dn.node_changed;
3422         }
3423         f2fs_put_dnode(&dn);
3424 unlock_out:
3425         if (locked)
3426                 f2fs_map_unlock(sbi, flag);
3427         return err;
3428 }
3429
3430 static int __find_data_block(struct inode *inode, pgoff_t index,
3431                                 block_t *blk_addr)
3432 {
3433         struct dnode_of_data dn;
3434         struct page *ipage;
3435         int err = 0;
3436
3437         ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
3438         if (IS_ERR(ipage))
3439                 return PTR_ERR(ipage);
3440
3441         set_new_dnode(&dn, inode, ipage, ipage, 0);
3442
3443         if (!f2fs_lookup_read_extent_cache_block(inode, index,
3444                                                  &dn.data_blkaddr)) {
3445                 /* hole case */
3446                 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
3447                 if (err) {
3448                         dn.data_blkaddr = NULL_ADDR;
3449                         err = 0;
3450                 }
3451         }
3452         *blk_addr = dn.data_blkaddr;
3453         f2fs_put_dnode(&dn);
3454         return err;
3455 }
3456
3457 static int __reserve_data_block(struct inode *inode, pgoff_t index,
3458                                 block_t *blk_addr, bool *node_changed)
3459 {
3460         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3461         struct dnode_of_data dn;
3462         struct page *ipage;
3463         int err = 0;
3464
3465         f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
3466
3467         ipage = f2fs_get_node_page(sbi, inode->i_ino);
3468         if (IS_ERR(ipage)) {
3469                 err = PTR_ERR(ipage);
3470                 goto unlock_out;
3471         }
3472         set_new_dnode(&dn, inode, ipage, ipage, 0);
3473
3474         if (!f2fs_lookup_read_extent_cache_block(dn.inode, index,
3475                                                 &dn.data_blkaddr))
3476                 err = f2fs_reserve_block(&dn, index);
3477
3478         *blk_addr = dn.data_blkaddr;
3479         *node_changed = dn.node_changed;
3480         f2fs_put_dnode(&dn);
3481
3482 unlock_out:
3483         f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO);
3484         return err;
3485 }
3486
3487 static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
3488                         struct page *page, loff_t pos, unsigned int len,
3489                         block_t *blk_addr, bool *node_changed)
3490 {
3491         struct inode *inode = page->mapping->host;
3492         struct inode *cow_inode = F2FS_I(inode)->cow_inode;
3493         pgoff_t index = page->index;
3494         int err = 0;
3495         block_t ori_blk_addr = NULL_ADDR;
3496
3497         /* If pos is beyond the end of file, reserve a new block in COW inode */
3498         if ((pos & PAGE_MASK) >= i_size_read(inode))
3499                 goto reserve_block;
3500
3501         /* Look for the block in COW inode first */
3502         err = __find_data_block(cow_inode, index, blk_addr);
3503         if (err)
3504                 return err;
3505         else if (*blk_addr != NULL_ADDR)
3506                 return 0;
3507
3508         if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE))
3509                 goto reserve_block;
3510
3511         /* Look for the block in the original inode */
3512         err = __find_data_block(inode, index, &ori_blk_addr);
3513         if (err)
3514                 return err;
3515
3516 reserve_block:
3517         /* Finally, we should reserve a new block in COW inode for the update */
3518         err = __reserve_data_block(cow_inode, index, blk_addr, node_changed);
3519         if (err)
3520                 return err;
3521         inc_atomic_write_cnt(inode);
3522
3523         if (ori_blk_addr != NULL_ADDR)
3524                 *blk_addr = ori_blk_addr;
3525         return 0;
3526 }
3527
3528 static int f2fs_write_begin(struct file *file, struct address_space *mapping,
3529                 loff_t pos, unsigned len, struct page **pagep, void **fsdata)
3530 {
3531         struct inode *inode = mapping->host;
3532         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3533         struct page *page = NULL;
3534         pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
3535         bool need_balance = false;
3536         block_t blkaddr = NULL_ADDR;
3537         int err = 0;
3538
3539         trace_f2fs_write_begin(inode, pos, len);
3540
3541         if (!f2fs_is_checkpoint_ready(sbi)) {
3542                 err = -ENOSPC;
3543                 goto fail;
3544         }
3545
3546         /*
3547          * We should check this at this moment to avoid deadlock on inode page
3548          * and #0 page. The locking rule for inline_data conversion should be:
3549          * lock_page(page #0) -> lock_page(inode_page)
3550          */
3551         if (index != 0) {
3552                 err = f2fs_convert_inline_inode(inode);
3553                 if (err)
3554                         goto fail;
3555         }
3556
3557 #ifdef CONFIG_F2FS_FS_COMPRESSION
3558         if (f2fs_compressed_file(inode)) {
3559                 int ret;
3560
3561                 *fsdata = NULL;
3562
3563                 if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
3564                         goto repeat;
3565
3566                 ret = f2fs_prepare_compress_overwrite(inode, pagep,
3567                                                         index, fsdata);
3568                 if (ret < 0) {
3569                         err = ret;
3570                         goto fail;
3571                 } else if (ret) {
3572                         return 0;
3573                 }
3574         }
3575 #endif
3576
3577 repeat:
3578         /*
3579          * Do not use grab_cache_page_write_begin() to avoid deadlock due to
3580          * wait_for_stable_page. Will wait that below with our IO control.
3581          */
3582         page = f2fs_pagecache_get_page(mapping, index,
3583                                 FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
3584         if (!page) {
3585                 err = -ENOMEM;
3586                 goto fail;
3587         }
3588
3589         /* TODO: cluster can be compressed due to race with .writepage */
3590
3591         *pagep = page;
3592
3593         if (f2fs_is_atomic_file(inode))
3594                 err = prepare_atomic_write_begin(sbi, page, pos, len,
3595                                         &blkaddr, &need_balance);
3596         else
3597                 err = prepare_write_begin(sbi, page, pos, len,
3598                                         &blkaddr, &need_balance);
3599         if (err)
3600                 goto fail;
3601
3602         if (need_balance && !IS_NOQUOTA(inode) &&
3603                         has_not_enough_free_secs(sbi, 0, 0)) {
3604                 unlock_page(page);
3605                 f2fs_balance_fs(sbi, true);
3606                 lock_page(page);
3607                 if (page->mapping != mapping) {
3608                         /* The page got truncated from under us */
3609                         f2fs_put_page(page, 1);
3610                         goto repeat;
3611                 }
3612         }
3613
3614         f2fs_wait_on_page_writeback(page, DATA, false, true);
3615
3616         if (len == PAGE_SIZE || PageUptodate(page))
3617                 return 0;
3618
3619         if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
3620             !f2fs_verity_in_progress(inode)) {
3621                 zero_user_segment(page, len, PAGE_SIZE);
3622                 return 0;
3623         }
3624
3625         if (blkaddr == NEW_ADDR) {
3626                 zero_user_segment(page, 0, PAGE_SIZE);
3627                 SetPageUptodate(page);
3628         } else {
3629                 if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
3630                                 DATA_GENERIC_ENHANCE_READ)) {
3631                         err = -EFSCORRUPTED;
3632                         f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
3633                         goto fail;
3634                 }
3635                 err = f2fs_submit_page_read(inode, page, blkaddr, 0, true);
3636                 if (err)
3637                         goto fail;
3638
3639                 lock_page(page);
3640                 if (unlikely(page->mapping != mapping)) {
3641                         f2fs_put_page(page, 1);
3642                         goto repeat;
3643                 }
3644                 if (unlikely(!PageUptodate(page))) {
3645                         err = -EIO;
3646                         goto fail;
3647                 }
3648         }
3649         return 0;
3650
3651 fail:
3652         f2fs_put_page(page, 1);
3653         f2fs_write_failed(inode, pos + len);
3654         return err;
3655 }
3656
3657 static int f2fs_write_end(struct file *file,
3658                         struct address_space *mapping,
3659                         loff_t pos, unsigned len, unsigned copied,
3660                         struct page *page, void *fsdata)
3661 {
3662         struct inode *inode = page->mapping->host;
3663
3664         trace_f2fs_write_end(inode, pos, len, copied);
3665
3666         /*
3667          * This should be come from len == PAGE_SIZE, and we expect copied
3668          * should be PAGE_SIZE. Otherwise, we treat it with zero copied and
3669          * let generic_perform_write() try to copy data again through copied=0.
3670          */
3671         if (!PageUptodate(page)) {
3672                 if (unlikely(copied != len))
3673                         copied = 0;
3674                 else
3675                         SetPageUptodate(page);
3676         }
3677
3678 #ifdef CONFIG_F2FS_FS_COMPRESSION
3679         /* overwrite compressed file */
3680         if (f2fs_compressed_file(inode) && fsdata) {
3681                 f2fs_compress_write_end(inode, fsdata, page->index, copied);
3682                 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3683
3684                 if (pos + copied > i_size_read(inode) &&
3685                                 !f2fs_verity_in_progress(inode))
3686                         f2fs_i_size_write(inode, pos + copied);
3687                 return copied;
3688         }
3689 #endif
3690
3691         if (!copied)
3692                 goto unlock_out;
3693
3694         set_page_dirty(page);
3695
3696         if (pos + copied > i_size_read(inode) &&
3697             !f2fs_verity_in_progress(inode)) {
3698                 f2fs_i_size_write(inode, pos + copied);
3699                 if (f2fs_is_atomic_file(inode))
3700                         f2fs_i_size_write(F2FS_I(inode)->cow_inode,
3701                                         pos + copied);
3702         }
3703 unlock_out:
3704         f2fs_put_page(page, 1);
3705         f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3706         return copied;
3707 }
3708
3709 void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
3710 {
3711         struct inode *inode = folio->mapping->host;
3712         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3713
3714         if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
3715                                 (offset || length != folio_size(folio)))
3716                 return;
3717
3718         if (folio_test_dirty(folio)) {
3719                 if (inode->i_ino == F2FS_META_INO(sbi)) {
3720                         dec_page_count(sbi, F2FS_DIRTY_META);
3721                 } else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
3722                         dec_page_count(sbi, F2FS_DIRTY_NODES);
3723                 } else {
3724                         inode_dec_dirty_pages(inode);
3725                         f2fs_remove_dirty_inode(inode);
3726                 }
3727         }
3728
3729         clear_page_private_reference(&folio->page);
3730         clear_page_private_gcing(&folio->page);
3731
3732         if (test_opt(sbi, COMPRESS_CACHE) &&
3733                         inode->i_ino == F2FS_COMPRESS_INO(sbi))
3734                 clear_page_private_data(&folio->page);
3735
3736         folio_detach_private(folio);
3737 }
3738
3739 bool f2fs_release_folio(struct folio *folio, gfp_t wait)
3740 {
3741         struct f2fs_sb_info *sbi;
3742
3743         /* If this is dirty folio, keep private data */
3744         if (folio_test_dirty(folio))
3745                 return false;
3746
3747         sbi = F2FS_M_SB(folio->mapping);
3748         if (test_opt(sbi, COMPRESS_CACHE)) {
3749                 struct inode *inode = folio->mapping->host;
3750
3751                 if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
3752                         clear_page_private_data(&folio->page);
3753         }
3754
3755         clear_page_private_reference(&folio->page);
3756         clear_page_private_gcing(&folio->page);
3757
3758         folio_detach_private(folio);
3759         return true;
3760 }
3761
3762 static bool f2fs_dirty_data_folio(struct address_space *mapping,
3763                 struct folio *folio)
3764 {
3765         struct inode *inode = mapping->host;
3766
3767         trace_f2fs_set_page_dirty(&folio->page, DATA);
3768
3769         if (!folio_test_uptodate(folio))
3770                 folio_mark_uptodate(folio);
3771         BUG_ON(folio_test_swapcache(folio));
3772
3773         if (filemap_dirty_folio(mapping, folio)) {
3774                 f2fs_update_dirty_folio(inode, folio);
3775                 return true;
3776         }
3777         return false;
3778 }
3779
3780
3781 static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
3782 {
3783 #ifdef CONFIG_F2FS_FS_COMPRESSION
3784         struct dnode_of_data dn;
3785         sector_t start_idx, blknr = 0;
3786         int ret;
3787
3788         start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);
3789
3790         set_new_dnode(&dn, inode, NULL, NULL, 0);
3791         ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
3792         if (ret)
3793                 return 0;
3794
3795         if (dn.data_blkaddr != COMPRESS_ADDR) {
3796                 dn.ofs_in_node += block - start_idx;
3797                 blknr = f2fs_data_blkaddr(&dn);
3798                 if (!__is_valid_data_blkaddr(blknr))
3799                         blknr = 0;
3800         }
3801
3802         f2fs_put_dnode(&dn);
3803         return blknr;
3804 #else
3805         return 0;
3806 #endif
3807 }
3808
3809
3810 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
3811 {
3812         struct inode *inode = mapping->host;
3813         sector_t blknr = 0;
3814
3815         if (f2fs_has_inline_data(inode))
3816                 goto out;
3817
3818         /* make sure allocating whole blocks */
3819         if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
3820                 filemap_write_and_wait(mapping);
3821
3822         /* Block number less than F2FS MAX BLOCKS */
3823         if (unlikely(block >= max_file_blocks(inode)))
3824                 goto out;
3825
3826         if (f2fs_compressed_file(inode)) {
3827                 blknr = f2fs_bmap_compress(inode, block);
3828         } else {
3829                 struct f2fs_map_blocks map;
3830
3831                 memset(&map, 0, sizeof(map));
3832                 map.m_lblk = block;
3833                 map.m_len = 1;
3834                 map.m_next_pgofs = NULL;
3835                 map.m_seg_type = NO_CHECK_TYPE;
3836
3837                 if (!f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_BMAP))
3838                         blknr = map.m_pblk;
3839         }
3840 out:
3841         trace_f2fs_bmap(inode, block, blknr);
3842         return blknr;
3843 }
3844
3845 #ifdef CONFIG_SWAP
3846 static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
3847                                                         unsigned int blkcnt)
3848 {
3849         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3850         unsigned int blkofs;
3851         unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
3852         unsigned int secidx = start_blk / blk_per_sec;
3853         unsigned int end_sec = secidx + blkcnt / blk_per_sec;
3854         int ret = 0;
3855
3856         f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3857         filemap_invalidate_lock(inode->i_mapping);
3858
3859         set_inode_flag(inode, FI_ALIGNED_WRITE);
3860         set_inode_flag(inode, FI_OPU_WRITE);
3861
3862         for (; secidx < end_sec; secidx++) {
3863                 f2fs_down_write(&sbi->pin_sem);
3864
3865                 f2fs_lock_op(sbi);
3866                 f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
3867                 f2fs_unlock_op(sbi);
3868
3869                 set_inode_flag(inode, FI_SKIP_WRITES);
3870
3871                 for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
3872                         struct page *page;
3873                         unsigned int blkidx = secidx * blk_per_sec + blkofs;
3874
3875                         page = f2fs_get_lock_data_page(inode, blkidx, true);
3876                         if (IS_ERR(page)) {
3877                                 f2fs_up_write(&sbi->pin_sem);
3878                                 ret = PTR_ERR(page);
3879                                 goto done;
3880                         }
3881
3882                         set_page_dirty(page);
3883                         f2fs_put_page(page, 1);
3884                 }
3885
3886                 clear_inode_flag(inode, FI_SKIP_WRITES);
3887
3888                 ret = filemap_fdatawrite(inode->i_mapping);
3889
3890                 f2fs_up_write(&sbi->pin_sem);
3891
3892                 if (ret)
3893                         break;
3894         }
3895
3896 done:
3897         clear_inode_flag(inode, FI_SKIP_WRITES);
3898         clear_inode_flag(inode, FI_OPU_WRITE);
3899         clear_inode_flag(inode, FI_ALIGNED_WRITE);
3900
3901         filemap_invalidate_unlock(inode->i_mapping);
3902         f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3903
3904         return ret;
3905 }
3906
3907 static int check_swap_activate(struct swap_info_struct *sis,
3908                                 struct file *swap_file, sector_t *span)
3909 {
3910         struct address_space *mapping = swap_file->f_mapping;
3911         struct inode *inode = mapping->host;
3912         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3913         sector_t cur_lblock;
3914         sector_t last_lblock;
3915         sector_t pblock;
3916         sector_t lowest_pblock = -1;
3917         sector_t highest_pblock = 0;
3918         int nr_extents = 0;
3919         unsigned long nr_pblocks;
3920         unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
3921         unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1;
3922         unsigned int not_aligned = 0;
3923         int ret = 0;
3924
3925         /*
3926          * Map all the blocks into the extent list.  This code doesn't try
3927          * to be very smart.
3928          */
3929         cur_lblock = 0;
3930         last_lblock = bytes_to_blks(inode, i_size_read(inode));
3931
3932         while (cur_lblock < last_lblock && cur_lblock < sis->max) {
3933                 struct f2fs_map_blocks map;
3934 retry:
3935                 cond_resched();
3936
3937                 memset(&map, 0, sizeof(map));
3938                 map.m_lblk = cur_lblock;
3939                 map.m_len = last_lblock - cur_lblock;
3940                 map.m_next_pgofs = NULL;
3941                 map.m_next_extent = NULL;
3942                 map.m_seg_type = NO_CHECK_TYPE;
3943                 map.m_may_create = false;
3944
3945                 ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
3946                 if (ret)
3947                         goto out;
3948
3949                 /* hole */
3950                 if (!(map.m_flags & F2FS_MAP_FLAGS)) {
3951                         f2fs_err(sbi, "Swapfile has holes");
3952                         ret = -EINVAL;
3953                         goto out;
3954                 }
3955
3956                 pblock = map.m_pblk;
3957                 nr_pblocks = map.m_len;
3958
3959                 if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
3960                                 nr_pblocks & sec_blks_mask) {
3961                         not_aligned++;
3962
3963                         nr_pblocks = roundup(nr_pblocks, blks_per_sec);
3964                         if (cur_lblock + nr_pblocks > sis->max)
3965                                 nr_pblocks -= blks_per_sec;
3966
3967                         if (!nr_pblocks) {
3968                                 /* this extent is last one */
3969                                 nr_pblocks = map.m_len;
3970                                 f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
3971                                 goto next;
3972                         }
3973
3974                         ret = f2fs_migrate_blocks(inode, cur_lblock,
3975                                                         nr_pblocks);
3976                         if (ret)
3977                                 goto out;
3978                         goto retry;
3979                 }
3980 next:
3981                 if (cur_lblock + nr_pblocks >= sis->max)
3982                         nr_pblocks = sis->max - cur_lblock;
3983
3984                 if (cur_lblock) {       /* exclude the header page */
3985                         if (pblock < lowest_pblock)
3986                                 lowest_pblock = pblock;
3987                         if (pblock + nr_pblocks - 1 > highest_pblock)
3988                                 highest_pblock = pblock + nr_pblocks - 1;
3989                 }
3990
3991                 /*
3992                  * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
3993                  */
3994                 ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
3995                 if (ret < 0)
3996                         goto out;
3997                 nr_extents += ret;
3998                 cur_lblock += nr_pblocks;
3999         }
4000         ret = nr_extents;
4001         *span = 1 + highest_pblock - lowest_pblock;
4002         if (cur_lblock == 0)
4003                 cur_lblock = 1; /* force Empty message */
4004         sis->max = cur_lblock;
4005         sis->pages = cur_lblock - 1;
4006         sis->highest_bit = cur_lblock - 1;
4007 out:
4008         if (not_aligned)
4009                 f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%u * N)",
4010                           not_aligned, blks_per_sec * F2FS_BLKSIZE);
4011         return ret;
4012 }
4013
4014 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
4015                                 sector_t *span)
4016 {
4017         struct inode *inode = file_inode(file);
4018         int ret;
4019
4020         if (!S_ISREG(inode->i_mode))
4021                 return -EINVAL;
4022
4023         if (f2fs_readonly(F2FS_I_SB(inode)->sb))
4024                 return -EROFS;
4025
4026         if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
4027                 f2fs_err(F2FS_I_SB(inode),
4028                         "Swapfile not supported in LFS mode");
4029                 return -EINVAL;
4030         }
4031
4032         ret = f2fs_convert_inline_inode(inode);
4033         if (ret)
4034                 return ret;
4035
4036         if (!f2fs_disable_compressed_file(inode))
4037                 return -EINVAL;
4038
4039         f2fs_precache_extents(inode);
4040
4041         ret = check_swap_activate(sis, file, span);
4042         if (ret < 0)
4043                 return ret;
4044
4045         stat_inc_swapfile_inode(inode);
4046         set_inode_flag(inode, FI_PIN_FILE);
4047         f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
4048         return ret;
4049 }
4050
4051 static void f2fs_swap_deactivate(struct file *file)
4052 {
4053         struct inode *inode = file_inode(file);
4054
4055         stat_dec_swapfile_inode(inode);
4056         clear_inode_flag(inode, FI_PIN_FILE);
4057 }
4058 #else
4059 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
4060                                 sector_t *span)
4061 {
4062         return -EOPNOTSUPP;
4063 }
4064
/*
 * Fallback for the #else branch of the swap section: activation never
 * succeeds there, so there is nothing to undo.
 */
static void f2fs_swap_deactivate(struct file *file)
{
	/* intentionally empty */
}
4068 #endif
4069
4070 const struct address_space_operations f2fs_dblock_aops = {
4071         .read_folio     = f2fs_read_data_folio,
4072         .readahead      = f2fs_readahead,
4073         .writepage      = f2fs_write_data_page,
4074         .writepages     = f2fs_write_data_pages,
4075         .write_begin    = f2fs_write_begin,
4076         .write_end      = f2fs_write_end,
4077         .dirty_folio    = f2fs_dirty_data_folio,
4078         .migrate_folio  = filemap_migrate_folio,
4079         .invalidate_folio = f2fs_invalidate_folio,
4080         .release_folio  = f2fs_release_folio,
4081         .direct_IO      = noop_direct_IO,
4082         .bmap           = f2fs_bmap,
4083         .swap_activate  = f2fs_swap_activate,
4084         .swap_deactivate = f2fs_swap_deactivate,
4085 };
4086
4087 void f2fs_clear_page_cache_dirty_tag(struct page *page)
4088 {
4089         struct address_space *mapping = page_mapping(page);
4090         unsigned long flags;
4091
4092         xa_lock_irqsave(&mapping->i_pages, flags);
4093         __xa_clear_mark(&mapping->i_pages, page_index(page),
4094                                                 PAGECACHE_TAG_DIRTY);
4095         xa_unlock_irqrestore(&mapping->i_pages, flags);
4096 }
4097
4098 int __init f2fs_init_post_read_processing(void)
4099 {
4100         bio_post_read_ctx_cache =
4101                 kmem_cache_create("f2fs_bio_post_read_ctx",
4102                                   sizeof(struct bio_post_read_ctx), 0, 0, NULL);
4103         if (!bio_post_read_ctx_cache)
4104                 goto fail;
4105         bio_post_read_ctx_pool =
4106                 mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
4107                                          bio_post_read_ctx_cache);
4108         if (!bio_post_read_ctx_pool)
4109                 goto fail_free_cache;
4110         return 0;
4111
4112 fail_free_cache:
4113         kmem_cache_destroy(bio_post_read_ctx_cache);
4114 fail:
4115         return -ENOMEM;
4116 }
4117
4118 void f2fs_destroy_post_read_processing(void)
4119 {
4120         mempool_destroy(bio_post_read_ctx_pool);
4121         kmem_cache_destroy(bio_post_read_ctx_cache);
4122 }
4123
4124 int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
4125 {
4126         if (!f2fs_sb_has_encrypt(sbi) &&
4127                 !f2fs_sb_has_verity(sbi) &&
4128                 !f2fs_sb_has_compression(sbi))
4129                 return 0;
4130
4131         sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
4132                                                  WQ_UNBOUND | WQ_HIGHPRI,
4133                                                  num_online_cpus());
4134         return sbi->post_read_wq ? 0 : -ENOMEM;
4135 }
4136
4137 void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
4138 {
4139         if (sbi->post_read_wq)
4140                 destroy_workqueue(sbi->post_read_wq);
4141 }
4142
4143 int __init f2fs_init_bio_entry_cache(void)
4144 {
4145         bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
4146                         sizeof(struct bio_entry));
4147         return bio_entry_slab ? 0 : -ENOMEM;
4148 }
4149
4150 void f2fs_destroy_bio_entry_cache(void)
4151 {
4152         kmem_cache_destroy(bio_entry_slab);
4153 }
4154
4155 static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
4156                             unsigned int flags, struct iomap *iomap,
4157                             struct iomap *srcmap)
4158 {
4159         struct f2fs_map_blocks map = {};
4160         pgoff_t next_pgofs = 0;
4161         int err;
4162
4163         map.m_lblk = bytes_to_blks(inode, offset);
4164         map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
4165         map.m_next_pgofs = &next_pgofs;
4166         map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
4167         if (flags & IOMAP_WRITE)
4168                 map.m_may_create = true;
4169
4170         err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO);
4171         if (err)
4172                 return err;
4173
4174         iomap->offset = blks_to_bytes(inode, map.m_lblk);
4175
4176         /*
4177          * When inline encryption is enabled, sometimes I/O to an encrypted file
4178          * has to be broken up to guarantee DUN contiguity.  Handle this by
4179          * limiting the length of the mapping returned.
4180          */
4181         map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
4182
4183         /*
4184          * We should never see delalloc or compressed extents here based on
4185          * prior flushing and checks.
4186          */
4187         if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR))
4188                 return -EINVAL;
4189         if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR))
4190                 return -EINVAL;
4191
4192         if (map.m_pblk != NULL_ADDR) {
4193                 iomap->length = blks_to_bytes(inode, map.m_len);
4194                 iomap->type = IOMAP_MAPPED;
4195                 iomap->flags |= IOMAP_F_MERGED;
4196                 iomap->bdev = map.m_bdev;
4197                 iomap->addr = blks_to_bytes(inode, map.m_pblk);
4198         } else {
4199                 if (flags & IOMAP_WRITE)
4200                         return -ENOTBLK;
4201                 iomap->length = blks_to_bytes(inode, next_pgofs) -
4202                                 iomap->offset;
4203                 iomap->type = IOMAP_HOLE;
4204                 iomap->addr = IOMAP_NULL_ADDR;
4205         }
4206
4207         if (map.m_flags & F2FS_MAP_NEW)
4208                 iomap->flags |= IOMAP_F_NEW;
4209         if ((inode->i_state & I_DIRTY_DATASYNC) ||
4210             offset + length > i_size_read(inode))
4211                 iomap->flags |= IOMAP_F_DIRTY;
4212
4213         return 0;
4214 }
4215
4216 const struct iomap_ops f2fs_iomap_ops = {
4217         .iomap_begin    = f2fs_iomap_begin,
4218 };