fs/f2fs/file.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * fs/f2fs/file.c
   4  *
   5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
   6  *             http://www.samsung.com/
   7  */
   8 #include <linux/fs.h>
   9 #include <linux/f2fs_fs.h>
  10 #include <linux/stat.h>
  11 #include <linux/writeback.h>
  12 #include <linux/blkdev.h>
  13 #include <linux/falloc.h>
  14 #include <linux/types.h>
  15 #include <linux/compat.h>
  16 #include <linux/uaccess.h>
  17 #include <linux/mount.h>
  18 #include <linux/pagevec.h>
  19 #include <linux/uio.h>
  20 #include <linux/uuid.h>
  21 #include <linux/file.h>
  22 #include <linux/nls.h>
  23 #include <linux/sched/signal.h>
  24 #include <linux/fileattr.h>
  25 #include <linux/fadvise.h>
  26 #include <linux/iomap.h>
  27
  28 #include "f2fs.h"
  29 #include "node.h"
  30 #include "segment.h"
  31 #include "xattr.h"
  32 #include "acl.h"
  33 #include "gc.h"
  34 #include "iostat.h"
  35 #include <trace/events/f2fs.h>
  36 #include <uapi/linux/f2fs.h>
  37
  38 static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
  39 {
  40         struct inode *inode = file_inode(vmf->vma->vm_file);
  41         vm_flags_t flags = vmf->vma->vm_flags;
  42         vm_fault_t ret;
  43
  44         ret = filemap_fault(vmf);
  45         if (ret & VM_FAULT_LOCKED)
  46                 f2fs_update_iostat(F2FS_I_SB(inode), inode,
  47                                         APP_MAPPED_READ_IO, F2FS_BLKSIZE);
  48
  49         trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret);
  50
  51         return ret;
  52 }
  53
  54 static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
  55 {
  56         struct folio *folio = page_folio(vmf->page);
  57         struct inode *inode = file_inode(vmf->vma->vm_file);
  58         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  59         struct dnode_of_data dn;
  60         bool need_alloc = !f2fs_is_pinned_file(inode);
  61         int err = 0;
  62         vm_fault_t ret;
  63
  64         if (unlikely(IS_IMMUTABLE(inode)))
  65                 return VM_FAULT_SIGBUS;
  66
  67         if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
  68                 err = -EIO;
  69                 goto out;
  70         }
  71
  72         if (unlikely(f2fs_cp_error(sbi))) {
  73                 err = -EIO;
  74                 goto out;
  75         }
  76
  77         if (!f2fs_is_checkpoint_ready(sbi)) {
  78                 err = -ENOSPC;
  79                 goto out;
  80         }
  81
  82         err = f2fs_convert_inline_inode(inode);
  83         if (err)
  84                 goto out;
  85
  86 #ifdef CONFIG_F2FS_FS_COMPRESSION
  87         if (f2fs_compressed_file(inode)) {
  88                 int ret = f2fs_is_compressed_cluster(inode, folio->index);
  89
  90                 if (ret < 0) {
  91                         err = ret;
  92                         goto out;
  93                 } else if (ret) {
  94                         need_alloc = false;
  95                 }
  96         }
  97 #endif
  98         /* should do out of any locked page */
  99         if (need_alloc)
 100                 f2fs_balance_fs(sbi, true);
 101
 102         sb_start_pagefault(inode->i_sb);
 103
 104         f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
 105
 106         file_update_time(vmf->vma->vm_file);
 107         filemap_invalidate_lock_shared(inode->i_mapping);
 108         folio_lock(folio);
 109         if (unlikely(folio->mapping != inode->i_mapping ||
 110                         folio_pos(folio) > i_size_read(inode) ||
 111                         !folio_test_uptodate(folio))) {
 112                 folio_unlock(folio);
 113                 err = -EFAULT;
 114                 goto out_sem;
 115         }
 116
 117         set_new_dnode(&dn, inode, NULL, NULL, 0);
 118         if (need_alloc) {
 119                 /* block allocation */
 120                 err = f2fs_get_block_locked(&dn, folio->index);
 121         } else {
 122                 err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE);
 123                 f2fs_put_dnode(&dn);
 124                 if (f2fs_is_pinned_file(inode) &&
 125                     !__is_valid_data_blkaddr(dn.data_blkaddr))
 126                         err = -EIO;
 127         }
 128
 129         if (err) {
 130                 folio_unlock(folio);
 131                 goto out_sem;
 132         }
 133
 134         f2fs_wait_on_page_writeback(folio_page(folio, 0), DATA, false, true);
 135
 136         /* wait for GCed page writeback via META_MAPPING */
 137         f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
 138
 139         /*
 140          * check to see if the page is mapped already (no holes)
 141          */
 142         if (folio_test_mappedtodisk(folio))
 143                 goto out_sem;
 144
 145         /* page is wholly or partially inside EOF */
 146         if (((loff_t)(folio->index + 1) << PAGE_SHIFT) >
 147                                                 i_size_read(inode)) {
 148                 loff_t offset;
 149
 150                 offset = i_size_read(inode) & ~PAGE_MASK;
 151                 folio_zero_segment(folio, offset, folio_size(folio));
 152         }
 153         folio_mark_dirty(folio);
 154
 155         f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE);
 156         f2fs_update_time(sbi, REQ_TIME);
 157
 158 out_sem:
 159         filemap_invalidate_unlock_shared(inode->i_mapping);
 160
 161         sb_end_pagefault(inode->i_sb);
 162 out:
 163         ret = vmf_fs_error(err);
 164
 165         trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret);
 166         return ret;
 167 }
 168
 169 static const struct vm_operations_struct f2fs_file_vm_ops = {
 170         .fault          = f2fs_filemap_fault,
 171         .map_pages      = filemap_map_pages,
 172         .page_mkwrite   = f2fs_vm_page_mkwrite,
 173 };
 174
 175 static int get_parent_ino(struct inode *inode, nid_t *pino)
 176 {
 177         struct dentry *dentry;
 178
 179         /*
 180          * Make sure to get the non-deleted alias.  The alias associated with
 181          * the open file descriptor being fsync()'ed may be deleted already.
 182          */
 183         dentry = d_find_alias(inode);
 184         if (!dentry)
 185                 return 0;
 186
 187         *pino = d_parent_ino(dentry);
 188         dput(dentry);
 189         return 1;
 190 }
 191
 192 static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
 193 {
 194         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 195         enum cp_reason_type cp_reason = CP_NO_NEEDED;
 196
 197         if (!S_ISREG(inode->i_mode))
 198                 cp_reason = CP_NON_REGULAR;
 199         else if (f2fs_compressed_file(inode))
 200                 cp_reason = CP_COMPRESSED;
 201         else if (inode->i_nlink != 1)
 202                 cp_reason = CP_HARDLINK;
 203         else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
 204                 cp_reason = CP_SB_NEED_CP;
 205         else if (file_wrong_pino(inode))
 206                 cp_reason = CP_WRONG_PINO;
 207         else if (!f2fs_space_for_roll_forward(sbi))
 208                 cp_reason = CP_NO_SPC_ROLL;
 209         else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
 210                 cp_reason = CP_NODE_NEED_CP;
 211         else if (test_opt(sbi, FASTBOOT))
 212                 cp_reason = CP_FASTBOOT_MODE;
 213         else if (F2FS_OPTION(sbi).active_logs == 2)
 214                 cp_reason = CP_SPEC_LOG_NUM;
 215         else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT &&
 216                 f2fs_need_dentry_mark(sbi, inode->i_ino) &&
 217                 f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
 218                                                         TRANS_DIR_INO))
 219                 cp_reason = CP_RECOVER_DIR;
 220         else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
 221                                                         XATTR_DIR_INO))
 222                 cp_reason = CP_XATTR_DIR;
 223
 224         return cp_reason;
 225 }
 226
 227 static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
 228 {
 229         struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
 230         bool ret = false;
 231         /* But we need to avoid that there are some inode updates */
 232         if ((i && PageDirty(i)) || f2fs_need_inode_block_update(sbi, ino))
 233                 ret = true;
 234         f2fs_put_page(i, 0);
 235         return ret;
 236 }
 237
 238 static void try_to_fix_pino(struct inode *inode)
 239 {
 240         struct f2fs_inode_info *fi = F2FS_I(inode);
 241         nid_t pino;
 242
 243         f2fs_down_write(&fi->i_sem);
 244         if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
 245                         get_parent_ino(inode, &pino)) {
 246                 f2fs_i_pino_write(inode, pino);
 247                 file_got_pino(inode);
 248         }
 249         f2fs_up_write(&fi->i_sem);
 250 }
 251
 252 static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
 253                                                 int datasync, bool atomic)
 254 {
 255         struct inode *inode = file->f_mapping->host;
 256         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 257         nid_t ino = inode->i_ino;
 258         int ret = 0;
 259         enum cp_reason_type cp_reason = 0;
 260         struct writeback_control wbc = {
 261                 .sync_mode = WB_SYNC_ALL,
 262                 .nr_to_write = LONG_MAX,
 263                 .for_reclaim = 0,
 264         };
 265         unsigned int seq_id = 0;
 266
 267         if (unlikely(f2fs_readonly(inode->i_sb)))
 268                 return 0;
 269
 270         trace_f2fs_sync_file_enter(inode);
 271
 272         if (S_ISDIR(inode->i_mode))
 273                 goto go_write;
 274
 275         /* if fdatasync is triggered, let's do in-place-update */
 276         if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
 277                 set_inode_flag(inode, FI_NEED_IPU);
 278         ret = file_write_and_wait_range(file, start, end);
 279         clear_inode_flag(inode, FI_NEED_IPU);
 280
 281         if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
 282                 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
 283                 return ret;
 284         }
 285
 286         /* if the inode is dirty, let's recover all the time */
 287         if (!f2fs_skip_inode_update(inode, datasync)) {
 288                 f2fs_write_inode(inode, NULL);
 289                 goto go_write;
 290         }
 291
 292         /*
 293          * if there is no written data, don't waste time to write recovery info.
 294          */
 295         if (!is_inode_flag_set(inode, FI_APPEND_WRITE) &&
 296                         !f2fs_exist_written_data(sbi, ino, APPEND_INO)) {
 297
 298                 /* it may call write_inode just prior to fsync */
 299                 if (need_inode_page_update(sbi, ino))
 300                         goto go_write;
 301
 302                 if (is_inode_flag_set(inode, FI_UPDATE_WRITE) ||
 303                                 f2fs_exist_written_data(sbi, ino, UPDATE_INO))
 304                         goto flush_out;
 305                 goto out;
 306         } else {
 307                 /*
 308                  * for OPU case, during fsync(), node can be persisted before
 309                  * data when lower device doesn't support write barrier, result
 310                  * in data corruption after SPO.
 311                  * So for strict fsync mode, force to use atomic write semantics
 312                  * to keep write order in between data/node and last node to
 313                  * avoid potential data corruption.
 314                  */
 315                 if (F2FS_OPTION(sbi).fsync_mode ==
 316                                 FSYNC_MODE_STRICT && !atomic)
 317                         atomic = true;
 318         }
 319 go_write:
 320         /*
 321          * Both of fdatasync() and fsync() are able to be recovered from
 322          * sudden-power-off.
 323          */
 324         f2fs_down_read(&F2FS_I(inode)->i_sem);
 325         cp_reason = need_do_checkpoint(inode);
 326         f2fs_up_read(&F2FS_I(inode)->i_sem);
 327
 328         if (cp_reason) {
 329                 /* all the dirty node pages should be flushed for POR */
 330                 ret = f2fs_sync_fs(inode->i_sb, 1);
 331
 332                 /*
 333                  * We've secured consistency through sync_fs. Following pino
 334                  * will be used only for fsynced inodes after checkpoint.
 335                  */
 336                 try_to_fix_pino(inode);
 337                 clear_inode_flag(inode, FI_APPEND_WRITE);
 338                 clear_inode_flag(inode, FI_UPDATE_WRITE);
 339                 goto out;
 340         }
 341 sync_nodes:
 342         atomic_inc(&sbi->wb_sync_req[NODE]);
 343         ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
 344         atomic_dec(&sbi->wb_sync_req[NODE]);
 345         if (ret)
 346                 goto out;
 347
 348         /* if cp_error was enabled, we should avoid infinite loop */
 349         if (unlikely(f2fs_cp_error(sbi))) {
 350                 ret = -EIO;
 351                 goto out;
 352         }
 353
 354         if (f2fs_need_inode_block_update(sbi, ino)) {
 355                 f2fs_mark_inode_dirty_sync(inode, true);
 356                 f2fs_write_inode(inode, NULL);
 357                 goto sync_nodes;
 358         }
 359
 360         /*
 361          * If it's atomic_write, it's just fine to keep write ordering. So
 362          * here we don't need to wait for node write completion, since we use
 363          * node chain which serializes node blocks. If one of node writes are
 364          * reordered, we can see simply broken chain, resulting in stopping
 365          * roll-forward recovery. It means we'll recover all or none node blocks
 366          * given fsync mark.
 367          */
 368         if (!atomic) {
 369                 ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
 370                 if (ret)
 371                         goto out;
 372         }
 373
 374         /* once recovery info is written, don't need to tack this */
 375         f2fs_remove_ino_entry(sbi, ino, APPEND_INO);
 376         clear_inode_flag(inode, FI_APPEND_WRITE);
 377 flush_out:
 378         if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER)
 379                 ret = f2fs_issue_flush(sbi, inode->i_ino);
 380         if (!ret) {
 381                 f2fs_remove_ino_entry(sbi, ino, UPDATE_INO);
 382                 clear_inode_flag(inode, FI_UPDATE_WRITE);
 383                 f2fs_remove_ino_entry(sbi, ino, FLUSH_INO);
 384         }
 385         f2fs_update_time(sbi, REQ_TIME);
 386 out:
 387         trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
 388         return ret;
 389 }
 390
 391 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 392 {
 393         if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
 394                 return -EIO;
 395         return f2fs_do_sync_file(file, start, end, datasync, false);
 396 }
 397
 398 static bool __found_offset(struct address_space *mapping,
 399                 struct dnode_of_data *dn, pgoff_t index, int whence)
 400 {
 401         block_t blkaddr = f2fs_data_blkaddr(dn);
 402         struct inode *inode = mapping->host;
 403         bool compressed_cluster = false;
 404
 405         if (f2fs_compressed_file(inode)) {
 406                 block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page,
 407                     ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size));
 408
 409                 compressed_cluster = first_blkaddr == COMPRESS_ADDR;
 410         }
 411
 412         switch (whence) {
 413         case SEEK_DATA:
 414                 if (__is_valid_data_blkaddr(blkaddr))
 415                         return true;
 416                 if (blkaddr == NEW_ADDR &&
 417                     xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY))
 418                         return true;
 419                 if (compressed_cluster)
 420                         return true;
 421                 break;
 422         case SEEK_HOLE:
 423                 if (compressed_cluster)
 424                         return false;
 425                 if (blkaddr == NULL_ADDR)
 426                         return true;
 427                 break;
 428         }
 429         return false;
 430 }
 431
 432 static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
 433 {
 434         struct inode *inode = file->f_mapping->host;
 435         loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
 436         struct dnode_of_data dn;
 437         pgoff_t pgofs, end_offset;
 438         loff_t data_ofs = offset;
 439         loff_t isize;
 440         int err = 0;
 441
 442         inode_lock_shared(inode);
 443
 444         isize = i_size_read(inode);
 445         if (offset >= isize)
 446                 goto fail;
 447
 448         /* handle inline data case */
 449         if (f2fs_has_inline_data(inode)) {
 450                 if (whence == SEEK_HOLE) {
 451                         data_ofs = isize;
 452                         goto found;
 453                 } else if (whence == SEEK_DATA) {
 454                         data_ofs = offset;
 455                         goto found;
 456                 }
 457         }
 458
 459         pgofs = (pgoff_t)(offset >> PAGE_SHIFT);
 460
 461         for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
 462                 set_new_dnode(&dn, inode, NULL, NULL, 0);
 463                 err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
 464                 if (err && err != -ENOENT) {
 465                         goto fail;
 466                 } else if (err == -ENOENT) {
 467                         /* direct node does not exists */
 468                         if (whence == SEEK_DATA) {
 469                                 pgofs = f2fs_get_next_page_offset(&dn, pgofs);
 470                                 continue;
 471                         } else {
 472                                 goto found;
 473                         }
 474                 }
 475
 476                 end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
 477
 478                 /* find data/hole in dnode block */
 479                 for (; dn.ofs_in_node < end_offset;
 480                                 dn.ofs_in_node++, pgofs++,
 481                                 data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
 482                         block_t blkaddr;
 483
 484                         blkaddr = f2fs_data_blkaddr(&dn);
 485
 486                         if (__is_valid_data_blkaddr(blkaddr) &&
 487                                 !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
 488                                         blkaddr, DATA_GENERIC_ENHANCE)) {
 489                                 f2fs_put_dnode(&dn);
 490                                 goto fail;
 491                         }
 492
 493                         if (__found_offset(file->f_mapping, &dn,
 494                                                         pgofs, whence)) {
 495                                 f2fs_put_dnode(&dn);
 496                                 goto found;
 497                         }
 498                 }
 499                 f2fs_put_dnode(&dn);
 500         }
 501
 502         if (whence == SEEK_DATA)
 503                 goto fail;
 504 found:
 505         if (whence == SEEK_HOLE && data_ofs > isize)
 506                 data_ofs = isize;
 507         inode_unlock_shared(inode);
 508         return vfs_setpos(file, data_ofs, maxbytes);
 509 fail:
 510         inode_unlock_shared(inode);
 511         return -ENXIO;
 512 }
 513
 514 static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
 515 {
 516         struct inode *inode = file->f_mapping->host;
 517         loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
 518
 519         switch (whence) {
 520         case SEEK_SET:
 521         case SEEK_CUR:
 522         case SEEK_END:
 523                 return generic_file_llseek_size(file, offset, whence,
 524                                                 maxbytes, i_size_read(inode));
 525         case SEEK_DATA:
 526         case SEEK_HOLE:
 527                 if (offset < 0)
 528                         return -ENXIO;
 529                 return f2fs_seek_block(file, offset, whence);
 530         }
 531
 532         return -EINVAL;
 533 }
 534
 535 static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
 536 {
 537         struct inode *inode = file_inode(file);
 538
 539         if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
 540                 return -EIO;
 541
 542         if (!f2fs_is_compress_backend_ready(inode))
 543                 return -EOPNOTSUPP;
 544
 545         file_accessed(file);
 546         vma->vm_ops = &f2fs_file_vm_ops;
 547
 548         f2fs_down_read(&F2FS_I(inode)->i_sem);
 549         set_inode_flag(inode, FI_MMAP_FILE);
 550         f2fs_up_read(&F2FS_I(inode)->i_sem);
 551
 552         return 0;
 553 }
 554
 555 static int finish_preallocate_blocks(struct inode *inode)
 556 {
 557         int ret;
 558
 559         inode_lock(inode);
 560         if (is_inode_flag_set(inode, FI_OPENED_FILE)) {
 561                 inode_unlock(inode);
 562                 return 0;
 563         }
 564
 565         if (!file_should_truncate(inode)) {
 566                 set_inode_flag(inode, FI_OPENED_FILE);
 567                 inode_unlock(inode);
 568                 return 0;
 569         }
 570
 571         f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 572         filemap_invalidate_lock(inode->i_mapping);
 573
 574         truncate_setsize(inode, i_size_read(inode));
 575         ret = f2fs_truncate(inode);
 576
 577         filemap_invalidate_unlock(inode->i_mapping);
 578         f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 579
 580         if (!ret)
 581                 set_inode_flag(inode, FI_OPENED_FILE);
 582
 583         inode_unlock(inode);
 584         if (ret)
 585                 return ret;
 586
 587         file_dont_truncate(inode);
 588         return 0;
 589 }
 590
 591 static int f2fs_file_open(struct inode *inode, struct file *filp)
 592 {
 593         int err = fscrypt_file_open(inode, filp);
 594
 595         if (err)
 596                 return err;
 597
 598         if (!f2fs_is_compress_backend_ready(inode))
 599                 return -EOPNOTSUPP;
 600
 601         err = fsverity_file_open(inode, filp);
 602         if (err)
 603                 return err;
 604
 605         filp->f_mode |= FMODE_NOWAIT;
 606         filp->f_mode |= FMODE_CAN_ODIRECT;
 607
 608         err = dquot_file_open(inode, filp);
 609         if (err)
 610                 return err;
 611
 612         return finish_preallocate_blocks(inode);
 613 }
 614
 615 void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
 616 {
 617         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
 618         int nr_free = 0, ofs = dn->ofs_in_node, len = count;
 619         __le32 *addr;
 620         bool compressed_cluster = false;
 621         int cluster_index = 0, valid_blocks = 0;
 622         int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
 623         bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks);
 624
 625         addr = get_dnode_addr(dn->inode, dn->node_page) + ofs;
 626
 627         /* Assumption: truncation starts with cluster */
 628         for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) {
 629                 block_t blkaddr = le32_to_cpu(*addr);
 630
 631                 if (f2fs_compressed_file(dn->inode) &&
 632                                         !(cluster_index & (cluster_size - 1))) {
 633                         if (compressed_cluster)
 634                                 f2fs_i_compr_blocks_update(dn->inode,
 635                                                         valid_blocks, false);
 636                         compressed_cluster = (blkaddr == COMPRESS_ADDR);
 637                         valid_blocks = 0;
 638                 }
 639
 640                 if (blkaddr == NULL_ADDR)
 641                         continue;
 642
 643                 f2fs_set_data_blkaddr(dn, NULL_ADDR);
 644
 645                 if (__is_valid_data_blkaddr(blkaddr)) {
 646                         if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE))
 647                                 continue;
 648                         if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
 649                                                 DATA_GENERIC_ENHANCE))
 650                                 continue;
 651                         if (compressed_cluster)
 652                                 valid_blocks++;
 653                 }
 654
 655                 f2fs_invalidate_blocks(sbi, blkaddr);
 656
 657                 if (!released || blkaddr != COMPRESS_ADDR)
 658                         nr_free++;
 659         }
 660
 661         if (compressed_cluster)
 662                 f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false);
 663
 664         if (nr_free) {
 665                 pgoff_t fofs;
 666                 /*
 667                  * once we invalidate valid blkaddr in range [ofs, ofs + count],
 668                  * we will invalidate all blkaddr in the whole range.
 669                  */
 670                 fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page),
 671                                                         dn->inode) + ofs;
 672                 f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
 673                 f2fs_update_age_extent_cache_range(dn, fofs, len);
 674                 dec_valid_block_count(sbi, dn->inode, nr_free);
 675         }
 676         dn->ofs_in_node = ofs;
 677
 678         f2fs_update_time(sbi, REQ_TIME);
 679         trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
 680                                          dn->ofs_in_node, nr_free);
 681 }
 682
 683 static int truncate_partial_data_page(struct inode *inode, u64 from,
 684                                                                 bool cache_only)
 685 {
 686         loff_t offset = from & (PAGE_SIZE - 1);
 687         pgoff_t index = from >> PAGE_SHIFT;
 688         struct address_space *mapping = inode->i_mapping;
 689         struct page *page;
 690
 691         if (!offset && !cache_only)
 692                 return 0;
 693
 694         if (cache_only) {
 695                 page = find_lock_page(mapping, index);
 696                 if (page && PageUptodate(page))
 697                         goto truncate_out;
 698                 f2fs_put_page(page, 1);
 699                 return 0;
 700         }
 701
 702         page = f2fs_get_lock_data_page(inode, index, true);
 703         if (IS_ERR(page))
 704                 return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
 705 truncate_out:
 706         f2fs_wait_on_page_writeback(page, DATA, true, true);
 707         zero_user(page, offset, PAGE_SIZE - offset);
 708
 709         /* An encrypted inode should have a key and truncate the last page. */
 710         f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode));
 711         if (!cache_only)
 712                 set_page_dirty(page);
 713         f2fs_put_page(page, 1);
 714         return 0;
 715 }
 716
 717 int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
 718 {
 719         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 720         struct dnode_of_data dn;
 721         pgoff_t free_from;
 722         int count = 0, err = 0;
 723         struct page *ipage;
 724         bool truncate_page = false;
 725
 726         trace_f2fs_truncate_blocks_enter(inode, from);
 727
 728         free_from = (pgoff_t)F2FS_BLK_ALIGN(from);
 729
 730         if (free_from >= max_file_blocks(inode))
 731                 goto free_partial;
 732
 733         if (lock)
 734                 f2fs_lock_op(sbi);
 735
 736         ipage = f2fs_get_node_page(sbi, inode->i_ino);
 737         if (IS_ERR(ipage)) {
 738                 err = PTR_ERR(ipage);
 739                 goto out;
 740         }
 741
 742         if (f2fs_has_inline_data(inode)) {
 743                 f2fs_truncate_inline_inode(inode, ipage, from);
 744                 f2fs_put_page(ipage, 1);
 745                 truncate_page = true;
 746                 goto out;
 747         }
 748
 749         set_new_dnode(&dn, inode, ipage, NULL, 0);
 750         err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
 751         if (err) {
 752                 if (err == -ENOENT)
 753                         goto free_next;
 754                 goto out;
 755         }
 756
 757         count = ADDRS_PER_PAGE(dn.node_page, inode);
 758
 759         count -= dn.ofs_in_node;
 760         f2fs_bug_on(sbi, count < 0);
 761
 762         if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
 763                 f2fs_truncate_data_blocks_range(&dn, count);
 764                 free_from += count;
 765         }
 766
 767         f2fs_put_dnode(&dn);
 768 free_next:
 769         err = f2fs_truncate_inode_blocks(inode, free_from);
 770 out:
 771         if (lock)
 772                 f2fs_unlock_op(sbi);
 773 free_partial:
 774         /* lastly zero out the first data page */
 775         if (!err)
 776                 err = truncate_partial_data_page(inode, from, truncate_page);
 777
 778         trace_f2fs_truncate_blocks_exit(inode, err);
 779         return err;
 780 }
 781
 782 int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
 783 {
 784         u64 free_from = from;
 785         int err;
 786
 787 #ifdef CONFIG_F2FS_FS_COMPRESSION
 788         /*
 789          * for compressed file, only support cluster size
 790          * aligned truncation.
 791          */
 792         if (f2fs_compressed_file(inode))
 793                 free_from = round_up(from,
 794                                 F2FS_I(inode)->i_cluster_size << PAGE_SHIFT);
 795 #endif
 796
 797         err = f2fs_do_truncate_blocks(inode, free_from, lock);
 798         if (err)
 799                 return err;
 800
 801 #ifdef CONFIG_F2FS_FS_COMPRESSION
 802         /*
 803          * For compressed file, after release compress blocks, don't allow write
 804          * direct, but we should allow write direct after truncate to zero.
 805          */
 806         if (f2fs_compressed_file(inode) && !free_from
 807                         && is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
 808                 clear_inode_flag(inode, FI_COMPRESS_RELEASED);
 809
 810         if (from != free_from) {
 811                 err = f2fs_truncate_partial_cluster(inode, from, lock);
 812                 if (err)
 813                         return err;
 814         }
 815 #endif
 816
 817         return 0;
 818 }
 819
 820 int f2fs_truncate(struct inode *inode)
 821 {
 822         int err;
 823
 824         if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
 825                 return -EIO;
 826
 827         if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 828                                 S_ISLNK(inode->i_mode)))
 829                 return 0;
 830
 831         trace_f2fs_truncate(inode);
 832
 833         if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE))
 834                 return -EIO;
 835
 836         err = f2fs_dquot_initialize(inode);
 837         if (err)
 838                 return err;
 839
 840         /* we should check inline_data size */
 841         if (!f2fs_may_inline_data(inode)) {
 842                 err = f2fs_convert_inline_inode(inode);
 843                 if (err)
 844                         return err;
 845         }
 846
 847         err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
 848         if (err)
 849                 return err;
 850
 851         inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
 852         f2fs_mark_inode_dirty_sync(inode, false);
 853         return 0;
 854 }
 855
 856 static bool f2fs_force_buffered_io(struct inode *inode, int rw)
 857 {
 858         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 859
 860         if (!fscrypt_dio_supported(inode))
 861                 return true;
 862         if (fsverity_active(inode))
 863                 return true;
 864         if (f2fs_compressed_file(inode))
 865                 return true;
 866         if (f2fs_has_inline_data(inode))
 867                 return true;
 868
 869         /* disallow direct IO if any of devices has unaligned blksize */
 870         if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
 871                 return true;
 872         /*
 873          * for blkzoned device, fallback direct IO to buffered IO, so
 874          * all IOs can be serialized by log-structured write.
 875          */
 876         if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) &&
 877             !f2fs_is_pinned_file(inode))
 878                 return true;
 879         if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
 880                 return true;
 881
 882         return false;
 883 }
 884
 885 int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path,
 886                  struct kstat *stat, u32 request_mask, unsigned int query_flags)
 887 {
 888         struct inode *inode = d_inode(path->dentry);
 889         struct f2fs_inode_info *fi = F2FS_I(inode);
 890         struct f2fs_inode *ri = NULL;
 891         unsigned int flags;
 892
 893         if (f2fs_has_extra_attr(inode) &&
 894                         f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
 895                         F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
 896                 stat->result_mask |= STATX_BTIME;
 897                 stat->btime.tv_sec = fi->i_crtime.tv_sec;
 898                 stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
 899         }
 900
 901         /*
 902          * Return the DIO alignment restrictions if requested.  We only return
 903          * this information when requested, since on encrypted files it might
 904          * take a fair bit of work to get if the file wasn't opened recently.
 905          *
 906          * f2fs sometimes supports DIO reads but not DIO writes.  STATX_DIOALIGN
 907          * cannot represent that, so in that case we report no DIO support.
 908          */
 909         if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
 910                 unsigned int bsize = i_blocksize(inode);
 911
 912                 stat->result_mask |= STATX_DIOALIGN;
 913                 if (!f2fs_force_buffered_io(inode, WRITE)) {
 914                         stat->dio_mem_align = bsize;
 915                         stat->dio_offset_align = bsize;
 916                 }
 917         }
 918
 919         flags = fi->i_flags;
 920         if (flags & F2FS_COMPR_FL)
 921                 stat->attributes |= STATX_ATTR_COMPRESSED;
 922         if (flags & F2FS_APPEND_FL)
 923                 stat->attributes |= STATX_ATTR_APPEND;
 924         if (IS_ENCRYPTED(inode))
 925                 stat->attributes |= STATX_ATTR_ENCRYPTED;
 926         if (flags & F2FS_IMMUTABLE_FL)
 927                 stat->attributes |= STATX_ATTR_IMMUTABLE;
 928         if (flags & F2FS_NODUMP_FL)
 929                 stat->attributes |= STATX_ATTR_NODUMP;
 930         if (IS_VERITY(inode))
 931                 stat->attributes |= STATX_ATTR_VERITY;
 932
 933         stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
 934                                   STATX_ATTR_APPEND |
 935                                   STATX_ATTR_ENCRYPTED |
 936                                   STATX_ATTR_IMMUTABLE |
 937                                   STATX_ATTR_NODUMP |
 938                                   STATX_ATTR_VERITY);
 939
 940         generic_fillattr(idmap, request_mask, inode, stat);
 941
 942         /* we need to show initial sectors used for inline_data/dentries */
 943         if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
 944                                         f2fs_has_inline_dentry(inode))
 945                 stat->blocks += (stat->size + 511) >> 9;
 946
 947         return 0;
 948 }
 949
 950 #ifdef CONFIG_F2FS_FS_POSIX_ACL
 951 static void __setattr_copy(struct mnt_idmap *idmap,
 952                            struct inode *inode, const struct iattr *attr)
 953 {
 954         unsigned int ia_valid = attr->ia_valid;
 955
 956         i_uid_update(idmap, attr, inode);
 957         i_gid_update(idmap, attr, inode);
 958         if (ia_valid & ATTR_ATIME)
 959                 inode_set_atime_to_ts(inode, attr->ia_atime);
 960         if (ia_valid & ATTR_MTIME)
 961                 inode_set_mtime_to_ts(inode, attr->ia_mtime);
 962         if (ia_valid & ATTR_CTIME)
 963                 inode_set_ctime_to_ts(inode, attr->ia_ctime);
 964         if (ia_valid & ATTR_MODE) {
 965                 umode_t mode = attr->ia_mode;
 966
 967                 if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode)))
 968                         mode &= ~S_ISGID;
 969                 set_acl_inode(inode, mode);
 970         }
 971 }
 972 #else
 973 #define __setattr_copy setattr_copy
 974 #endif
 975
 976 int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 977                  struct iattr *attr)
 978 {
 979         struct inode *inode = d_inode(dentry);
 980         struct f2fs_inode_info *fi = F2FS_I(inode);
 981         int err;
 982
 983         if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
 984                 return -EIO;
 985
 986         if (unlikely(IS_IMMUTABLE(inode)))
 987                 return -EPERM;
 988
 989         if (unlikely(IS_APPEND(inode) &&
 990                         (attr->ia_valid & (ATTR_MODE | ATTR_UID |
 991                                   ATTR_GID | ATTR_TIMES_SET))))
 992                 return -EPERM;
 993
 994         if ((attr->ia_valid & ATTR_SIZE)) {
 995                 if (!f2fs_is_compress_backend_ready(inode))
 996                         return -EOPNOTSUPP;
 997                 if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) &&
 998                         !IS_ALIGNED(attr->ia_size,
 999                         F2FS_BLK_TO_BYTES(fi->i_cluster_size)))
1000                         return -EINVAL;
1001         }
1002
1003         err = setattr_prepare(idmap, dentry, attr);
1004         if (err)
1005                 return err;
1006
1007         err = fscrypt_prepare_setattr(dentry, attr);
1008         if (err)
1009                 return err;
1010
1011         err = fsverity_prepare_setattr(dentry, attr);
1012         if (err)
1013                 return err;
1014
1015         if (is_quota_modification(idmap, inode, attr)) {
1016                 err = f2fs_dquot_initialize(inode);
1017                 if (err)
1018                         return err;
1019         }
1020         if (i_uid_needs_update(idmap, attr, inode) ||
1021             i_gid_needs_update(idmap, attr, inode)) {
1022                 f2fs_lock_op(F2FS_I_SB(inode));
1023                 err = dquot_transfer(idmap, inode, attr);
1024                 if (err) {
1025                         set_sbi_flag(F2FS_I_SB(inode),
1026                                         SBI_QUOTA_NEED_REPAIR);
1027                         f2fs_unlock_op(F2FS_I_SB(inode));
1028                         return err;
1029                 }
1030                 /*
1031                  * update uid/gid under lock_op(), so that dquot and inode can
1032                  * be updated atomically.
1033                  */
1034                 i_uid_update(idmap, attr, inode);
1035                 i_gid_update(idmap, attr, inode);
1036                 f2fs_mark_inode_dirty_sync(inode, true);
1037                 f2fs_unlock_op(F2FS_I_SB(inode));
1038         }
1039
1040         if (attr->ia_valid & ATTR_SIZE) {
1041                 loff_t old_size = i_size_read(inode);
1042
1043                 if (attr->ia_size > MAX_INLINE_DATA(inode)) {
1044                         /*
1045                          * should convert inline inode before i_size_write to
1046                          * keep smaller than inline_data size with inline flag.
1047                          */
1048                         err = f2fs_convert_inline_inode(inode);
1049                         if (err)
1050                                 return err;
1051                 }
1052
1053                 /*
1054                  * wait for inflight dio, blocks should be removed after
1055                  * IO completion.
1056                  */
1057                 if (attr->ia_size < old_size)
1058                         inode_dio_wait(inode);
1059
1060                 f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
1061                 filemap_invalidate_lock(inode->i_mapping);
1062
1063                 truncate_setsize(inode, attr->ia_size);
1064
1065                 if (attr->ia_size <= old_size)
1066                         err = f2fs_truncate(inode);
1067                 /*
1068                  * do not trim all blocks after i_size if target size is
1069                  * larger than i_size.
1070                  */
1071                 filemap_invalidate_unlock(inode->i_mapping);
1072                 f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
1073                 if (err)
1074                         return err;
1075
1076                 spin_lock(&fi->i_size_lock);
1077                 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
1078                 fi->last_disk_size = i_size_read(inode);
1079                 spin_unlock(&fi->i_size_lock);
1080         }
1081
1082         __setattr_copy(idmap, inode, attr);
1083
1084         if (attr->ia_valid & ATTR_MODE) {
1085                 err = posix_acl_chmod(idmap, dentry, f2fs_get_inode_mode(inode));
1086
1087                 if (is_inode_flag_set(inode, FI_ACL_MODE)) {
1088                         if (!err)
1089                                 inode->i_mode = fi->i_acl_mode;
1090                         clear_inode_flag(inode, FI_ACL_MODE);
1091                 }
1092         }
1093
1094         /* file size may changed here */
1095         f2fs_mark_inode_dirty_sync(inode, true);
1096
1097         /* inode change will produce dirty node pages flushed by checkpoint */
1098         f2fs_balance_fs(F2FS_I_SB(inode), true);
1099
1100         return err;
1101 }
1102
1103 const struct inode_operations f2fs_file_inode_operations = {
1104         .getattr        = f2fs_getattr,
1105         .setattr        = f2fs_setattr,
1106         .get_inode_acl  = f2fs_get_acl,
1107         .set_acl        = f2fs_set_acl,
1108         .listxattr      = f2fs_listxattr,
1109         .fiemap         = f2fs_fiemap,
1110         .fileattr_get   = f2fs_fileattr_get,
1111         .fileattr_set   = f2fs_fileattr_set,
1112 };
1113
1114 static int fill_zero(struct inode *inode, pgoff_t index,
1115                                         loff_t start, loff_t len)
1116 {
1117         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1118         struct page *page;
1119
1120         if (!len)
1121                 return 0;
1122
1123         f2fs_balance_fs(sbi, true);
1124
1125         f2fs_lock_op(sbi);
1126         page = f2fs_get_new_data_page(inode, NULL, index, false);
1127         f2fs_unlock_op(sbi);
1128
1129         if (IS_ERR(page))
1130                 return PTR_ERR(page);
1131
1132         f2fs_wait_on_page_writeback(page, DATA, true, true);
1133         zero_user(page, start, len);
1134         set_page_dirty(page);
1135         f2fs_put_page(page, 1);
1136         return 0;
1137 }
1138
1139 int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
1140 {
1141         int err;
1142
1143         while (pg_start < pg_end) {
1144                 struct dnode_of_data dn;
1145                 pgoff_t end_offset, count;
1146
1147                 set_new_dnode(&dn, inode, NULL, NULL, 0);
1148                 err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
1149                 if (err) {
1150                         if (err == -ENOENT) {
1151                                 pg_start = f2fs_get_next_page_offset(&dn,
1152                                                                 pg_start);
1153                                 continue;
1154                         }
1155                         return err;
1156                 }
1157
1158                 end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
1159                 count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);
1160
1161                 f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);
1162
1163                 f2fs_truncate_data_blocks_range(&dn, count);
1164                 f2fs_put_dnode(&dn);
1165
1166                 pg_start += count;
1167         }
1168         return 0;
1169 }
1170
1171 static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
1172 {
1173         pgoff_t pg_start, pg_end;
1174         loff_t off_start, off_end;
1175         int ret;
1176
1177         ret = f2fs_convert_inline_inode(inode);
1178         if (ret)
1179                 return ret;
1180
1181         pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
1182         pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
1183
1184         off_start = offset & (PAGE_SIZE - 1);
1185         off_end = (offset + len) & (PAGE_SIZE - 1);
1186
1187         if (pg_start == pg_end) {
1188                 ret = fill_zero(inode, pg_start, off_start,
1189                                                 off_end - off_start);
1190                 if (ret)
1191                         return ret;
1192         } else {
1193                 if (off_start) {
1194                         ret = fill_zero(inode, pg_start++, off_start,
1195                                                 PAGE_SIZE - off_start);
1196                         if (ret)
1197                                 return ret;
1198                 }
1199                 if (off_end) {
1200                         ret = fill_zero(inode, pg_end, 0, off_end);
1201                         if (ret)
1202                                 return ret;
1203                 }
1204
1205                 if (pg_start < pg_end) {
1206                         loff_t blk_start, blk_end;
1207                         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1208
1209                         f2fs_balance_fs(sbi, true);
1210
1211                         blk_start = (loff_t)pg_start << PAGE_SHIFT;
1212                         blk_end = (loff_t)pg_end << PAGE_SHIFT;
1213
1214                         f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1215                         filemap_invalidate_lock(inode->i_mapping);
1216
1217                         truncate_pagecache_range(inode, blk_start, blk_end - 1);
1218
1219                         f2fs_lock_op(sbi);
1220                         ret = f2fs_truncate_hole(inode, pg_start, pg_end);
1221                         f2fs_unlock_op(sbi);
1222
1223                         filemap_invalidate_unlock(inode->i_mapping);
1224                         f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1225                 }
1226         }
1227
1228         return ret;
1229 }
1230
1231 static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr,
1232                                 int *do_replace, pgoff_t off, pgoff_t len)
1233 {
1234         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1235         struct dnode_of_data dn;
1236         int ret, done, i;
1237
1238 next_dnode:
1239         set_new_dnode(&dn, inode, NULL, NULL, 0);
1240         ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
1241         if (ret && ret != -ENOENT) {
1242                 return ret;
1243         } else if (ret == -ENOENT) {
1244                 if (dn.max_level == 0)
1245                         return -ENOENT;
1246                 done = min((pgoff_t)ADDRS_PER_BLOCK(inode) -
1247                                                 dn.ofs_in_node, len);
1248                 blkaddr += done;
1249                 do_replace += done;
1250                 goto next;
1251         }
1252
1253         done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) -
1254                                                         dn.ofs_in_node, len);
1255         for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
1256                 *blkaddr = f2fs_data_blkaddr(&dn);
1257
1258                 if (__is_valid_data_blkaddr(*blkaddr) &&
1259                         !f2fs_is_valid_blkaddr(sbi, *blkaddr,
1260                                         DATA_GENERIC_ENHANCE)) {
1261                         f2fs_put_dnode(&dn);
1262                         return -EFSCORRUPTED;
1263                 }
1264
1265                 if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {
1266
1267                         if (f2fs_lfs_mode(sbi)) {
1268                                 f2fs_put_dnode(&dn);
1269                                 return -EOPNOTSUPP;
1270                         }
1271
1272                         /* do not invalidate this block address */
1273                         f2fs_update_data_blkaddr(&dn, NULL_ADDR);
1274                         *do_replace = 1;
1275                 }
1276         }
1277         f2fs_put_dnode(&dn);
1278 next:
1279         len -= done;
1280         off += done;
1281         if (len)
1282                 goto next_dnode;
1283         return 0;
1284 }
1285
1286 static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr,
1287                                 int *do_replace, pgoff_t off, int len)
1288 {
1289         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1290         struct dnode_of_data dn;
1291         int ret, i;
1292
1293         for (i = 0; i < len; i++, do_replace++, blkaddr++) {
1294                 if (*do_replace == 0)
1295                         continue;
1296
1297                 set_new_dnode(&dn, inode, NULL, NULL, 0);
1298                 ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA);
1299                 if (ret) {
1300                         dec_valid_block_count(sbi, inode, 1);
1301                         f2fs_invalidate_blocks(sbi, *blkaddr);
1302                 } else {
1303                         f2fs_update_data_blkaddr(&dn, *blkaddr);
1304                 }
1305                 f2fs_put_dnode(&dn);
1306         }
1307         return 0;
1308 }
1309
1310 static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
1311                         block_t *blkaddr, int *do_replace,
1312                         pgoff_t src, pgoff_t dst, pgoff_t len, bool full)
1313 {
1314         struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode);
1315         pgoff_t i = 0;
1316         int ret;
1317
1318         while (i < len) {
1319                 if (blkaddr[i] == NULL_ADDR && !full) {
1320                         i++;
1321                         continue;
1322                 }
1323
1324                 if (do_replace[i] || blkaddr[i] == NULL_ADDR) {
1325                         struct dnode_of_data dn;
1326                         struct node_info ni;
1327                         size_t new_size;
1328                         pgoff_t ilen;
1329
1330                         set_new_dnode(&dn, dst_inode, NULL, NULL, 0);
1331                         ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE);
1332                         if (ret)
1333                                 return ret;
1334
1335                         ret = f2fs_get_node_info(sbi, dn.nid, &ni, false);
1336                         if (ret) {
1337                                 f2fs_put_dnode(&dn);
1338                                 return ret;
1339                         }
1340
1341                         ilen = min((pgoff_t)
1342                                 ADDRS_PER_PAGE(dn.node_page, dst_inode) -
1343                                                 dn.ofs_in_node, len - i);
1344                         do {
1345                                 dn.data_blkaddr = f2fs_data_blkaddr(&dn);
1346                                 f2fs_truncate_data_blocks_range(&dn, 1);
1347
1348                                 if (do_replace[i]) {
1349                                         f2fs_i_blocks_write(src_inode,
1350                                                         1, false, false);
1351                                         f2fs_i_blocks_write(dst_inode,
1352                                                         1, true, false);
1353                                         f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
1354                                         blkaddr[i], ni.version, true, false);
1355
1356                                         do_replace[i] = 0;
1357                                 }
1358                                 dn.ofs_in_node++;
1359                                 i++;
1360                                 new_size = (loff_t)(dst + i) << PAGE_SHIFT;
1361                                 if (dst_inode->i_size < new_size)
1362                                         f2fs_i_size_write(dst_inode, new_size);
1363                         } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR));
1364
1365                         f2fs_put_dnode(&dn);
1366                 } else {
1367                         struct page *psrc, *pdst;
1368
1369                         psrc = f2fs_get_lock_data_page(src_inode,
1370                                                         src + i, true);
1371                         if (IS_ERR(psrc))
1372                                 return PTR_ERR(psrc);
1373                         pdst = f2fs_get_new_data_page(dst_inode, NULL, dst + i,
1374                                                                 true);
1375                         if (IS_ERR(pdst)) {
1376                                 f2fs_put_page(psrc, 1);
1377                                 return PTR_ERR(pdst);
1378                         }
1379
1380                         f2fs_wait_on_page_writeback(pdst, DATA, true, true);
1381
1382                         memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE);
1383                         set_page_dirty(pdst);
1384                         set_page_private_gcing(pdst);
1385                         f2fs_put_page(pdst, 1);
1386                         f2fs_put_page(psrc, 1);
1387
1388                         ret = f2fs_truncate_hole(src_inode,
1389                                                 src + i, src + i + 1);
1390                         if (ret)
1391                                 return ret;
1392                         i++;
1393                 }
1394         }
1395         return 0;
1396 }
1397
1398 static int __exchange_data_block(struct inode *src_inode,
1399                         struct inode *dst_inode, pgoff_t src, pgoff_t dst,
1400                         pgoff_t len, bool full)
1401 {
1402         block_t *src_blkaddr;
1403         int *do_replace;
1404         pgoff_t olen;
1405         int ret;
1406
1407         while (len) {
1408                 olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len);
1409
1410                 src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode),
1411                                         array_size(olen, sizeof(block_t)),
1412                                         GFP_NOFS);
1413                 if (!src_blkaddr)
1414                         return -ENOMEM;
1415
1416                 do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode),
1417                                         array_size(olen, sizeof(int)),
1418                                         GFP_NOFS);
1419                 if (!do_replace) {
1420                         kvfree(src_blkaddr);
1421                         return -ENOMEM;
1422                 }
1423
1424                 ret = __read_out_blkaddrs(src_inode, src_blkaddr,
1425                                         do_replace, src, olen);
1426                 if (ret)
1427                         goto roll_back;
1428
1429                 ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr,
1430                                         do_replace, src, dst, olen, full);
1431                 if (ret)
1432                         goto roll_back;
1433
1434                 src += olen;
1435                 dst += olen;
1436                 len -= olen;
1437
1438                 kvfree(src_blkaddr);
1439                 kvfree(do_replace);
1440         }
1441         return 0;
1442
1443 roll_back:
1444         __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen);
1445         kvfree(src_blkaddr);
1446         kvfree(do_replace);
1447         return ret;
1448 }
1449
1450 static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
1451 {
1452         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1453         pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
1454         pgoff_t start = offset >> PAGE_SHIFT;
1455         pgoff_t end = (offset + len) >> PAGE_SHIFT;
1456         int ret;
1457
1458         f2fs_balance_fs(sbi, true);
1459
1460         /* avoid gc operation during block exchange */
1461         f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1462         filemap_invalidate_lock(inode->i_mapping);
1463
1464         f2fs_lock_op(sbi);
1465         f2fs_drop_extent_tree(inode);
1466         truncate_pagecache(inode, offset);
1467         ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
1468         f2fs_unlock_op(sbi);
1469
1470         filemap_invalidate_unlock(inode->i_mapping);
1471         f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1472         return ret;
1473 }
1474
1475 static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
1476 {
1477         loff_t new_size;
1478         int ret;
1479
1480         if (offset + len >= i_size_read(inode))
1481                 return -EINVAL;
1482
1483         /* collapse range should be aligned to block size of f2fs. */
1484         if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
1485                 return -EINVAL;
1486
1487         ret = f2fs_convert_inline_inode(inode);
1488         if (ret)
1489                 return ret;
1490
1491         /* write out all dirty pages from offset */
1492         ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
1493         if (ret)
1494                 return ret;
1495
1496         ret = f2fs_do_collapse(inode, offset, len);
1497         if (ret)
1498                 return ret;
1499
1500         /* write out all moved pages, if possible */
1501         filemap_invalidate_lock(inode->i_mapping);
1502         filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
1503         truncate_pagecache(inode, offset);
1504
1505         new_size = i_size_read(inode) - len;
1506         ret = f2fs_truncate_blocks(inode, new_size, true);
1507         filemap_invalidate_unlock(inode->i_mapping);
1508         if (!ret)
1509                 f2fs_i_size_write(inode, new_size);
1510         return ret;
1511 }
1512
1513 static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
1514                                                                 pgoff_t end)
1515 {
1516         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1517         pgoff_t index = start;
1518         unsigned int ofs_in_node = dn->ofs_in_node;
1519         blkcnt_t count = 0;
1520         int ret;
1521
1522         for (; index < end; index++, dn->ofs_in_node++) {
1523                 if (f2fs_data_blkaddr(dn) == NULL_ADDR)
1524                         count++;
1525         }
1526
1527         dn->ofs_in_node = ofs_in_node;
1528         ret = f2fs_reserve_new_blocks(dn, count);
1529         if (ret)
1530                 return ret;
1531
1532         dn->ofs_in_node = ofs_in_node;
1533         for (index = start; index < end; index++, dn->ofs_in_node++) {
1534                 dn->data_blkaddr = f2fs_data_blkaddr(dn);
1535                 /*
1536                  * f2fs_reserve_new_blocks will not guarantee entire block
1537                  * allocation.
1538                  */
1539                 if (dn->data_blkaddr == NULL_ADDR) {
1540                         ret = -ENOSPC;
1541                         break;
1542                 }
1543
1544                 if (dn->data_blkaddr == NEW_ADDR)
1545                         continue;
1546
1547                 if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr,
1548                                         DATA_GENERIC_ENHANCE)) {
1549                         ret = -EFSCORRUPTED;
1550                         break;
1551                 }
1552
1553                 f2fs_invalidate_blocks(sbi, dn->data_blkaddr);
1554                 f2fs_set_data_blkaddr(dn, NEW_ADDR);
1555         }
1556
1557         f2fs_update_read_extent_cache_range(dn, start, 0, index - start);
1558         f2fs_update_age_extent_cache_range(dn, start, index - start);
1559
1560         return ret;
1561 }
1562
1563 static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
1564                                                                 int mode)
1565 {
1566         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1567         struct address_space *mapping = inode->i_mapping;
1568         pgoff_t index, pg_start, pg_end;
1569         loff_t new_size = i_size_read(inode);
1570         loff_t off_start, off_end;
1571         int ret = 0;
1572
1573         ret = inode_newsize_ok(inode, (len + offset));
1574         if (ret)
1575                 return ret;
1576
1577         ret = f2fs_convert_inline_inode(inode);
1578         if (ret)
1579                 return ret;
1580
1581         ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
1582         if (ret)
1583                 return ret;
1584
1585         pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
1586         pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
1587
1588         off_start = offset & (PAGE_SIZE - 1);
1589         off_end = (offset + len) & (PAGE_SIZE - 1);
1590
1591         if (pg_start == pg_end) {
1592                 ret = fill_zero(inode, pg_start, off_start,
1593                                                 off_end - off_start);
1594                 if (ret)
1595                         return ret;
1596
1597                 new_size = max_t(loff_t, new_size, offset + len);
1598         } else {
1599                 if (off_start) {
1600                         ret = fill_zero(inode, pg_start++, off_start,
1601                                                 PAGE_SIZE - off_start);
1602                         if (ret)
1603                                 return ret;
1604
1605                         new_size = max_t(loff_t, new_size,
1606                                         (loff_t)pg_start << PAGE_SHIFT);
1607                 }
1608
1609                 for (index = pg_start; index < pg_end;) {
1610                         struct dnode_of_data dn;
1611                         unsigned int end_offset;
1612                         pgoff_t end;
1613
1614                         f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1615                         filemap_invalidate_lock(mapping);
1616
1617                         truncate_pagecache_range(inode,
1618                                 (loff_t)index << PAGE_SHIFT,
1619                                 ((loff_t)pg_end << PAGE_SHIFT) - 1);
1620
1621                         f2fs_lock_op(sbi);
1622
1623                         set_new_dnode(&dn, inode, NULL, NULL, 0);
1624                         ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
1625                         if (ret) {
1626                                 f2fs_unlock_op(sbi);
1627                                 filemap_invalidate_unlock(mapping);
1628                                 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1629                                 goto out;
1630                         }
1631
1632                         end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
1633                         end = min(pg_end, end_offset - dn.ofs_in_node + index);
1634
1635                         ret = f2fs_do_zero_range(&dn, index, end);
1636                         f2fs_put_dnode(&dn);
1637
1638                         f2fs_unlock_op(sbi);
1639                         filemap_invalidate_unlock(mapping);
1640                         f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1641
1642                         f2fs_balance_fs(sbi, dn.node_changed);
1643
1644                         if (ret)
1645                                 goto out;
1646
1647                         index = end;
1648                         new_size = max_t(loff_t, new_size,
1649                                         (loff_t)index << PAGE_SHIFT);
1650                 }
1651
1652                 if (off_end) {
1653                         ret = fill_zero(inode, pg_end, 0, off_end);
1654                         if (ret)
1655                                 goto out;
1656
1657                         new_size = max_t(loff_t, new_size, offset + len);
1658                 }
1659         }
1660
1661 out:
1662         if (new_size > i_size_read(inode)) {
1663                 if (mode & FALLOC_FL_KEEP_SIZE)
1664                         file_set_keep_isize(inode);
1665                 else
1666                         f2fs_i_size_write(inode, new_size);
1667         }
1668         return ret;
1669 }
1670
/*
 * Handle FALLOC_FL_INSERT_RANGE: open up @len bytes of space at @offset and
 * grow i_size by @len.
 *
 * @offset must be smaller than the current i_size, and both @offset and @len
 * must be multiples of F2FS_BLKSIZE; otherwise -EINVAL is returned.
 *
 * The data at and after @offset is relocated with __exchange_data_block(),
 * working from the end of the file back towards @offset.
 * NOTE(review): the exact semantics of __exchange_data_block() are not
 * visible in this part of the file - confirm against its definition.
 *
 * Returns 0 on success or a negative errno. i_size is only updated when
 * every step, including the final writeback, succeeded.
 */
static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct address_space *mapping = inode->i_mapping;
        pgoff_t nr, pg_start, pg_end, delta, idx;
        loff_t new_size;
        int ret = 0;

        /* the file grows by exactly @len; reject sizes the VFS disallows */
        new_size = i_size_read(inode) + len;
        ret = inode_newsize_ok(inode, new_size);
        if (ret)
                return ret;

        if (offset >= i_size_read(inode))
                return -EINVAL;

        /* insert range should be aligned to block size of f2fs. */
        if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
                return -EINVAL;

        ret = f2fs_convert_inline_inode(inode);
        if (ret)
                return ret;

        f2fs_balance_fs(sbi, true);

        /* presumably drops blocks allocated past i_size - TODO confirm */
        filemap_invalidate_lock(mapping);
        ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
        filemap_invalidate_unlock(mapping);
        if (ret)
                return ret;

        /* write out all dirty pages from offset */
        ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
        if (ret)
                return ret;

        /*
         * delta is the size of the inserted range in pages; idx starts one
         * past the last page covered by i_size.
         */
        pg_start = offset >> PAGE_SHIFT;
        pg_end = (offset + len) >> PAGE_SHIFT;
        delta = pg_end - pg_start;
        idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

        /* avoid gc operation during block exchange */
        f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
        filemap_invalidate_lock(mapping);
        truncate_pagecache(inode, offset);

        /*
         * Walk backwards from the end of the file to pg_start in chunks of
         * at most delta pages, exchanging [idx, idx + nr) with the range
         * that starts delta pages later. The loop stops at the first error.
         */
        while (!ret && idx > pg_start) {
                nr = idx - pg_start;
                if (nr > delta)
                        nr = delta;
                idx -= nr;

                f2fs_lock_op(sbi);
                f2fs_drop_extent_tree(inode);

                ret = __exchange_data_block(inode, inode, idx,
                                        idx + delta, nr, false);
                f2fs_unlock_op(sbi);
        }
        filemap_invalidate_unlock(mapping);
        f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
        if (ret)
                return ret;

        /* write out all moved pages, if possible */
        filemap_invalidate_lock(mapping);
        ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
        truncate_pagecache(inode, offset);
        filemap_invalidate_unlock(mapping);

        /* publish the new size only after the moved pages reached disk */
        if (!ret)
                f2fs_i_size_write(inode, new_size);
        return ret;
}
1746
/*
 * Default fallocate path (no punch/collapse/zero/insert flag): map blocks
 * for the byte range [@offset, @offset + @len) with f2fs_map_blocks().
 *
 * For pinned files the range is rounded up to whole sections and mapped one
 * section (CAP_BLKS_PER_SEC() blocks) at a time; other files are mapped in a
 * single f2fs_map_blocks() call.
 *
 * @mode: only FALLOC_FL_KEEP_SIZE is examined here; when set, i_size is left
 *        alone and the inode is marked via file_set_keep_isize() instead.
 *
 * Returns 0 on success or a negative errno. On a partial failure the size
 * is still advanced to cover the blocks that were mapped before the error.
 */
static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
                                        loff_t len, int mode)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
                        .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
                        .m_may_create = true };
        struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
                        .init_gc_type = FG_GC,
                        .should_migrate_blocks = false,
                        .err_gc_skipped = true,
                        .nr_free_secs = 0 };
        pgoff_t pg_start, pg_end;
        loff_t new_size;
        loff_t off_end;
        block_t expanded = 0;
        int err;

        err = inode_newsize_ok(inode, (len + offset));
        if (err)
                return err;

        err = f2fs_convert_inline_inode(inode);
        if (err)
                return err;

        f2fs_balance_fs(sbi, true);

        /*
         * pg_start/pg_end are the page indexes containing the first byte and
         * the byte just past the range; off_end is the in-page offset of the
         * range end (non-zero means the last page is only partly covered).
         */
        pg_start = ((unsigned long long)offset) >> PAGE_SHIFT;
        pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT;
        off_end = (offset + len) & (PAGE_SIZE - 1);

        map.m_lblk = pg_start;
        map.m_len = pg_end - pg_start;
        if (off_end)
                map.m_len++;

        if (!map.m_len)
                return 0;

        if (f2fs_is_pinned_file(inode)) {
                block_t sec_blks = CAP_BLKS_PER_SEC(sbi);
                /* remaining length to map, rounded up to whole sections */
                block_t sec_len = roundup(map.m_len, sec_blks);

                map.m_len = sec_blks;
next_alloc:
                /* run a foreground GC pass first when free sections are low */
                if (has_not_enough_free_secs(sbi, 0,
                        GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
                        /*
                         * NOTE(review): gc_lock is taken here with no
                         * matching unlock in this function; presumably
                         * f2fs_gc() releases it - confirm.
                         */
                        f2fs_down_write(&sbi->gc_lock);
                        stat_inc_gc_call_count(sbi, FOREGROUND);
                        err = f2fs_gc(sbi, &gc_control);
                        /* -ENODATA from f2fs_gc() is tolerated */
                        if (err && err != -ENODATA)
                                goto out_err;
                }

                f2fs_down_write(&sbi->pin_sem);

                err = f2fs_allocate_pinning_section(sbi);
                if (err) {
                        f2fs_up_write(&sbi->pin_sem);
                        goto out_err;
                }

                map.m_seg_type = CURSEG_COLD_DATA_PINNED;
                err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO);
                file_dont_truncate(inode);

                f2fs_up_write(&sbi->pin_sem);

                /* account for this round and advance to the next section */
                expanded += map.m_len;
                sec_len -= map.m_len;
                map.m_lblk += map.m_len;
                if (!err && sec_len)
                        goto next_alloc;

                map.m_len = expanded;
        } else {
                err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO);
                expanded = map.m_len;
        }
out_err:
        if (err) {
                pgoff_t last_off;

                /* nothing was mapped, so there is no size to update */
                if (!expanded)
                        return err;

                last_off = pg_start + expanded - 1;

                /* update new size to the failed position */
                new_size = (last_off == pg_end) ? offset + len :
                                        (loff_t)(last_off + 1) << PAGE_SHIFT;
        } else {
                new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
        }

        /* only ever grow the file; honour FALLOC_FL_KEEP_SIZE */
        if (new_size > i_size_read(inode)) {
                if (mode & FALLOC_FL_KEEP_SIZE)
                        file_set_keep_isize(inode);
                else
                        f2fs_i_size_write(inode, new_size);
        }

        return err;
}
1852
1853 static long f2fs_fallocate(struct file *file, int mode,
1854                                 loff_t offset, loff_t len)
1855 {
1856         struct inode *inode = file_inode(file);
1857         long ret = 0;
1858
1859         if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
1860                 return -EIO;
1861         if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
1862                 return -ENOSPC;
1863         if (!f2fs_is_compress_backend_ready(inode))
1864                 return -EOPNOTSUPP;
1865
1866         /* f2fs only support ->fallocate for regular file */
1867         if (!S_ISREG(inode->i_mode))
1868                 return -EINVAL;
1869
1870         if (IS_ENCRYPTED(inode) &&
1871                 (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
1872                 return -EOPNOTSUPP;
1873
1874         if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
1875                         FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
1876                         FALLOC_FL_INSERT_RANGE))
1877                 return -EOPNOTSUPP;
1878
1879         inode_lock(inode);
1880
1881         /*
1882          * Pinned file should not support partial truncation since the block
1883          * can be used by applications.
1884          */
1885         if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) &&
1886                 (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
1887                         FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) {
1888                 ret = -EOPNOTSUPP;
1889                 goto out;
1890         }
1891
1892         ret = file_modified(file);
1893         if (ret)
1894                 goto out;
1895
1896         /*
1897          * wait for inflight dio, blocks should be removed after IO
1898          * completion.
1899          */
1900         inode_dio_wait(inode);
1901
1902         if (mode & FALLOC_FL_PUNCH_HOLE) {
1903                 if (offset >= inode->i_size)
1904                         goto out;
1905
1906                 ret = f2fs_punch_hole(inode, offset, len);
1907         } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
1908                 ret = f2fs_collapse_range(inode, offset, len);
1909         } else if (mode & FALLOC_FL_ZERO_RANGE) {
1910                 ret = f2fs_zero_range(inode, offset, len, mode);
1911         } else if (mode & FALLOC_FL_INSERT_RANGE) {
1912                 ret = f2fs_insert_range(inode, offset, len);
1913         } else {
1914                 ret = f2fs_expand_inode_data(inode, offset, len, mode);
1915         }
1916
1917         if (!ret) {
1918                 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
1919                 f2fs_mark_inode_dirty_sync(inode, false);
1920                 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1921         }
1922
1923 out:
1924         inode_unlock(inode);
1925
1926         trace_f2fs_fallocate(inode, mode, offset, len, ret);
1927         return ret;
1928 }
1929
1930 static int f2fs_release_file(struct inode *inode, struct file *filp)
1931 {
1932         /*
1933          * f2fs_release_file is called at every close calls. So we should
1934          * not drop any inmemory pages by close called by other process.
1935          */
1936         if (!(filp->f_mode & FMODE_WRITE) ||
1937                         atomic_read(&inode->i_writecount) != 1)
1938                 return 0;
1939
1940         inode_lock(inode);
1941         f2fs_abort_atomic_write(inode, true);
1942         inode_unlock(inode);
1943
1944         return 0;
1945 }
1946
1947 static int f2fs_file_flush(struct file *file, fl_owner_t id)
1948 {
1949         struct inode *inode = file_inode(file);
1950
1951         /*
1952          * If the process doing a transaction is crashed, we should do
1953          * roll-back. Otherwise, other reader/write can see corrupted database
1954          * until all the writers close its file. Since this should be done
1955          * before dropping file lock, it needs to do in ->flush.
1956          */
1957         if (F2FS_I(inode)->atomic_write_task == current &&
1958                                 (current->flags & PF_EXITING)) {
1959                 inode_lock(inode);
1960                 f2fs_abort_atomic_write(inode, true);
1961                 inode_unlock(inode);
1962         }
1963
1964         return 0;
1965 }
1966
1967 static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
1968 {
1969         struct f2fs_inode_info *fi = F2FS_I(inode);
1970         u32 masked_flags = fi->i_flags & mask;
1971
1972         /* mask can be shrunk by flags_valid selector */
1973         iflags &= mask;
1974
1975         /* Is it quota file? Do not allow user to mess with it */
1976         if (IS_NOQUOTA(inode))
1977                 return -EPERM;
1978
1979         if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) {
1980                 if (!f2fs_sb_has_casefold(F2FS_I_SB(inode)))
1981                         return -EOPNOTSUPP;
1982                 if (!f2fs_empty_dir(inode))
1983                         return -ENOTEMPTY;
1984         }
1985
1986         if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) {
1987                 if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
1988                         return -EOPNOTSUPP;
1989                 if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL))
1990                         return -EINVAL;
1991         }
1992
1993         if ((iflags ^ masked_flags) & F2FS_COMPR_FL) {
1994                 if (masked_flags & F2FS_COMPR_FL) {
1995                         if (!f2fs_disable_compressed_file(inode))
1996                                 return -EINVAL;
1997                 } else {
1998                         /* try to convert inline_data to support compression */
1999                         int err = f2fs_convert_inline_inode(inode);
2000                         if (err)
2001                                 return err;
2002
2003                         f2fs_down_write(&fi->i_sem);
2004                         if (!f2fs_may_compress(inode) ||
2005                                         (S_ISREG(inode->i_mode) &&
2006                                         F2FS_HAS_BLOCKS(inode))) {
2007                                 f2fs_up_write(&fi->i_sem);
2008                                 return -EINVAL;
2009                         }
2010                         err = set_compress_context(inode);
2011                         f2fs_up_write(&fi->i_sem);
2012
2013                         if (err)
2014                                 return err;
2015                 }
2016         }
2017
2018         fi->i_flags = iflags | (fi->i_flags & ~mask);
2019         f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) &&
2020                                         (fi->i_flags & F2FS_NOCOMP_FL));
2021
2022         if (fi->i_flags & F2FS_PROJINHERIT_FL)
2023                 set_inode_flag(inode, FI_PROJ_INHERIT);
2024         else
2025                 clear_inode_flag(inode, FI_PROJ_INHERIT);
2026
2027         inode_set_ctime_current(inode);
2028         f2fs_set_inode_flags(inode);
2029         f2fs_mark_inode_dirty_sync(inode, true);
2030         return 0;
2031 }
2032
2033 /* FS_IOC_[GS]ETFLAGS and FS_IOC_FS[GS]ETXATTR support */
2034
2035 /*
2036  * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry
2037  * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to
2038  * F2FS_GETTABLE_FS_FL.  To also make it settable via FS_IOC_SETFLAGS, also add
2039  * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL.
2040  *
2041  * Translating flags to fsx_flags value used by FS_IOC_FSGETXATTR and
2042  * FS_IOC_FSSETXATTR is done by the VFS.
2043  */
2044
2045 static const struct {
2046         u32 iflag;
2047         u32 fsflag;
2048 } f2fs_fsflags_map[] = {
2049         { F2FS_COMPR_FL,        FS_COMPR_FL },
2050         { F2FS_SYNC_FL,         FS_SYNC_FL },
2051         { F2FS_IMMUTABLE_FL,    FS_IMMUTABLE_FL },
2052         { F2FS_APPEND_FL,       FS_APPEND_FL },
2053         { F2FS_NODUMP_FL,       FS_NODUMP_FL },
2054         { F2FS_NOATIME_FL,      FS_NOATIME_FL },
2055         { F2FS_NOCOMP_FL,       FS_NOCOMP_FL },
2056         { F2FS_INDEX_FL,        FS_INDEX_FL },
2057         { F2FS_DIRSYNC_FL,      FS_DIRSYNC_FL },
2058         { F2FS_PROJINHERIT_FL,  FS_PROJINHERIT_FL },
2059         { F2FS_CASEFOLD_FL,     FS_CASEFOLD_FL },
2060 };
2061
/* FS_*_FL bits that f2fs reports through FS_IOC_GETFLAGS */
#define F2FS_GETTABLE_FS_FL (           \
                FS_COMPR_FL |           \
                FS_SYNC_FL |            \
                FS_IMMUTABLE_FL |       \
                FS_APPEND_FL |          \
                FS_NODUMP_FL |          \
                FS_NOATIME_FL |         \
                FS_NOCOMP_FL |          \
                FS_INDEX_FL |           \
                FS_DIRSYNC_FL |         \
                FS_PROJINHERIT_FL |     \
                FS_ENCRYPT_FL |         \
                FS_INLINE_DATA_FL |     \
                FS_NOCOW_FL |           \
                FS_VERITY_FL |          \
                FS_CASEFOLD_FL)
2078
/* FS_*_FL bits that may be changed through FS_IOC_SETFLAGS */
#define F2FS_SETTABLE_FS_FL (           \
                FS_COMPR_FL |           \
                FS_SYNC_FL |            \
                FS_IMMUTABLE_FL |       \
                FS_APPEND_FL |          \
                FS_NODUMP_FL |          \
                FS_NOATIME_FL |         \
                FS_NOCOMP_FL |          \
                FS_DIRSYNC_FL |         \
                FS_PROJINHERIT_FL |     \
                FS_CASEFOLD_FL)
2090
2091 /* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */
2092 static inline u32 f2fs_iflags_to_fsflags(u32 iflags)
2093 {
2094         u32 fsflags = 0;
2095         int i;
2096
2097         for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
2098                 if (iflags & f2fs_fsflags_map[i].iflag)
2099                         fsflags |= f2fs_fsflags_map[i].fsflag;
2100
2101         return fsflags;
2102 }
2103
2104 /* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */
2105 static inline u32 f2fs_fsflags_to_iflags(u32 fsflags)
2106 {
2107         u32 iflags = 0;
2108         int i;
2109
2110         for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
2111                 if (fsflags & f2fs_fsflags_map[i].fsflag)
2112                         iflags |= f2fs_fsflags_map[i].iflag;
2113
2114         return iflags;
2115 }
2116
2117 static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
2118 {
2119         struct inode *inode = file_inode(filp);
2120
2121         return put_user(inode->i_generation, (int __user *)arg);
2122 }
2123
/*
 * Put the inode behind @filp into atomic-write mode.
 *
 * @truncate: when true, additionally set FI_ATOMIC_REPLACE, drop the
 *            inode's page cache and set i_size to 0.
 *
 * The file must be open for writing (-EBADF), the caller must own the inode
 * or be capable (-EACCES), and the inode must be a regular file not opened
 * with O_DIRECT (-EINVAL). Pinned files, and files whose compression cannot
 * be disabled, are rejected with -EINVAL. If the inode is already an atomic
 * file the call returns 0 without changing anything.
 *
 * Returns 0 on success or a negative errno.
 */
static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
{
        struct inode *inode = file_inode(filp);
        struct mnt_idmap *idmap = file_mnt_idmap(filp);
        struct f2fs_inode_info *fi = F2FS_I(inode);
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        loff_t isize;
        int ret;

        if (!(filp->f_mode & FMODE_WRITE))
                return -EBADF;

        if (!inode_owner_or_capable(idmap, inode))
                return -EACCES;

        if (!S_ISREG(inode->i_mode))
                return -EINVAL;

        if (filp->f_flags & O_DIRECT)
                return -EINVAL;

        ret = mnt_want_write_file(filp);
        if (ret)
                return ret;

        inode_lock(inode);

        if (!f2fs_disable_compressed_file(inode) ||
                        f2fs_is_pinned_file(inode)) {
                ret = -EINVAL;
                goto out;
        }

        /* already in atomic mode: nothing to do, ret is still 0 here */
        if (f2fs_is_atomic_file(inode))
                goto out;

        ret = f2fs_convert_inline_inode(inode);
        if (ret)
                goto out;

        /* both GC semaphores are held for the whole setup phase below */
        f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
        f2fs_down_write(&fi->i_gc_rwsem[READ]);

        /*
         * Should wait end_io to count F2FS_WB_CP_DATA correctly by
         * f2fs_is_atomic_file.
         */
        if (get_dirty_pages(inode))
                f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u",
                          inode->i_ino, get_dirty_pages(inode));
        ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
        if (ret)
                goto out_unlock;

        /* Check if the inode already has a COW inode */
        if (fi->cow_inode == NULL) {
                /* Create a COW inode for atomic write */
                struct dentry *dentry = file_dentry(filp);
                struct inode *dir = d_inode(dentry->d_parent);

                /* the COW inode is a tmpfile obtained via the parent dir */
                ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode);
                if (ret)
                        goto out_unlock;

                set_inode_flag(fi->cow_inode, FI_COW_FILE);
                clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);

                /* Set the COW inode's atomic_inode to the atomic inode */
                F2FS_I(fi->cow_inode)->atomic_inode = inode;
        } else {
                /* Reuse the already created COW inode */
                f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode));

                invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1);

                ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true);
                if (ret)
                        goto out_unlock;
        }

        f2fs_write_inode(inode, NULL);

        stat_inc_atomic_inode(inode);

        set_inode_flag(inode, FI_ATOMIC_FILE);

        /* remember the pre-transaction size before it may be reset below */
        isize = i_size_read(inode);
        fi->original_i_size = isize;
        if (truncate) {
                set_inode_flag(inode, FI_ATOMIC_REPLACE);
                truncate_inode_pages_final(inode->i_mapping);
                f2fs_i_size_write(inode, 0);
                isize = 0;
        }
        /* the COW inode starts with the same (possibly zeroed) size */
        f2fs_i_size_write(fi->cow_inode, isize);

out_unlock:
        f2fs_up_write(&fi->i_gc_rwsem[READ]);
        f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
        if (ret)
                goto out;

        /* success: record the calling task as the atomic writer */
        f2fs_update_time(sbi, REQ_TIME);
        fi->atomic_write_task = current;
        stat_update_max_atomic_write(inode);
        fi->atomic_write_cnt = 0;
out:
        inode_unlock(inode);
        mnt_drop_write_file(filp);
        return ret;
}
2235
2236 static int f2fs_ioc_commit_atomic_write(struct file *filp)
2237 {
2238         struct inode *inode = file_inode(filp);
2239         struct mnt_idmap *idmap = file_mnt_idmap(filp);
2240         int ret;
2241
2242         if (!(filp->f_mode & FMODE_WRITE))
2243                 return -EBADF;
2244
2245         if (!inode_owner_or_capable(idmap, inode))
2246                 return -EACCES;
2247
2248         ret = mnt_want_write_file(filp);
2249         if (ret)
2250                 return ret;
2251
2252         f2fs_balance_fs(F2FS_I_SB(inode), true);
2253
2254         inode_lock(inode);
2255
2256         if (f2fs_is_atomic_file(inode)) {
2257                 ret = f2fs_commit_atomic_write(inode);
2258                 if (!ret)
2259                         ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
2260
2261                 f2fs_abort_atomic_write(inode, ret);
2262         } else {
2263                 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
2264         }
2265
2266         inode_unlock(inode);
2267         mnt_drop_write_file(filp);
2268         return ret;
2269 }
2270
2271 static int f2fs_ioc_abort_atomic_write(struct file *filp)
2272 {
2273         struct inode *inode = file_inode(filp);
2274         struct mnt_idmap *idmap = file_mnt_idmap(filp);
2275         int ret;
2276
2277         if (!(filp->f_mode & FMODE_WRITE))
2278                 return -EBADF;
2279
2280         if (!inode_owner_or_capable(idmap, inode))
2281                 return -EACCES;
2282
2283         ret = mnt_want_write_file(filp);
2284         if (ret)
2285                 return ret;
2286
2287         inode_lock(inode);
2288
2289         f2fs_abort_atomic_write(inode, true);
2290
2291         inode_unlock(inode);
2292
2293         mnt_drop_write_file(filp);
2294         f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2295         return ret;
2296 }
2297
/*
 * Shut the filesystem down according to @flag (one of F2FS_GOING_DOWN_*).
 *
 * @sbi:       filesystem to shut down
 * @flag:      selects what is synced before checkpointing is stopped; an
 *             unknown value yields -EINVAL
 * @readonly:  when true, skip the GC/discard teardown after the switch
 * @need_lock: when true, take sb->s_umount for reading around that teardown
 *
 * Returns 0 on success or a negative errno. -EIO from f2fs_sync_fs() is
 * converted to 0. The result is always reported via trace_f2fs_shutdown().
 */
int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag,
                                                bool readonly, bool need_lock)
{
        struct super_block *sb = sbi->sb;
        int ret = 0;

        switch (flag) {
        case F2FS_GOING_DOWN_FULLSYNC:
                /* stop checkpointing while the block device is frozen */
                ret = bdev_freeze(sb->s_bdev);
                if (ret)
                        goto out;
                f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
                bdev_thaw(sb->s_bdev);
                break;
        case F2FS_GOING_DOWN_METASYNC:
                /* do checkpoint only */
                ret = f2fs_sync_fs(sb, 1);
                if (ret) {
                        /* -EIO is not reported to the caller */
                        if (ret == -EIO)
                                ret = 0;
                        goto out;
                }
                f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
                break;
        case F2FS_GOING_DOWN_NOSYNC:
                f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
                break;
        case F2FS_GOING_DOWN_METAFLUSH:
                f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
                f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
                break;
        case F2FS_GOING_DOWN_NEED_FSCK:
                /*
                 * Only flags the filesystem and checkpoints; checkpointing
                 * is not stopped and the teardown below is skipped.
                 */
                set_sbi_flag(sbi, SBI_NEED_FSCK);
                set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
                set_sbi_flag(sbi, SBI_IS_DIRTY);
                /* do checkpoint only */
                ret = f2fs_sync_fs(sb, 1);
                if (ret == -EIO)
                        ret = 0;
                goto out;
        default:
                ret = -EINVAL;
                goto out;
        }

        if (readonly)
                goto out;

        /* grab sb->s_umount to avoid racing w/ remount() */
        if (need_lock)
                down_read(&sbi->sb->s_umount);

        /* stop background threads and turn discard off */
        f2fs_stop_gc_thread(sbi);
        f2fs_stop_discard_thread(sbi);

        f2fs_drop_discard_cmd(sbi);
        clear_opt(sbi, DISCARD);

        if (need_lock)
                up_read(&sbi->sb->s_umount);

        f2fs_update_time(sbi, REQ_TIME);
out:

        trace_f2fs_shutdown(sbi, flag, ret);

        return ret;
}
2366
2367 static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
2368 {
2369         struct inode *inode = file_inode(filp);
2370         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2371         __u32 in;
2372         int ret;
2373         bool need_drop = false, readonly = false;
2374
2375         if (!capable(CAP_SYS_ADMIN))
2376                 return -EPERM;
2377
2378         if (get_user(in, (__u32 __user *)arg))
2379                 return -EFAULT;
2380
2381         if (in != F2FS_GOING_DOWN_FULLSYNC) {
2382                 ret = mnt_want_write_file(filp);
2383                 if (ret) {
2384                         if (ret != -EROFS)
2385                                 return ret;
2386
2387                         /* fallback to nosync shutdown for readonly fs */
2388                         in = F2FS_GOING_DOWN_NOSYNC;
2389                         readonly = true;
2390                 } else {
2391                         need_drop = true;
2392                 }
2393         }
2394
2395         ret = f2fs_do_shutdown(sbi, in, readonly, true);
2396
2397         if (need_drop)
2398                 mnt_drop_write_file(filp);
2399
2400         return ret;
2401 }
2402
2403 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
2404 {
2405         struct inode *inode = file_inode(filp);
2406         struct super_block *sb = inode->i_sb;
2407         struct fstrim_range range;
2408         int ret;
2409
2410         if (!capable(CAP_SYS_ADMIN))
2411                 return -EPERM;
2412
2413         if (!f2fs_hw_support_discard(F2FS_SB(sb)))
2414                 return -EOPNOTSUPP;
2415
2416         if (copy_from_user(&range, (struct fstrim_range __user *)arg,
2417                                 sizeof(range)))
2418                 return -EFAULT;
2419
2420         ret = mnt_want_write_file(filp);
2421         if (ret)
2422                 return ret;
2423
2424         range.minlen = max((unsigned int)range.minlen,
2425                            bdev_discard_granularity(sb->s_bdev));
2426         ret = f2fs_trim_fs(F2FS_SB(sb), &range);
2427         mnt_drop_write_file(filp);
2428         if (ret < 0)
2429                 return ret;
2430
2431         if (copy_to_user((struct fstrim_range __user *)arg, &range,
2432                                 sizeof(range)))
2433                 return -EFAULT;
2434         f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2435         return 0;
2436 }
2437
/* Return true if any of the 16 bytes of @u is non-zero, false otherwise. */
static bool uuid_is_nonzero(__u8 u[16])
{
        const __u8 *end = u + 16;
        const __u8 *p;

        for (p = u; p != end; p++) {
                if (*p != 0)
                        return true;
        }

        return false;
}
2447
2448 static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
2449 {
2450         struct inode *inode = file_inode(filp);
2451         int ret;
2452
2453         if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode)))
2454                 return -EOPNOTSUPP;
2455
2456         ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
2457         f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2458         return ret;
2459 }
2460
2461 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
2462 {
2463         if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2464                 return -EOPNOTSUPP;
2465         return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
2466 }
2467
2468 static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
2469 {
2470         struct inode *inode = file_inode(filp);
2471         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2472         u8 encrypt_pw_salt[16];
2473         int err;
2474
2475         if (!f2fs_sb_has_encrypt(sbi))
2476                 return -EOPNOTSUPP;
2477
2478         err = mnt_want_write_file(filp);
2479         if (err)
2480                 return err;
2481
2482         f2fs_down_write(&sbi->sb_lock);
2483
2484         if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt))
2485                 goto got_it;
2486
2487         /* update superblock with uuid */
2488         generate_random_uuid(sbi->raw_super->encrypt_pw_salt);
2489
2490         err = f2fs_commit_super(sbi, false);
2491         if (err) {
2492                 /* undo new data */
2493                 memset(sbi->raw_super->encrypt_pw_salt, 0, 16);
2494                 goto out_err;
2495         }
2496 got_it:
2497         memcpy(encrypt_pw_salt, sbi->raw_super->encrypt_pw_salt, 16);
2498 out_err:
2499         f2fs_up_write(&sbi->sb_lock);
2500         mnt_drop_write_file(filp);
2501
2502         if (!err && copy_to_user((__u8 __user *)arg, encrypt_pw_salt, 16))
2503                 err = -EFAULT;
2504
2505         return err;
2506 }
2507
2508 static int f2fs_ioc_get_encryption_policy_ex(struct file *filp,
2509                                              unsigned long arg)
2510 {
2511         if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2512                 return -EOPNOTSUPP;
2513
2514         return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg);
2515 }
2516
2517 static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg)
2518 {
2519         if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2520                 return -EOPNOTSUPP;
2521
2522         return fscrypt_ioctl_add_key(filp, (void __user *)arg);
2523 }
2524
2525 static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg)
2526 {
2527         if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2528                 return -EOPNOTSUPP;
2529
2530         return fscrypt_ioctl_remove_key(filp, (void __user *)arg);
2531 }
2532
2533 static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp,
2534                                                     unsigned long arg)
2535 {
2536         if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2537                 return -EOPNOTSUPP;
2538
2539         return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg);
2540 }
2541
2542 static int f2fs_ioc_get_encryption_key_status(struct file *filp,
2543                                               unsigned long arg)
2544 {
2545         if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2546                 return -EOPNOTSUPP;
2547
2548         return fscrypt_ioctl_get_key_status(filp, (void __user *)arg);
2549 }
2550
2551 static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg)
2552 {
2553         if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2554                 return -EOPNOTSUPP;
2555
2556         return fscrypt_ioctl_get_nonce(filp, (void __user *)arg);
2557 }
2558
2559 static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
2560 {
2561         struct inode *inode = file_inode(filp);
2562         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2563         struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
2564                         .no_bg_gc = false,
2565                         .should_migrate_blocks = false,
2566                         .nr_free_secs = 0 };
2567         __u32 sync;
2568         int ret;
2569
2570         if (!capable(CAP_SYS_ADMIN))
2571                 return -EPERM;
2572
2573         if (get_user(sync, (__u32 __user *)arg))
2574                 return -EFAULT;
2575
2576         if (f2fs_readonly(sbi->sb))
2577                 return -EROFS;
2578
2579         ret = mnt_want_write_file(filp);
2580         if (ret)
2581                 return ret;
2582
2583         if (!sync) {
2584                 if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
2585                         ret = -EBUSY;
2586                         goto out;
2587                 }
2588         } else {
2589                 f2fs_down_write(&sbi->gc_lock);
2590         }
2591
2592         gc_control.init_gc_type = sync ? FG_GC : BG_GC;
2593         gc_control.err_gc_skipped = sync;
2594         stat_inc_gc_call_count(sbi, FOREGROUND);
2595         ret = f2fs_gc(sbi, &gc_control);
2596 out:
2597         mnt_drop_write_file(filp);
2598         return ret;
2599 }
2600
2601 static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
2602 {
2603         struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
2604         struct f2fs_gc_control gc_control = {
2605                         .init_gc_type = range->sync ? FG_GC : BG_GC,
2606                         .no_bg_gc = false,
2607                         .should_migrate_blocks = false,
2608                         .err_gc_skipped = range->sync,
2609                         .nr_free_secs = 0 };
2610         u64 end;
2611         int ret;
2612
2613         if (!capable(CAP_SYS_ADMIN))
2614                 return -EPERM;
2615         if (f2fs_readonly(sbi->sb))
2616                 return -EROFS;
2617
2618         end = range->start + range->len;
2619         if (end < range->start || range->start < MAIN_BLKADDR(sbi) ||
2620                                         end >= MAX_BLKADDR(sbi))
2621                 return -EINVAL;
2622
2623         ret = mnt_want_write_file(filp);
2624         if (ret)
2625                 return ret;
2626
2627 do_more:
2628         if (!range->sync) {
2629                 if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
2630                         ret = -EBUSY;
2631                         goto out;
2632                 }
2633         } else {
2634                 f2fs_down_write(&sbi->gc_lock);
2635         }
2636
2637         gc_control.victim_segno = GET_SEGNO(sbi, range->start);
2638         stat_inc_gc_call_count(sbi, FOREGROUND);
2639         ret = f2fs_gc(sbi, &gc_control);
2640         if (ret) {
2641                 if (ret == -EBUSY)
2642                         ret = -EAGAIN;
2643                 goto out;
2644         }
2645         range->start += CAP_BLKS_PER_SEC(sbi);
2646         if (range->start <= end)
2647                 goto do_more;
2648 out:
2649         mnt_drop_write_file(filp);
2650         return ret;
2651 }
2652
2653 static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
2654 {
2655         struct f2fs_gc_range range;
2656
2657         if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
2658                                                         sizeof(range)))
2659                 return -EFAULT;
2660         return __f2fs_ioc_gc_range(filp, &range);
2661 }
2662
2663 static int f2fs_ioc_write_checkpoint(struct file *filp)
2664 {
2665         struct inode *inode = file_inode(filp);
2666         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2667         int ret;
2668
2669         if (!capable(CAP_SYS_ADMIN))
2670                 return -EPERM;
2671
2672         if (f2fs_readonly(sbi->sb))
2673                 return -EROFS;
2674
2675         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2676                 f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled.");
2677                 return -EINVAL;
2678         }
2679
2680         ret = mnt_want_write_file(filp);
2681         if (ret)
2682                 return ret;
2683
2684         ret = f2fs_sync_fs(sbi->sb, 1);
2685
2686         mnt_drop_write_file(filp);
2687         return ret;
2688 }
2689
/*
 * Defragment the byte range described by @range within the file @filp.
 *
 * The range is converted to page indexes and its end is clipped to the
 * current i_size.  Work is done under the inode lock in two passes:
 *
 *  1. Walk the range with f2fs_map_blocks() to find out whether the
 *     mapped extents are physically discontinuous and to count the
 *     mapped blocks.  Nothing more is done when the range is not
 *     fragmented.
 *  2. Walk the range again; for every mapped block, lock its data page,
 *     wait for writeback, and mark it dirty and "gcing" while
 *     FI_SKIP_WRITES is set.  After each batch (bounded by
 *     BLKS_PER_SEG()) FI_SKIP_WRITES is cleared and the mapping is
 *     written with filemap_fdatawrite().  FI_OPU_WRITE stays set for the
 *     whole operation.
 *     NOTE(review): presumably FI_OPU_WRITE makes that writeback
 *     allocate new blocks out-of-place - confirm against the data
 *     write path.
 *
 * On success @range->len is overwritten with the number of bytes that
 * were redirtied (0 when nothing had to be moved).
 *
 * Returns 0 on success, -EINVAL for compress-released or atomic files or
 * when the in-place-update policy applies, -EAGAIN when there are not
 * enough free sections, or an error from writeback, f2fs_map_blocks() or
 * f2fs_get_lock_data_page().
 */
static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
					struct file *filp,
					struct f2fs_defragment *range)
{
	struct inode *inode = file_inode(filp);
	struct f2fs_map_blocks map = { .m_next_extent = NULL,
					.m_seg_type = NO_CHECK_TYPE,
					.m_may_create = false };
	struct extent_info ei = {};
	pgoff_t pg_start, pg_end, next_pgofs;
	unsigned int total = 0, sec_num;
	block_t blk_end = 0;
	bool fragmented = false;
	int err;

	f2fs_balance_fs(sbi, true);

	inode_lock(inode);
	/* page-index range, with the end clipped to the current file size */
	pg_start = range->start >> PAGE_SHIFT;
	pg_end = min_t(pgoff_t,
				(range->start + range->len) >> PAGE_SHIFT,
				DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE));

	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) ||
		f2fs_is_atomic_file(inode)) {
		err = -EINVAL;
		goto unlock_out;
	}

	/* if in-place-update policy is enabled, don't waste time here */
	set_inode_flag(inode, FI_OPU_WRITE);
	if (f2fs_should_update_inplace(inode, NULL)) {
		err = -EINVAL;
		goto out;
	}

	/* writeback all dirty pages in the range */
	err = filemap_write_and_wait_range(inode->i_mapping,
						pg_start << PAGE_SHIFT,
						(pg_end << PAGE_SHIFT) - 1);
	if (err)
		goto out;

	/*
	 * lookup mapping info in extent cache, skip defragmenting if physical
	 * block addresses are continuous.
	 */
	if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
		/* err is 0 here, so this exits successfully with total == 0 */
		if ((pgoff_t)ei.fofs + ei.len >= pg_end)
			goto out;
	}

	map.m_lblk = pg_start;
	map.m_next_pgofs = &next_pgofs;

	/*
	 * lookup mapping info in dnode page cache, skip defragmenting if all
	 * physical block addresses are continuous even if there are hole(s)
	 * in logical blocks.
	 */
	while (map.m_lblk < pg_end) {
		map.m_len = pg_end - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
		if (err)
			goto out;

		/* nothing mapped here: jump to the offset reported in next_pgofs */
		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
			map.m_lblk = next_pgofs;
			continue;
		}

		/* this extent does not start where the previous one ended */
		if (blk_end && blk_end != map.m_pblk)
			fragmented = true;

		/* record total count of block that we're going to move */
		total += map.m_len;

		blk_end = map.m_pblk + map.m_len;

		map.m_lblk += map.m_len;
	}

	if (!fragmented) {
		/* nothing to move: report a zero length to the caller */
		total = 0;
		goto out;
	}

	sec_num = DIV_ROUND_UP(total, CAP_BLKS_PER_SEC(sbi));

	/*
	 * make sure there are enough free section for LFS allocation, this can
	 * avoid defragment running in SSR mode when free section are allocated
	 * intensively
	 */
	if (has_not_enough_free_secs(sbi, 0, sec_num)) {
		err = -EAGAIN;
		goto out;
	}

	/* second pass: restart from the beginning and recount what is moved */
	map.m_lblk = pg_start;
	map.m_len = pg_end - pg_start;
	total = 0;

	while (map.m_lblk < pg_end) {
		pgoff_t idx;
		/* pages redirtied in the current batch */
		int cnt = 0;

do_map:
		map.m_len = pg_end - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
		if (err)
			goto clear_out;

		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
			map.m_lblk = next_pgofs;
			goto check;
		}

		set_inode_flag(inode, FI_SKIP_WRITES);

		idx = map.m_lblk;
		/* redirty this extent's pages until the batch limit is reached */
		while (idx < map.m_lblk + map.m_len &&
						cnt < BLKS_PER_SEG(sbi)) {
			struct page *page;

			page = f2fs_get_lock_data_page(inode, idx, true);
			if (IS_ERR(page)) {
				err = PTR_ERR(page);
				goto clear_out;
			}

			f2fs_wait_on_page_writeback(page, DATA, true, true);

			set_page_dirty(page);
			set_page_private_gcing(page);
			f2fs_put_page(page, 1);

			idx++;
			cnt++;
			total++;
		}

		map.m_lblk = idx;
check:
		/* keep filling the batch while range and batch budget remain */
		if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi))
			goto do_map;

		clear_inode_flag(inode, FI_SKIP_WRITES);

		/* start writeback of the batch that was just redirtied */
		err = filemap_fdatawrite(inode->i_mapping);
		if (err)
			goto out;
	}
clear_out:
	clear_inode_flag(inode, FI_SKIP_WRITES);
out:
	clear_inode_flag(inode, FI_OPU_WRITE);
unlock_out:
	inode_unlock(inode);
	/* on success, report the number of bytes that were redirtied */
	if (!err)
		range->len = (u64)total << PAGE_SHIFT;
	return err;
}
2853
2854 static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
2855 {
2856         struct inode *inode = file_inode(filp);
2857         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2858         struct f2fs_defragment range;
2859         int err;
2860
2861         if (!capable(CAP_SYS_ADMIN))
2862                 return -EPERM;
2863
2864         if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode))
2865                 return -EINVAL;
2866
2867         if (f2fs_readonly(sbi->sb))
2868                 return -EROFS;
2869
2870         if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
2871                                                         sizeof(range)))
2872                 return -EFAULT;
2873
2874         /* verify alignment of offset & size */
2875         if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1))
2876                 return -EINVAL;
2877
2878         if (unlikely((range.start + range.len) >> PAGE_SHIFT >
2879                                         max_file_blocks(inode)))
2880                 return -EINVAL;
2881
2882         err = mnt_want_write_file(filp);
2883         if (err)
2884                 return err;
2885
2886         err = f2fs_defragment_range(sbi, filp, &range);
2887         mnt_drop_write_file(filp);
2888
2889         if (range.len)
2890                 f2fs_update_time(sbi, REQ_TIME);
2891         if (err < 0)
2892                 return err;
2893
2894         if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
2895                                                         sizeof(range)))
2896                 return -EFAULT;
2897
2898         return 0;
2899 }
2900
/*
 * Move @len bytes of block mappings from @file_in at @pos_in to
 * @file_out at @pos_out by calling __exchange_data_block().
 *
 * Both files must be regular, unencrypted, on the same mount and
 * superblock, and neither may be compressed, pinned or atomic.  @pos_in,
 * @pos_out and the (possibly rounded-up) end of the source range must be
 * F2FS_BLKSIZE aligned.  A @len of 0 means "up to the end of the source
 * file".  When the source range ends exactly at i_size, the length is
 * rounded up to the next block boundary.
 *
 * The source inode is locked unconditionally; the destination inode and
 * its i_gc_rwsem[WRITE] are only try-locked, so -EBUSY is returned
 * instead of waiting.
 *
 * After a successful exchange the destination i_size is either grown to
 * pos_out + the original length or restored to its previous value, and
 * mtime/ctime of both inodes are updated.
 *
 * Returns 0 on success or a negative errno (-EXDEV, -EROFS, -EINVAL,
 * -EOPNOTSUPP, -EBUSY, or an error from the helpers called).
 */
static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
			struct file *file_out, loff_t pos_out, size_t len)
{
	struct inode *src = file_inode(file_in);
	struct inode *dst = file_inode(file_out);
	struct f2fs_sb_info *sbi = F2FS_I_SB(src);
	/* olen keeps the caller-visible length; len may be rounded up below */
	size_t olen = len, dst_max_i_size = 0;
	size_t dst_osize;
	int ret;

	if (file_in->f_path.mnt != file_out->f_path.mnt ||
				src->i_sb != dst->i_sb)
		return -EXDEV;

	if (unlikely(f2fs_readonly(src->i_sb)))
		return -EROFS;

	if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
		return -EINVAL;

	if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst))
		return -EOPNOTSUPP;

	if (pos_out < 0 || pos_in < 0)
		return -EINVAL;

	if (src == dst) {
		/* moving a range onto itself is a no-op */
		if (pos_in == pos_out)
			return 0;
		/* destination starts inside the source range */
		if (pos_out > pos_in && pos_out < pos_in + len)
			return -EINVAL;
	}

	inode_lock(src);
	if (src != dst) {
		/* only try the second inode lock; give up with -EBUSY */
		ret = -EBUSY;
		if (!inode_trylock(dst))
			goto out;
	}

	if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) ||
		f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* default result for the range and alignment checks below */
	ret = -EINVAL;
	/* source range must lie within i_size and must not wrap */
	if (pos_in + len > src->i_size || pos_in + len < pos_in)
		goto out_unlock;
	/* len == 0: take everything from pos_in to the end of the source */
	if (len == 0)
		olen = len = src->i_size - pos_in;
	/* range ends at EOF: round the length up to a block boundary */
	if (pos_in + len == src->i_size)
		len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in;
	if (len == 0) {
		ret = 0;
		goto out_unlock;
	}

	/* remember the old size; record a new one only if dst must grow */
	dst_osize = dst->i_size;
	if (pos_out + olen > dst->i_size)
		dst_max_i_size = pos_out + olen;

	/* verify the end result is block aligned */
	if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) ||
			!IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) ||
			!IS_ALIGNED(pos_out, F2FS_BLKSIZE))
		goto out_unlock;

	ret = f2fs_convert_inline_inode(src);
	if (ret)
		goto out_unlock;

	ret = f2fs_convert_inline_inode(dst);
	if (ret)
		goto out_unlock;

	/* write out all dirty pages from offset */
	ret = filemap_write_and_wait_range(src->i_mapping,
					pos_in, pos_in + len);
	if (ret)
		goto out_unlock;

	ret = filemap_write_and_wait_range(dst->i_mapping,
					pos_out, pos_out + len);
	if (ret)
		goto out_unlock;

	f2fs_balance_fs(sbi, true);

	f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
	if (src != dst) {
		/* as with the inode lock, the second rwsem is only tried */
		ret = -EBUSY;
		if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
			goto out_src;
	}

	f2fs_lock_op(sbi);
	ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in),
				F2FS_BYTES_TO_BLK(pos_out),
				F2FS_BYTES_TO_BLK(len), false);

	if (!ret) {
		/* grow dst to the new end, or put back its previous size */
		if (dst_max_i_size)
			f2fs_i_size_write(dst, dst_max_i_size);
		else if (dst_osize != dst->i_size)
			f2fs_i_size_write(dst, dst_osize);
	}
	f2fs_unlock_op(sbi);

	if (src != dst)
		f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
out_src:
	f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
	if (ret)
		goto out_unlock;

	/* success: stamp mtime/ctime on both inodes and mark them dirty */
	inode_set_mtime_to_ts(src, inode_set_ctime_current(src));
	f2fs_mark_inode_dirty_sync(src, false);
	if (src != dst) {
		inode_set_mtime_to_ts(dst, inode_set_ctime_current(dst));
		f2fs_mark_inode_dirty_sync(dst, false);
	}
	f2fs_update_time(sbi, REQ_TIME);

out_unlock:
	if (src != dst)
		inode_unlock(dst);
out:
	inode_unlock(src);
	return ret;
}
3037
3038 static int __f2fs_ioc_move_range(struct file *filp,
3039                                 struct f2fs_move_range *range)
3040 {
3041         struct fd dst;
3042         int err;
3043
3044         if (!(filp->f_mode & FMODE_READ) ||
3045                         !(filp->f_mode & FMODE_WRITE))
3046                 return -EBADF;
3047
3048         dst = fdget(range->dst_fd);
3049         if (!fd_file(dst))
3050                 return -EBADF;
3051
3052         if (!(fd_file(dst)->f_mode & FMODE_WRITE)) {
3053                 err = -EBADF;
3054                 goto err_out;
3055         }
3056
3057         err = mnt_want_write_file(filp);
3058         if (err)
3059                 goto err_out;
3060
3061         err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst),
3062                                         range->pos_out, range->len);
3063
3064         mnt_drop_write_file(filp);
3065 err_out:
3066         fdput(dst);
3067         return err;
3068 }
3069
3070 static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
3071 {
3072         struct f2fs_move_range range;
3073
3074         if (copy_from_user(&range, (struct f2fs_move_range __user *)arg,
3075                                                         sizeof(range)))
3076                 return -EFAULT;
3077         return __f2fs_ioc_move_range(filp, &range);
3078 }
3079
3080 static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
3081 {
3082         struct inode *inode = file_inode(filp);
3083         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3084         struct sit_info *sm = SIT_I(sbi);
3085         unsigned int start_segno = 0, end_segno = 0;
3086         unsigned int dev_start_segno = 0, dev_end_segno = 0;
3087         struct f2fs_flush_device range;
3088         struct f2fs_gc_control gc_control = {
3089                         .init_gc_type = FG_GC,
3090                         .should_migrate_blocks = true,
3091                         .err_gc_skipped = true,
3092                         .nr_free_secs = 0 };
3093         int ret;
3094
3095         if (!capable(CAP_SYS_ADMIN))
3096                 return -EPERM;
3097
3098         if (f2fs_readonly(sbi->sb))
3099                 return -EROFS;
3100
3101         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
3102                 return -EINVAL;
3103
3104         if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
3105                                                         sizeof(range)))
3106                 return -EFAULT;
3107
3108         if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
3109                         __is_large_section(sbi)) {
3110                 f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1",
3111                           range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi));
3112                 return -EINVAL;
3113         }
3114
3115         ret = mnt_want_write_file(filp);
3116         if (ret)
3117                 return ret;
3118
3119         if (range.dev_num != 0)
3120                 dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk);
3121         dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk);
3122
3123         start_segno = sm->last_victim[FLUSH_DEVICE];
3124         if (start_segno < dev_start_segno || start_segno >= dev_end_segno)
3125                 start_segno = dev_start_segno;
3126         end_segno = min(start_segno + range.segments, dev_end_segno);
3127
3128         while (start_segno < end_segno) {
3129                 if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
3130                         ret = -EBUSY;
3131                         goto out;
3132                 }
3133                 sm->last_victim[GC_CB] = end_segno + 1;
3134                 sm->last_victim[GC_GREEDY] = end_segno + 1;
3135                 sm->last_victim[ALLOC_NEXT] = end_segno + 1;
3136
3137                 gc_control.victim_segno = start_segno;
3138                 stat_inc_gc_call_count(sbi, FOREGROUND);
3139                 ret = f2fs_gc(sbi, &gc_control);
3140                 if (ret == -EAGAIN)
3141                         ret = 0;
3142                 else if (ret < 0)
3143                         break;
3144                 start_segno++;
3145         }
3146 out:
3147         mnt_drop_write_file(filp);
3148         return ret;
3149 }
3150
3151 static int f2fs_ioc_get_features(struct file *filp, unsigned long arg)
3152 {
3153         struct inode *inode = file_inode(filp);
3154         u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature);
3155
3156         /* Must validate to set it with SQLite behavior in Android. */
3157         sb_feature |= F2FS_FEATURE_ATOMIC_WRITE;
3158
3159         return put_user(sb_feature, (u32 __user *)arg);
3160 }
3161
3162 #ifdef CONFIG_QUOTA
3163 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
3164 {
3165         struct dquot *transfer_to[MAXQUOTAS] = {};
3166         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3167         struct super_block *sb = sbi->sb;
3168         int err;
3169
3170         transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
3171         if (IS_ERR(transfer_to[PRJQUOTA]))
3172                 return PTR_ERR(transfer_to[PRJQUOTA]);
3173
3174         err = __dquot_transfer(inode, transfer_to);
3175         if (err)
3176                 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
3177         dqput(transfer_to[PRJQUOTA]);
3178         return err;
3179 }
3180
3181 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
3182 {
3183         struct f2fs_inode_info *fi = F2FS_I(inode);
3184         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3185         struct f2fs_inode *ri = NULL;
3186         kprojid_t kprojid;
3187         int err;
3188
3189         if (!f2fs_sb_has_project_quota(sbi)) {
3190                 if (projid != F2FS_DEF_PROJID)
3191                         return -EOPNOTSUPP;
3192                 else
3193                         return 0;
3194         }
3195
3196         if (!f2fs_has_extra_attr(inode))
3197                 return -EOPNOTSUPP;
3198
3199         kprojid = make_kprojid(&init_user_ns, (projid_t)projid);
3200
3201         if (projid_eq(kprojid, fi->i_projid))
3202                 return 0;
3203
3204         err = -EPERM;
3205         /* Is it quota file? Do not allow user to mess with it */
3206         if (IS_NOQUOTA(inode))
3207                 return err;
3208
3209         if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
3210                 return -EOVERFLOW;
3211
3212         err = f2fs_dquot_initialize(inode);
3213         if (err)
3214                 return err;
3215
3216         f2fs_lock_op(sbi);
3217         err = f2fs_transfer_project_quota(inode, kprojid);
3218         if (err)
3219                 goto out_unlock;
3220
3221         fi->i_projid = kprojid;
3222         inode_set_ctime_current(inode);
3223         f2fs_mark_inode_dirty_sync(inode, true);
3224 out_unlock:
3225         f2fs_unlock_op(sbi);
3226         return err;
3227 }
3228 #else
3229 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
3230 {
3231         return 0;
3232 }
3233
3234 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
3235 {
3236         if (projid != F2FS_DEF_PROJID)
3237                 return -EOPNOTSUPP;
3238         return 0;
3239 }
3240 #endif
3241
3242 int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
3243 {
3244         struct inode *inode = d_inode(dentry);
3245         struct f2fs_inode_info *fi = F2FS_I(inode);
3246         u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags);
3247
3248         if (IS_ENCRYPTED(inode))
3249                 fsflags |= FS_ENCRYPT_FL;
3250         if (IS_VERITY(inode))
3251                 fsflags |= FS_VERITY_FL;
3252         if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
3253                 fsflags |= FS_INLINE_DATA_FL;
3254         if (is_inode_flag_set(inode, FI_PIN_FILE))
3255                 fsflags |= FS_NOCOW_FL;
3256
3257         fileattr_fill_flags(fa, fsflags & F2FS_GETTABLE_FS_FL);
3258
3259         if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)))
3260                 fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid);
3261
3262         return 0;
3263 }
3264
3265 int f2fs_fileattr_set(struct mnt_idmap *idmap,
3266                       struct dentry *dentry, struct fileattr *fa)
3267 {
3268         struct inode *inode = d_inode(dentry);
3269         u32 fsflags = fa->flags, mask = F2FS_SETTABLE_FS_FL;
3270         u32 iflags;
3271         int err;
3272
3273         if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
3274                 return -EIO;
3275         if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
3276                 return -ENOSPC;
3277         if (fsflags & ~F2FS_GETTABLE_FS_FL)
3278                 return -EOPNOTSUPP;
3279         fsflags &= F2FS_SETTABLE_FS_FL;
3280         if (!fa->flags_valid)
3281                 mask &= FS_COMMON_FL;
3282
3283         iflags = f2fs_fsflags_to_iflags(fsflags);
3284         if (f2fs_mask_flags(inode->i_mode, iflags) != iflags)
3285                 return -EOPNOTSUPP;
3286
3287         err = f2fs_setflags_common(inode, iflags, f2fs_fsflags_to_iflags(mask));
3288         if (!err)
3289                 err = f2fs_ioc_setproject(inode, fa->fsx_projid);
3290
3291         return err;
3292 }
3293
3294 int f2fs_pin_file_control(struct inode *inode, bool inc)
3295 {
3296         struct f2fs_inode_info *fi = F2FS_I(inode);
3297         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3298
3299         if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) {
3300                 f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials",
3301                           __func__, inode->i_ino, fi->i_gc_failures);
3302                 clear_inode_flag(inode, FI_PIN_FILE);
3303                 return -EAGAIN;
3304         }
3305
3306         /* Use i_gc_failures for normal file as a risk signal. */
3307         if (inc)
3308                 f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1);
3309
3310         return 0;
3311 }
3312
3313 static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
3314 {
3315         struct inode *inode = file_inode(filp);
3316         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3317         __u32 pin;
3318         int ret = 0;
3319
3320         if (get_user(pin, (__u32 __user *)arg))
3321                 return -EFAULT;
3322
3323         if (!S_ISREG(inode->i_mode))
3324                 return -EINVAL;
3325
3326         if (f2fs_readonly(sbi->sb))
3327                 return -EROFS;
3328
3329         ret = mnt_want_write_file(filp);
3330         if (ret)
3331                 return ret;
3332
3333         inode_lock(inode);
3334
3335         if (f2fs_is_atomic_file(inode)) {
3336                 ret = -EINVAL;
3337                 goto out;
3338         }
3339
3340         if (!pin) {
3341                 clear_inode_flag(inode, FI_PIN_FILE);
3342                 f2fs_i_gc_failures_write(inode, 0);
3343                 goto done;
3344         } else if (f2fs_is_pinned_file(inode)) {
3345                 goto done;
3346         }
3347
3348         if (F2FS_HAS_BLOCKS(inode)) {
3349                 ret = -EFBIG;
3350                 goto out;
3351         }
3352
3353         /* Let's allow file pinning on zoned device. */
3354         if (!f2fs_sb_has_blkzoned(sbi) &&
3355             f2fs_should_update_outplace(inode, NULL)) {
3356                 ret = -EINVAL;
3357                 goto out;
3358         }
3359
3360         if (f2fs_pin_file_control(inode, false)) {
3361                 ret = -EAGAIN;
3362                 goto out;
3363         }
3364
3365         ret = f2fs_convert_inline_inode(inode);
3366         if (ret)
3367                 goto out;
3368
3369         if (!f2fs_disable_compressed_file(inode)) {
3370                 ret = -EOPNOTSUPP;
3371                 goto out;
3372         }
3373
3374         set_inode_flag(inode, FI_PIN_FILE);
3375         ret = F2FS_I(inode)->i_gc_failures;
3376 done:
3377         f2fs_update_time(sbi, REQ_TIME);
3378 out:
3379         inode_unlock(inode);
3380         mnt_drop_write_file(filp);
3381         return ret;
3382 }
3383
3384 static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg)
3385 {
3386         struct inode *inode = file_inode(filp);
3387         __u32 pin = 0;
3388
3389         if (is_inode_flag_set(inode, FI_PIN_FILE))
3390                 pin = F2FS_I(inode)->i_gc_failures;
3391         return put_user(pin, (u32 __user *)arg);
3392 }
3393
3394 int f2fs_precache_extents(struct inode *inode)
3395 {
3396         struct f2fs_inode_info *fi = F2FS_I(inode);
3397         struct f2fs_map_blocks map;
3398         pgoff_t m_next_extent;
3399         loff_t end;
3400         int err;
3401
3402         if (is_inode_flag_set(inode, FI_NO_EXTENT))
3403                 return -EOPNOTSUPP;
3404
3405         map.m_lblk = 0;
3406         map.m_pblk = 0;
3407         map.m_next_pgofs = NULL;
3408         map.m_next_extent = &m_next_extent;
3409         map.m_seg_type = NO_CHECK_TYPE;
3410         map.m_may_create = false;
3411         end = F2FS_BLK_ALIGN(i_size_read(inode));
3412
3413         while (map.m_lblk < end) {
3414                 map.m_len = end - map.m_lblk;
3415
3416                 f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
3417                 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE);
3418                 f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
3419                 if (err || !map.m_len)
3420                         return err;
3421
3422                 map.m_lblk = m_next_extent;
3423         }
3424
3425         return 0;
3426 }
3427
/* ioctl entry point: run f2fs_precache_extents() on the file's inode. */
static int f2fs_ioc_precache_extents(struct file *filp)
{
	struct inode *inode = file_inode(filp);

	return f2fs_precache_extents(inode);
}
3432
3433 static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
3434 {
3435         struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
3436         __u64 block_count;
3437
3438         if (!capable(CAP_SYS_ADMIN))
3439                 return -EPERM;
3440
3441         if (f2fs_readonly(sbi->sb))
3442                 return -EROFS;
3443
3444         if (copy_from_user(&block_count, (void __user *)arg,
3445                            sizeof(block_count)))
3446                 return -EFAULT;
3447
3448         return f2fs_resize_fs(filp, block_count);
3449 }
3450
3451 static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
3452 {
3453         struct inode *inode = file_inode(filp);
3454
3455         f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3456
3457         if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) {
3458                 f2fs_warn(F2FS_I_SB(inode),
3459                           "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem",
3460                           inode->i_ino);
3461                 return -EOPNOTSUPP;
3462         }
3463
3464         return fsverity_ioctl_enable(filp, (const void __user *)arg);
3465 }
3466
3467 static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg)
3468 {
3469         if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
3470                 return -EOPNOTSUPP;
3471
3472         return fsverity_ioctl_measure(filp, (void __user *)arg);
3473 }
3474
3475 static int f2fs_ioc_read_verity_metadata(struct file *filp, unsigned long arg)
3476 {
3477         if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
3478                 return -EOPNOTSUPP;
3479
3480         return fsverity_ioctl_read_metadata(filp, (const void __user *)arg);
3481 }
3482
3483 static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg)
3484 {
3485         struct inode *inode = file_inode(filp);
3486         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3487         char *vbuf;
3488         int count;
3489         int err = 0;
3490
3491         vbuf = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL);
3492         if (!vbuf)
3493                 return -ENOMEM;
3494
3495         f2fs_down_read(&sbi->sb_lock);
3496         count = utf16s_to_utf8s(sbi->raw_super->volume_name,
3497                         ARRAY_SIZE(sbi->raw_super->volume_name),
3498                         UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME);
3499         f2fs_up_read(&sbi->sb_lock);
3500
3501         if (copy_to_user((char __user *)arg, vbuf,
3502                                 min(FSLABEL_MAX, count)))
3503                 err = -EFAULT;
3504
3505         kfree(vbuf);
3506         return err;
3507 }
3508
3509 static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg)
3510 {
3511         struct inode *inode = file_inode(filp);
3512         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3513         char *vbuf;
3514         int err = 0;
3515
3516         if (!capable(CAP_SYS_ADMIN))
3517                 return -EPERM;
3518
3519         vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX);
3520         if (IS_ERR(vbuf))
3521                 return PTR_ERR(vbuf);
3522
3523         err = mnt_want_write_file(filp);
3524         if (err)
3525                 goto out;
3526
3527         f2fs_down_write(&sbi->sb_lock);
3528
3529         memset(sbi->raw_super->volume_name, 0,
3530                         sizeof(sbi->raw_super->volume_name));
3531         utf8s_to_utf16s(vbuf, strlen(vbuf), UTF16_LITTLE_ENDIAN,
3532                         sbi->raw_super->volume_name,
3533                         ARRAY_SIZE(sbi->raw_super->volume_name));
3534
3535         err = f2fs_commit_super(sbi, false);
3536
3537         f2fs_up_write(&sbi->sb_lock);
3538
3539         mnt_drop_write_file(filp);
3540 out:
3541         kfree(vbuf);
3542         return err;
3543 }
3544
3545 static int f2fs_get_compress_blocks(struct inode *inode, __u64 *blocks)
3546 {
3547         if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
3548                 return -EOPNOTSUPP;
3549
3550         if (!f2fs_compressed_file(inode))
3551                 return -EINVAL;
3552
3553         *blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks);
3554
3555         return 0;
3556 }
3557
3558 static int f2fs_ioc_get_compress_blocks(struct file *filp, unsigned long arg)
3559 {
3560         struct inode *inode = file_inode(filp);
3561         __u64 blocks;
3562         int ret;
3563
3564         ret = f2fs_get_compress_blocks(inode, &blocks);
3565         if (ret < 0)
3566                 return ret;
3567
3568         return put_user(blocks, (u64 __user *)arg);
3569 }
3570
/*
 * Release the unused block slots of the compressed clusters covered by @dn.
 *
 * @dn:    dnode positioned at the first slot to examine; dn->ofs_in_node is
 *         advanced past every cluster that is processed or skipped
 * @count: number of slots to examine; it is consumed one cluster_size at a
 *         time (the visible caller rounds it up to a multiple of
 *         i_cluster_size before calling)
 *
 * Returns the number of released blocks, or -EFSCORRUPTED if any address in
 * the range fails the DATA_GENERIC_ENHANCE validity check.
 */
3571 static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
3572 {
3573         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
3574         unsigned int released_blocks = 0;
3575         int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
3576         block_t blkaddr;
3577         int i;
3578
             /*
              * Pass 1: sanity-check every address in the range before any slot
              * is modified, so a corrupted address aborts with nothing changed.
              */
3579         for (i = 0; i < count; i++) {
3580                 blkaddr = data_blkaddr(dn->inode, dn->node_page,
3581                                                 dn->ofs_in_node + i);
3582
3583                 if (!__is_valid_data_blkaddr(blkaddr))
3584                         continue;
3585                 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
3586                                         DATA_GENERIC_ENHANCE)))
3587                         return -EFSCORRUPTED;
3588         }
3589
             /* Pass 2: handle one cluster per iteration. */
3590         while (count) {
3591                 int compr_blocks = 0;
3592
3593                 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
3594                         blkaddr = f2fs_data_blkaddr(dn);
3595
                             /*
                              * Only clusters whose first slot is COMPRESS_ADDR
                              * are processed.  Any other cluster is skipped
                              * whole: the goto bypasses the loop increment, so
                              * ofs_in_node is advanced by cluster_size here.
                              */
3596                         if (i == 0) {
3597                                 if (blkaddr == COMPRESS_ADDR)
3598                                         continue;
3599                                 dn->ofs_in_node += cluster_size;
3600                                 goto next;
3601                         }
3602
3603                         if (__is_valid_data_blkaddr(blkaddr))
3604                                 compr_blocks++;
3605
                             /* Only NEW_ADDR slots are rewritten to NULL_ADDR. */
3606                         if (blkaddr != NEW_ADDR)
3607                                 continue;
3608
3609                         f2fs_set_data_blkaddr(dn, NULL_ADDR);
3610                 }
3611
                     /*
                      * compr_blocks counts the slots holding valid addresses;
                      * the remaining cluster_size - compr_blocks are passed to
                      * dec_valid_block_count() and added to the running total.
                      */
3612                 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false);
3613                 dec_valid_block_count(sbi, dn->inode,
3614                                         cluster_size - compr_blocks);
3615
3616                 released_blocks += cluster_size - compr_blocks;
3617 next:
3618                 count -= cluster_size;
3619         }
3620
3621         return released_blocks;
3622 }
3623
/*
 * ioctl handler: release the unused blocks of every compressed cluster of a
 * file and report how many were released.
 *
 * @filp: open file the ioctl was issued on
 * @arg:  user pointer; on success the released block count is stored through
 *        it as a u64
 *
 * Returns the result of put_user() on success, otherwise a negative errno:
 * -EOPNOTSUPP (no compression feature), -EROFS, -EBUSY (other writers),
 * -EINVAL (not compressed, or already released), -EPERM (i_compr_blocks is
 * zero), or an error from the writeback or the dnode walk.
 */
3624 static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
3625 {
3626         struct inode *inode = file_inode(filp);
3627         struct f2fs_inode_info *fi = F2FS_I(inode);
3628         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3629         pgoff_t page_idx = 0, last_idx;
3630         unsigned int released_blocks = 0;
3631         int ret;
3632         int writecount;
3633
3634         if (!f2fs_sb_has_compression(sbi))
3635                 return -EOPNOTSUPP;
3636
3637         if (f2fs_readonly(sbi->sb))
3638                 return -EROFS;
3639
3640         ret = mnt_want_write_file(filp);
3641         if (ret)
3642                 return ret;
3643
3644         f2fs_balance_fs(sbi, true);
3645
3646         inode_lock(inode);
3647
             /*
              * Refuse while anyone else may write: a writable descriptor must
              * account for the only write reference, a read-only descriptor
              * requires that there be none.
              */
3648         writecount = atomic_read(&inode->i_writecount);
3649         if ((filp->f_mode & FMODE_WRITE && writecount != 1) ||
3650                         (!(filp->f_mode & FMODE_WRITE) && writecount)) {
3651                 ret = -EBUSY;
3652                 goto out;
3653         }
3654
3655         if (!f2fs_compressed_file(inode) ||
3656                 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
3657                 ret = -EINVAL;
3658                 goto out;
3659         }
3660
             /* Write back the whole file before looking at its block counts. */
3661         ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
3662         if (ret)
3663                 goto out;
3664
3665         if (!atomic_read(&fi->i_compr_blocks)) {
3666                 ret = -EPERM;
3667                 goto out;
3668         }
3669
             /*
              * The released flag is set before the walk starts and is not
              * cleared again in this function if the walk fails.
              */
3670         set_inode_flag(inode, FI_COMPRESS_RELEASED);
3671         inode_set_ctime_current(inode);
3672         f2fs_mark_inode_dirty_sync(inode, true);
3673
3674         f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
3675         filemap_invalidate_lock(inode->i_mapping);
3676
3677         last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
3678
             /* Walk the file one dnode at a time, under f2fs_lock_op(). */
3679         while (page_idx < last_idx) {
3680                 struct dnode_of_data dn;
3681                 pgoff_t end_offset, count;
3682
3683                 f2fs_lock_op(sbi);
3684
3685                 set_new_dnode(&dn, inode, NULL, NULL, 0);
3686                 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
3687                 if (ret) {
3688                         f2fs_unlock_op(sbi);
                             /*
                              * -ENOENT is not an error here: jump to the next
                              * page offset and clear ret so it cannot leak out
                              * if the loop ends on this iteration.
                              */
3689                         if (ret == -ENOENT) {
3690                                 page_idx = f2fs_get_next_page_offset(&dn,
3691                                                                 page_idx);
3692                                 ret = 0;
3693                                 continue;
3694                         }
3695                         break;
3696                 }
3697
                     /*
                      * Handle the slots left in this dnode, capped at the end
                      * of the file and rounded up to whole clusters.
                      */
3698                 end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
3699                 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
3700                 count = round_up(count, fi->i_cluster_size);
3701
3702                 ret = release_compress_blocks(&dn, count);
3703
3704                 f2fs_put_dnode(&dn);
3705
3706                 f2fs_unlock_op(sbi);
3707
3708                 if (ret < 0)
3709                         break;
3710
                     /* A non-negative ret is the number of blocks released. */
3711                 page_idx += count;
3712                 released_blocks += ret;
3713         }
3714
3715         filemap_invalidate_unlock(inode->i_mapping);
3716         f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
3717 out:
3718         if (released_blocks)
3719                 f2fs_update_time(sbi, REQ_TIME);
3720         inode_unlock(inode);
3721
3722         mnt_drop_write_file(filp);
3723
             /*
              * Success: report the total.  Failure after some blocks were
              * already released while i_compr_blocks is still non-zero: flag
              * the filesystem for fsck and log the partial state.
              */
3724         if (ret >= 0) {
3725                 ret = put_user(released_blocks, (u64 __user *)arg);
3726         } else if (released_blocks &&
3727                         atomic_read(&fi->i_compr_blocks)) {
3728                 set_sbi_flag(sbi, SBI_NEED_FSCK);
3729                 f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
3730                         "iblocks=%llu, released=%u, compr_blocks=%u, "
3731                         "run fsck to fix.",
3732                         __func__, inode->i_ino, inode->i_blocks,
3733                         released_blocks,
3734                         atomic_read(&fi->i_compr_blocks));
3735         }
3736
3737         return ret;
3738 }
3739
/*
 * Re-reserve block slots for the compressed clusters covered by @dn.
 *
 * @dn:              dnode positioned at the first slot to examine;
 *                   dn->ofs_in_node is advanced past every cluster that is
 *                   processed or skipped
 * @count:           number of slots to examine; consumed one cluster_size at
 *                   a time (the visible caller rounds it up to a multiple of
 *                   i_cluster_size before calling)
 * @reserved_blocks: running total, incremented by the blocks reserved here
 *
 * Returns 0 on success, -EFSCORRUPTED if any address in the range fails the
 * DATA_GENERIC_ENHANCE validity check, or the error returned by
 * inc_valid_block_count().
 */
3740 static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count,
3741                 unsigned int *reserved_blocks)
3742 {
3743         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
3744         int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
3745         block_t blkaddr;
3746         int i;
3747
             /* Pass 1: validate every address before modifying anything. */
3748         for (i = 0; i < count; i++) {
3749                 blkaddr = data_blkaddr(dn->inode, dn->node_page,
3750                                                 dn->ofs_in_node + i);
3751
3752                 if (!__is_valid_data_blkaddr(blkaddr))
3753                         continue;
3754                 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
3755                                         DATA_GENERIC_ENHANCE)))
3756                         return -EFSCORRUPTED;
3757         }
3758
             /* Pass 2: handle one cluster per iteration. */
3759         while (count) {
3760                 int compr_blocks = 0;
3761                 blkcnt_t reserved = 0;
3762                 blkcnt_t to_reserved;
3763                 int ret;
3764
                     /*
                      * Survey the cluster without moving ofs_in_node: count the
                      * slots that already hold NEW_ADDR (reserved) and those
                      * holding a valid address (compr_blocks).
                      */
3765                 for (i = 0; i < cluster_size; i++) {
3766                         blkaddr = data_blkaddr(dn->inode, dn->node_page,
3767                                                 dn->ofs_in_node + i);
3768
                             /* Skip clusters whose first slot is not COMPRESS_ADDR. */
3769                         if (i == 0) {
3770                                 if (blkaddr != COMPRESS_ADDR) {
3771                                         dn->ofs_in_node += cluster_size;
3772                                         goto next;
3773                                 }
3774                                 continue;
3775                         }
3776
3777                         /*
3778                          * compressed cluster was not released due to it
3779                          * fails in release_compress_blocks(), so NEW_ADDR
3780                          * is a possible case.
3781                          */
3782                         if (blkaddr == NEW_ADDR) {
3783                                 reserved++;
3784                                 continue;
3785                         }
3786                         if (__is_valid_data_blkaddr(blkaddr)) {
3787                                 compr_blocks++;
3788                                 continue;
3789                         }
3790                 }
3791
3792                 to_reserved = cluster_size - compr_blocks - reserved;
3793
                     /*
                      * The first (COMPRESS_ADDR) slot is counted in neither
                      * compr_blocks nor reserved, so a remainder of exactly 1
                      * means no other slot is left to reserve.
                      */
3794                 /* for the case all blocks in cluster were reserved */
3795                 if (to_reserved == 1) {
3796                         dn->ofs_in_node += cluster_size;
3797                         goto next;
3798                 }
3799
                     /* to_reserved is passed by pointer to inc_valid_block_count(). */
3800                 ret = inc_valid_block_count(sbi, dn->inode,
3801                                                 &to_reserved, false);
3802                 if (unlikely(ret))
3803                         return ret;
3804
                     /* Turn every NULL_ADDR slot of the cluster into NEW_ADDR. */
3805                 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
3806                         if (f2fs_data_blkaddr(dn) == NULL_ADDR)
3807                                 f2fs_set_data_blkaddr(dn, NEW_ADDR);
3808                 }
3809
3810                 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);
3811
3812                 *reserved_blocks += to_reserved;
3813 next:
3814                 count -= cluster_size;
3815         }
3816
3817         return 0;
3818 }
3819
/*
 * ioctl handler: re-reserve blocks for a file whose compressed blocks were
 * released earlier (FI_COMPRESS_RELEASED set) and report how many were
 * reserved.
 *
 * @filp: open file the ioctl was issued on
 * @arg:  user pointer; on success the reserved block count is stored through
 *        it as a u64
 *
 * Returns the result of put_user() on success, otherwise a negative errno:
 * -EOPNOTSUPP (no compression feature), -EROFS, -EINVAL (not compressed, or
 * not in the released state), or an error from the dnode walk.
 */
3820 static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
3821 {
3822         struct inode *inode = file_inode(filp);
3823         struct f2fs_inode_info *fi = F2FS_I(inode);
3824         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3825         pgoff_t page_idx = 0, last_idx;
3826         unsigned int reserved_blocks = 0;
3827         int ret;
3828
3829         if (!f2fs_sb_has_compression(sbi))
3830                 return -EOPNOTSUPP;
3831
3832         if (f2fs_readonly(sbi->sb))
3833                 return -EROFS;
3834
3835         ret = mnt_want_write_file(filp);
3836         if (ret)
3837                 return ret;
3838
3839         f2fs_balance_fs(sbi, true);
3840
3841         inode_lock(inode);
3842
3843         if (!f2fs_compressed_file(inode) ||
3844                 !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
3845                 ret = -EINVAL;
3846                 goto unlock_inode;
3847         }
3848
             /*
              * A non-zero i_compr_blocks skips the walk entirely.  ret is still
              * 0 here, so the call reports 0 reserved blocks and leaves
              * FI_COMPRESS_RELEASED set.
              */
3849         if (atomic_read(&fi->i_compr_blocks))
3850                 goto unlock_inode;
3851
3852         f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
3853         filemap_invalidate_lock(inode->i_mapping);
3854
3855         last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
3856
             /* Walk the file one dnode at a time, under f2fs_lock_op(). */
3857         while (page_idx < last_idx) {
3858                 struct dnode_of_data dn;
3859                 pgoff_t end_offset, count;
3860
3861                 f2fs_lock_op(sbi);
3862
3863                 set_new_dnode(&dn, inode, NULL, NULL, 0);
3864                 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
3865                 if (ret) {
3866                         f2fs_unlock_op(sbi);
                             /*
                              * -ENOENT is not an error here: jump to the next
                              * page offset and clear ret so it cannot leak out
                              * if the loop ends on this iteration.
                              */
3867                         if (ret == -ENOENT) {
3868                                 page_idx = f2fs_get_next_page_offset(&dn,
3869                                                                 page_idx);
3870                                 ret = 0;
3871                                 continue;
3872                         }
3873                         break;
3874                 }
3875
                     /*
                      * Handle the slots left in this dnode, capped at the end
                      * of the file and rounded up to whole clusters.
                      */
3876                 end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
3877                 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
3878                 count = round_up(count, fi->i_cluster_size);
3879
3880                 ret = reserve_compress_blocks(&dn, count, &reserved_blocks);
3881
3882                 f2fs_put_dnode(&dn);
3883
3884                 f2fs_unlock_op(sbi);
3885
3886                 if (ret < 0)
3887                         break;
3888
3889                 page_idx += count;
3890         }
3891
3892         filemap_invalidate_unlock(inode->i_mapping);
3893         f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
3894
             /* Leave the released state only if the whole walk succeeded. */
3895         if (!ret) {
3896                 clear_inode_flag(inode, FI_COMPRESS_RELEASED);
3897                 inode_set_ctime_current(inode);
3898                 f2fs_mark_inode_dirty_sync(inode, true);
3899         }
3900 unlock_inode:
3901         if (reserved_blocks)
3902                 f2fs_update_time(sbi, REQ_TIME);
3903         inode_unlock(inode);
3904         mnt_drop_write_file(filp);
3905
             /*
              * Success: report the total.  Failure after some blocks were
              * already reserved while i_compr_blocks is non-zero: flag the
              * filesystem for fsck and log the partial state.
              */
3906         if (!ret) {
3907                 ret = put_user(reserved_blocks, (u64 __user *)arg);
3908         } else if (reserved_blocks &&
3909                         atomic_read(&fi->i_compr_blocks)) {
3910                 set_sbi_flag(sbi, SBI_NEED_FSCK);
3911                 f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx "
3912                         "iblocks=%llu, reserved=%u, compr_blocks=%u, "
3913                         "run fsck to fix.",
3914                         __func__, inode->i_ino, inode->i_blocks,
3915                         reserved_blocks,
3916                         atomic_read(&fi->i_compr_blocks));
3917         }
3918
3919         return ret;
3920 }
3921
3922 static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode,
3923                 pgoff_t off, block_t block, block_t len, u32 flags)
3924 {
3925         sector_t sector = SECTOR_FROM_BLOCK(block);
3926         sector_t nr_sects = SECTOR_FROM_BLOCK(len);
3927         int ret = 0;
3928
3929         if (flags & F2FS_TRIM_FILE_DISCARD) {
3930                 if (bdev_max_secure_erase_sectors(bdev))
3931                         ret = blkdev_issue_secure_erase(bdev, sector, nr_sects,
3932                                         GFP_NOFS);
3933                 else
3934                         ret = blkdev_issue_discard(bdev, sector, nr_sects,
3935                                         GFP_NOFS);
3936         }
3937
3938         if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) {
3939                 if (IS_ENCRYPTED(inode))
3940                         ret = fscrypt_zeroout_range(inode, off, block, len);
3941                 else
3942                         ret = blkdev_issue_zeroout(bdev, sector, nr_sects,
3943                                         GFP_NOFS, 0);
3944         }
3945
3946         return ret;
3947 }
3948
3949 static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
3950 {
3951         struct inode *inode = file_inode(filp);
3952         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3953         struct address_space *mapping = inode->i_mapping;
3954         struct block_device *prev_bdev = NULL;
3955         struct f2fs_sectrim_range range;
3956         pgoff_t index, pg_end, prev_index = 0;
3957         block_t prev_block = 0, len = 0;
3958         loff_t end_addr;
3959         bool to_end = false;
3960         int ret = 0;
3961
3962         if (!(filp->f_mode & FMODE_WRITE))
3963                 return -EBADF;
3964
3965         if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg,
3966                                 sizeof(range)))
3967                 return -EFAULT;
3968
3969         if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) ||
3970                         !S_ISREG(inode->i_mode))
3971                 return -EINVAL;
3972
3973         if (((range.flags & F2FS_TRIM_FILE_DISCARD) &&
3974                         !f2fs_hw_support_discard(sbi)) ||
3975                         ((range.flags & F2FS_TRIM_FILE_ZEROOUT) &&
3976                          IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi)))
3977                 return -EOPNOTSUPP;
3978
3979         ret = mnt_want_write_file(filp);
3980         if (ret)
3981                 return ret;
3982         inode_lock(inode);
3983
3984         if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) ||
3985                         range.start >= inode->i_size) {
3986                 ret = -EINVAL;
3987                 goto err;
3988         }
3989
3990         if (range.len == 0)
3991                 goto err;
3992
3993         if (inode->i_size - range.start > range.len) {
3994                 end_addr = range.start + range.len;
3995         } else {
3996                 end_addr = range.len == (u64)-1 ?
3997                         sbi->sb->s_maxbytes : inode->i_size;
3998                 to_end = true;
3999         }
4000
4001         if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) ||
4002                         (!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) {
4003                 ret = -EINVAL;
4004                 goto err;
4005         }
4006
4007         index = F2FS_BYTES_TO_BLK(range.start);
4008         pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE);
4009
4010         ret = f2fs_convert_inline_inode(inode);
4011         if (ret)
4012                 goto err;
4013
4014         f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
4015         filemap_invalidate_lock(mapping);
4016
4017         ret = filemap_write_and_wait_range(mapping, range.start,
4018                         to_end ? LLONG_MAX : end_addr - 1);
4019         if (ret)
4020                 goto out;
4021
4022         truncate_inode_pages_range(mapping, range.start,
4023                         to_end ? -1 : end_addr - 1);
4024
4025         while (index < pg_end) {
4026                 struct dnode_of_data dn;
4027                 pgoff_t end_offset, count;
4028                 int i;
4029
4030                 set_new_dnode(&dn, inode, NULL, NULL, 0);
4031                 ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
4032                 if (ret) {
4033                         if (ret == -ENOENT) {
4034                                 index = f2fs_get_next_page_offset(&dn, index);
4035                                 continue;
4036                         }
4037                         goto out;
4038                 }
4039
4040                 end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
4041                 count = min(end_offset - dn.ofs_in_node, pg_end - index);
4042                 for (i = 0; i < count; i++, index++, dn.ofs_in_node++) {
4043                         struct block_device *cur_bdev;
4044                         block_t blkaddr = f2fs_data_blkaddr(&dn);
4045
4046                         if (!__is_valid_data_blkaddr(blkaddr))
4047                                 continue;
4048
4049                         if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
4050                                                 DATA_GENERIC_ENHANCE)) {
4051                                 ret = -EFSCORRUPTED;
4052                                 f2fs_put_dnode(&dn);
4053                                 goto out;
4054                         }
4055
4056                         cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
4057                         if (f2fs_is_multi_device(sbi)) {
4058                                 int di = f2fs_target_device_index(sbi, blkaddr);
4059
4060                                 blkaddr -= FDEV(di).start_blk;
4061                         }
4062
4063                         if (len) {
4064                                 if (prev_bdev == cur_bdev &&
4065                                                 index == prev_index + len &&
4066                                                 blkaddr == prev_block + len) {
4067                                         len++;
4068                                 } else {
4069                                         ret = f2fs_secure_erase(prev_bdev,
4070                                                 inode, prev_index, prev_block,
4071                                                 len, range.flags);
4072                                         if (ret) {
4073                                                 f2fs_put_dnode(&dn);
4074                                                 goto out;
4075                                         }
4076
4077                                         len = 0;
4078                                 }
4079                         }
4080
4081                         if (!len) {
4082                                 prev_bdev = cur_bdev;
4083                                 prev_index = index;
4084                                 prev_block = blkaddr;
4085                                 len = 1;
4086                         }
4087                 }
4088
4089                 f2fs_put_dnode(&dn);
4090
4091                 if (fatal_signal_pending(current)) {
4092                         ret = -EINTR;
4093                         goto out;
4094                 }
4095                 cond_resched();
4096         }
4097
4098         if (len)
4099                 ret = f2fs_secure_erase(prev_bdev, inode, prev_index,
4100                                 prev_block, len, range.flags);
4101         f2fs_update_time(sbi, REQ_TIME);
4102 out:
4103         filemap_invalidate_unlock(mapping);
4104         f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
4105 err:
4106         inode_unlock(inode);
4107         mnt_drop_write_file(filp);
4108
4109         return ret;
4110 }
4111
4112 static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg)
4113 {
4114         struct inode *inode = file_inode(filp);
4115         struct f2fs_comp_option option;
4116
4117         if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
4118                 return -EOPNOTSUPP;
4119
4120         inode_lock_shared(inode);
4121
4122         if (!f2fs_compressed_file(inode)) {
4123                 inode_unlock_shared(inode);
4124                 return -ENODATA;
4125         }
4126
4127         option.algorithm = F2FS_I(inode)->i_compress_algorithm;
4128         option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size;
4129
4130         inode_unlock_shared(inode);
4131
4132         if (copy_to_user((struct f2fs_comp_option __user *)arg, &option,
4133                                 sizeof(option)))
4134                 return -EFAULT;
4135
4136         return 0;
4137 }
4138
4139 static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
4140 {
4141         struct inode *inode = file_inode(filp);
4142         struct f2fs_inode_info *fi = F2FS_I(inode);
4143         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4144         struct f2fs_comp_option option;
4145         int ret = 0;
4146
4147         if (!f2fs_sb_has_compression(sbi))
4148                 return -EOPNOTSUPP;
4149
4150         if (!(filp->f_mode & FMODE_WRITE))
4151                 return -EBADF;
4152
4153         if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg,
4154                                 sizeof(option)))
4155                 return -EFAULT;
4156
4157         if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
4158                 option.log_cluster_size > MAX_COMPRESS_LOG_SIZE ||
4159                 option.algorithm >= COMPRESS_MAX)
4160                 return -EINVAL;
4161
4162         ret = mnt_want_write_file(filp);
4163         if (ret)
4164                 return ret;
4165         inode_lock(inode);
4166
4167         f2fs_down_write(&F2FS_I(inode)->i_sem);
4168         if (!f2fs_compressed_file(inode)) {
4169                 ret = -EINVAL;
4170                 goto out;
4171         }
4172
4173         if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) {
4174                 ret = -EBUSY;
4175                 goto out;
4176         }
4177
4178         if (F2FS_HAS_BLOCKS(inode)) {
4179                 ret = -EFBIG;
4180                 goto out;
4181         }
4182
4183         fi->i_compress_algorithm = option.algorithm;
4184         fi->i_log_cluster_size = option.log_cluster_size;
4185         fi->i_cluster_size = BIT(option.log_cluster_size);
4186         /* Set default level */
4187         if (fi->i_compress_algorithm == COMPRESS_ZSTD)
4188                 fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
4189         else
4190                 fi->i_compress_level = 0;
4191         /* Adjust mount option level */
4192         if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm &&
4193             F2FS_OPTION(sbi).compress_level)
4194                 fi->i_compress_level = F2FS_OPTION(sbi).compress_level;
4195         f2fs_mark_inode_dirty_sync(inode, true);
4196
4197         if (!f2fs_is_compress_backend_ready(inode))
4198                 f2fs_warn(sbi, "compression algorithm is successfully set, "
4199                         "but current kernel doesn't support this algorithm.");
4200 out:
4201         f2fs_up_write(&fi->i_sem);
4202         inode_unlock(inode);
4203         mnt_drop_write_file(filp);
4204
4205         return ret;
4206 }
4207
4208 static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
4209 {
4210         DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx);
4211         struct address_space *mapping = inode->i_mapping;
4212         struct page *page;
4213         pgoff_t redirty_idx = page_idx;
4214         int i, page_len = 0, ret = 0;
4215
4216         page_cache_ra_unbounded(&ractl, len, 0);
4217
4218         for (i = 0; i < len; i++, page_idx++) {
4219                 page = read_cache_page(mapping, page_idx, NULL, NULL);
4220                 if (IS_ERR(page)) {
4221                         ret = PTR_ERR(page);
4222                         break;
4223                 }
4224                 page_len++;
4225         }
4226
4227         for (i = 0; i < page_len; i++, redirty_idx++) {
4228                 page = find_lock_page(mapping, redirty_idx);
4229
4230                 /* It will never fail, when page has pinned above */
4231                 f2fs_bug_on(F2FS_I_SB(inode), !page);
4232
4233                 f2fs_wait_on_page_writeback(page, DATA, true, true);
4234
4235                 set_page_dirty(page);
4236                 set_page_private_gcing(page);
4237                 f2fs_put_page(page, 1);
4238                 f2fs_put_page(page, 0);
4239         }
4240
4241         return ret;
4242 }
4243
4244 static int f2fs_ioc_decompress_file(struct file *filp)
4245 {
4246         struct inode *inode = file_inode(filp);
4247         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4248         struct f2fs_inode_info *fi = F2FS_I(inode);
4249         pgoff_t page_idx = 0, last_idx, cluster_idx;
4250         int ret;
4251
4252         if (!f2fs_sb_has_compression(sbi) ||
4253                         F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
4254                 return -EOPNOTSUPP;
4255
4256         if (!(filp->f_mode & FMODE_WRITE))
4257                 return -EBADF;
4258
4259         f2fs_balance_fs(sbi, true);
4260
4261         ret = mnt_want_write_file(filp);
4262         if (ret)
4263                 return ret;
4264         inode_lock(inode);
4265
4266         if (!f2fs_is_compress_backend_ready(inode)) {
4267                 ret = -EOPNOTSUPP;
4268                 goto out;
4269         }
4270
4271         if (!f2fs_compressed_file(inode) ||
4272                 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
4273                 ret = -EINVAL;
4274                 goto out;
4275         }
4276
4277         ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
4278         if (ret)
4279                 goto out;
4280
4281         if (!atomic_read(&fi->i_compr_blocks))
4282                 goto out;
4283
4284         last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
4285         last_idx >>= fi->i_log_cluster_size;
4286
4287         for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) {
4288                 page_idx = cluster_idx << fi->i_log_cluster_size;
4289
4290                 if (!f2fs_is_compressed_cluster(inode, page_idx))
4291                         continue;
4292
4293                 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size);
4294                 if (ret < 0)
4295                         break;
4296
4297                 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) {
4298                         ret = filemap_fdatawrite(inode->i_mapping);
4299                         if (ret < 0)
4300                                 break;
4301                 }
4302
4303                 cond_resched();
4304                 if (fatal_signal_pending(current)) {
4305                         ret = -EINTR;
4306                         break;
4307                 }
4308         }
4309
4310         if (!ret)
4311                 ret = filemap_write_and_wait_range(inode->i_mapping, 0,
4312                                                         LLONG_MAX);
4313
4314         if (ret)
4315                 f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.",
4316                           __func__, ret);
4317         f2fs_update_time(sbi, REQ_TIME);
4318 out:
4319         inode_unlock(inode);
4320         mnt_drop_write_file(filp);
4321
4322         return ret;
4323 }
4324
4325 static int f2fs_ioc_compress_file(struct file *filp)
4326 {
4327         struct inode *inode = file_inode(filp);
4328         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4329         struct f2fs_inode_info *fi = F2FS_I(inode);
4330         pgoff_t page_idx = 0, last_idx, cluster_idx;
4331         int ret;
4332
4333         if (!f2fs_sb_has_compression(sbi) ||
4334                         F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
4335                 return -EOPNOTSUPP;
4336
4337         if (!(filp->f_mode & FMODE_WRITE))
4338                 return -EBADF;
4339
4340         f2fs_balance_fs(sbi, true);
4341
4342         ret = mnt_want_write_file(filp);
4343         if (ret)
4344                 return ret;
4345         inode_lock(inode);
4346
4347         if (!f2fs_is_compress_backend_ready(inode)) {
4348                 ret = -EOPNOTSUPP;
4349                 goto out;
4350         }
4351
4352         if (!f2fs_compressed_file(inode) ||
4353                 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
4354                 ret = -EINVAL;
4355                 goto out;
4356         }
4357
4358         ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
4359         if (ret)
4360                 goto out;
4361
4362         set_inode_flag(inode, FI_ENABLE_COMPRESS);
4363
4364         last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
4365         last_idx >>= fi->i_log_cluster_size;
4366
4367         for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) {
4368                 page_idx = cluster_idx << fi->i_log_cluster_size;
4369
4370                 if (f2fs_is_sparse_cluster(inode, page_idx))
4371                         continue;
4372
4373                 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size);
4374                 if (ret < 0)
4375                         break;
4376
4377                 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) {
4378                         ret = filemap_fdatawrite(inode->i_mapping);
4379                         if (ret < 0)
4380                                 break;
4381                 }
4382
4383                 cond_resched();
4384                 if (fatal_signal_pending(current)) {
4385                         ret = -EINTR;
4386                         break;
4387                 }
4388         }
4389
4390         if (!ret)
4391                 ret = filemap_write_and_wait_range(inode->i_mapping, 0,
4392                                                         LLONG_MAX);
4393
4394         clear_inode_flag(inode, FI_ENABLE_COMPRESS);
4395
4396         if (ret)
4397                 f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). Please delete the file.",
4398                           __func__, ret);
4399         f2fs_update_time(sbi, REQ_TIME);
4400 out:
4401         inode_unlock(inode);
4402         mnt_drop_write_file(filp);
4403
4404         return ret;
4405 }
4406
4407 static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
4408 {
4409         switch (cmd) {
4410         case FS_IOC_GETVERSION:
4411                 return f2fs_ioc_getversion(filp, arg);
4412         case F2FS_IOC_START_ATOMIC_WRITE:
4413                 return f2fs_ioc_start_atomic_write(filp, false);
4414         case F2FS_IOC_START_ATOMIC_REPLACE:
4415                 return f2fs_ioc_start_atomic_write(filp, true);
4416         case F2FS_IOC_COMMIT_ATOMIC_WRITE:
4417                 return f2fs_ioc_commit_atomic_write(filp);
4418         case F2FS_IOC_ABORT_ATOMIC_WRITE:
4419                 return f2fs_ioc_abort_atomic_write(filp);
4420         case F2FS_IOC_START_VOLATILE_WRITE:
4421         case F2FS_IOC_RELEASE_VOLATILE_WRITE:
4422                 return -EOPNOTSUPP;
4423         case F2FS_IOC_SHUTDOWN:
4424                 return f2fs_ioc_shutdown(filp, arg);
4425         case FITRIM:
4426                 return f2fs_ioc_fitrim(filp, arg);
4427         case FS_IOC_SET_ENCRYPTION_POLICY:
4428                 return f2fs_ioc_set_encryption_policy(filp, arg);
4429         case FS_IOC_GET_ENCRYPTION_POLICY:
4430                 return f2fs_ioc_get_encryption_policy(filp, arg);
4431         case FS_IOC_GET_ENCRYPTION_PWSALT:
4432                 return f2fs_ioc_get_encryption_pwsalt(filp, arg);
4433         case FS_IOC_GET_ENCRYPTION_POLICY_EX:
4434                 return f2fs_ioc_get_encryption_policy_ex(filp, arg);
4435         case FS_IOC_ADD_ENCRYPTION_KEY:
4436                 return f2fs_ioc_add_encryption_key(filp, arg);
4437         case FS_IOC_REMOVE_ENCRYPTION_KEY:
4438                 return f2fs_ioc_remove_encryption_key(filp, arg);
4439         case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
4440                 return f2fs_ioc_remove_encryption_key_all_users(filp, arg);
4441         case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
4442                 return f2fs_ioc_get_encryption_key_status(filp, arg);
4443         case FS_IOC_GET_ENCRYPTION_NONCE:
4444                 return f2fs_ioc_get_encryption_nonce(filp, arg);
4445         case F2FS_IOC_GARBAGE_COLLECT:
4446                 return f2fs_ioc_gc(filp, arg);
4447         case F2FS_IOC_GARBAGE_COLLECT_RANGE:
4448                 return f2fs_ioc_gc_range(filp, arg);
4449         case F2FS_IOC_WRITE_CHECKPOINT:
4450                 return f2fs_ioc_write_checkpoint(filp);
4451         case F2FS_IOC_DEFRAGMENT:
4452                 return f2fs_ioc_defragment(filp, arg);
4453         case F2FS_IOC_MOVE_RANGE:
4454                 return f2fs_ioc_move_range(filp, arg);
4455         case F2FS_IOC_FLUSH_DEVICE:
4456                 return f2fs_ioc_flush_device(filp, arg);
4457         case F2FS_IOC_GET_FEATURES:
4458                 return f2fs_ioc_get_features(filp, arg);
4459         case F2FS_IOC_GET_PIN_FILE:
4460                 return f2fs_ioc_get_pin_file(filp, arg);
4461         case F2FS_IOC_SET_PIN_FILE:
4462                 return f2fs_ioc_set_pin_file(filp, arg);
4463         case F2FS_IOC_PRECACHE_EXTENTS:
4464                 return f2fs_ioc_precache_extents(filp);
4465         case F2FS_IOC_RESIZE_FS:
4466                 return f2fs_ioc_resize_fs(filp, arg);
4467         case FS_IOC_ENABLE_VERITY:
4468                 return f2fs_ioc_enable_verity(filp, arg);
4469         case FS_IOC_MEASURE_VERITY:
4470                 return f2fs_ioc_measure_verity(filp, arg);
4471         case FS_IOC_READ_VERITY_METADATA:
4472                 return f2fs_ioc_read_verity_metadata(filp, arg);
4473         case FS_IOC_GETFSLABEL:
4474                 return f2fs_ioc_getfslabel(filp, arg);
4475         case FS_IOC_SETFSLABEL:
4476                 return f2fs_ioc_setfslabel(filp, arg);
4477         case F2FS_IOC_GET_COMPRESS_BLOCKS:
4478                 return f2fs_ioc_get_compress_blocks(filp, arg);
4479         case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
4480                 return f2fs_release_compress_blocks(filp, arg);
4481         case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
4482                 return f2fs_reserve_compress_blocks(filp, arg);
4483         case F2FS_IOC_SEC_TRIM_FILE:
4484                 return f2fs_sec_trim_file(filp, arg);
4485         case F2FS_IOC_GET_COMPRESS_OPTION:
4486                 return f2fs_ioc_get_compress_option(filp, arg);
4487         case F2FS_IOC_SET_COMPRESS_OPTION:
4488                 return f2fs_ioc_set_compress_option(filp, arg);
4489         case F2FS_IOC_DECOMPRESS_FILE:
4490                 return f2fs_ioc_decompress_file(filp);
4491         case F2FS_IOC_COMPRESS_FILE:
4492                 return f2fs_ioc_compress_file(filp);
4493         default:
4494                 return -ENOTTY;
4495         }
4496 }
4497
4498 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
4499 {
4500         if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
4501                 return -EIO;
4502         if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp))))
4503                 return -ENOSPC;
4504
4505         return __f2fs_ioctl(filp, cmd, arg);
4506 }
4507
4508 /*
4509  * Return %true if the given read or write request should use direct I/O, or
4510  * %false if it should use buffered I/O.
4511  */
4512 static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
4513                                 struct iov_iter *iter)
4514 {
4515         unsigned int align;
4516
4517         if (!(iocb->ki_flags & IOCB_DIRECT))
4518                 return false;
4519
4520         if (f2fs_force_buffered_io(inode, iov_iter_rw(iter)))
4521                 return false;
4522
4523         /*
4524          * Direct I/O not aligned to the disk's logical_block_size will be
4525          * attempted, but will fail with -EINVAL.
4526          *
4527          * f2fs additionally requires that direct I/O be aligned to the
4528          * filesystem block size, which is often a stricter requirement.
4529          * However, f2fs traditionally falls back to buffered I/O on requests
4530          * that are logical_block_size-aligned but not fs-block aligned.
4531          *
4532          * The below logic implements this behavior.
4533          */
4534         align = iocb->ki_pos | iov_iter_alignment(iter);
4535         if (!IS_ALIGNED(align, i_blocksize(inode)) &&
4536             IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev)))
4537                 return false;
4538
4539         return true;
4540 }
4541
4542 static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
4543                                 unsigned int flags)
4544 {
4545         struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
4546
4547         dec_page_count(sbi, F2FS_DIO_READ);
4548         if (error)
4549                 return error;
4550         f2fs_update_iostat(sbi, NULL, APP_DIRECT_READ_IO, size);
4551         return 0;
4552 }
4553
4554 static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
4555         .end_io = f2fs_dio_read_end_io,
4556 };
4557
4558 static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
4559 {
4560         struct file *file = iocb->ki_filp;
4561         struct inode *inode = file_inode(file);
4562         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4563         struct f2fs_inode_info *fi = F2FS_I(inode);
4564         const loff_t pos = iocb->ki_pos;
4565         const size_t count = iov_iter_count(to);
4566         struct iomap_dio *dio;
4567         ssize_t ret;
4568
4569         if (count == 0)
4570                 return 0; /* skip atime update */
4571
4572         trace_f2fs_direct_IO_enter(inode, iocb, count, READ);
4573
4574         if (iocb->ki_flags & IOCB_NOWAIT) {
4575                 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
4576                         ret = -EAGAIN;
4577                         goto out;
4578                 }
4579         } else {
4580                 f2fs_down_read(&fi->i_gc_rwsem[READ]);
4581         }
4582
4583         /* dio is not compatible w/ atomic file */
4584         if (f2fs_is_atomic_file(inode)) {
4585                 f2fs_up_read(&fi->i_gc_rwsem[READ]);
4586                 ret = -EOPNOTSUPP;
4587                 goto out;
4588         }
4589
4590         /*
4591          * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
4592          * the higher-level function iomap_dio_rw() in order to ensure that the
4593          * F2FS_DIO_READ counter will be decremented correctly in all cases.
4594          */
4595         inc_page_count(sbi, F2FS_DIO_READ);
4596         dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops,
4597                              &f2fs_iomap_dio_read_ops, 0, NULL, 0);
4598         if (IS_ERR_OR_NULL(dio)) {
4599                 ret = PTR_ERR_OR_ZERO(dio);
4600                 if (ret != -EIOCBQUEUED)
4601                         dec_page_count(sbi, F2FS_DIO_READ);
4602         } else {
4603                 ret = iomap_dio_complete(dio);
4604         }
4605
4606         f2fs_up_read(&fi->i_gc_rwsem[READ]);
4607
4608         file_accessed(file);
4609 out:
4610         trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
4611         return ret;
4612 }
4613
4614 static void f2fs_trace_rw_file_path(struct file *file, loff_t pos, size_t count,
4615                                     int rw)
4616 {
4617         struct inode *inode = file_inode(file);
4618         char *buf, *path;
4619
4620         buf = f2fs_getname(F2FS_I_SB(inode));
4621         if (!buf)
4622                 return;
4623         path = dentry_path_raw(file_dentry(file), buf, PATH_MAX);
4624         if (IS_ERR(path))
4625                 goto free_buf;
4626         if (rw == WRITE)
4627                 trace_f2fs_datawrite_start(inode, pos, count,
4628                                 current->pid, path, current->comm);
4629         else
4630                 trace_f2fs_dataread_start(inode, pos, count,
4631                                 current->pid, path, current->comm);
4632 free_buf:
4633         f2fs_putname(buf);
4634 }
4635
4636 static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
4637 {
4638         struct inode *inode = file_inode(iocb->ki_filp);
4639         const loff_t pos = iocb->ki_pos;
4640         ssize_t ret;
4641
4642         if (!f2fs_is_compress_backend_ready(inode))
4643                 return -EOPNOTSUPP;
4644
4645         if (trace_f2fs_dataread_start_enabled())
4646                 f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
4647                                         iov_iter_count(to), READ);
4648
4649         /* In LFS mode, if there is inflight dio, wait for its completion */
4650         if (f2fs_lfs_mode(F2FS_I_SB(inode)))
4651                 inode_dio_wait(inode);
4652
4653         if (f2fs_should_use_dio(inode, iocb, to)) {
4654                 ret = f2fs_dio_read_iter(iocb, to);
4655         } else {
4656                 ret = filemap_read(iocb, to, 0);
4657                 if (ret > 0)
4658                         f2fs_update_iostat(F2FS_I_SB(inode), inode,
4659                                                 APP_BUFFERED_READ_IO, ret);
4660         }
4661         if (trace_f2fs_dataread_end_enabled())
4662                 trace_f2fs_dataread_end(inode, pos, ret);
4663         return ret;
4664 }
4665
4666 static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos,
4667                                      struct pipe_inode_info *pipe,
4668                                      size_t len, unsigned int flags)
4669 {
4670         struct inode *inode = file_inode(in);
4671         const loff_t pos = *ppos;
4672         ssize_t ret;
4673
4674         if (!f2fs_is_compress_backend_ready(inode))
4675                 return -EOPNOTSUPP;
4676
4677         if (trace_f2fs_dataread_start_enabled())
4678                 f2fs_trace_rw_file_path(in, pos, len, READ);
4679
4680         ret = filemap_splice_read(in, ppos, pipe, len, flags);
4681         if (ret > 0)
4682                 f2fs_update_iostat(F2FS_I_SB(inode), inode,
4683                                    APP_BUFFERED_READ_IO, ret);
4684
4685         if (trace_f2fs_dataread_end_enabled())
4686                 trace_f2fs_dataread_end(inode, pos, ret);
4687         return ret;
4688 }
4689
4690 static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
4691 {
4692         struct file *file = iocb->ki_filp;
4693         struct inode *inode = file_inode(file);
4694         ssize_t count;
4695         int err;
4696
4697         if (IS_IMMUTABLE(inode))
4698                 return -EPERM;
4699
4700         if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
4701                 return -EPERM;
4702
4703         count = generic_write_checks(iocb, from);
4704         if (count <= 0)
4705                 return count;
4706
4707         err = file_modified(file);
4708         if (err)
4709                 return err;
4710         return count;
4711 }
4712
4713 /*
4714  * Preallocate blocks for a write request, if it is possible and helpful to do
4715  * so.  Returns a positive number if blocks may have been preallocated, 0 if no
4716  * blocks were preallocated, or a negative errno value if something went
4717  * seriously wrong.  Also sets FI_PREALLOCATED_ALL on the inode if *all* the
4718  * requested blocks (not just some of them) have been allocated.
4719  */
4720 static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
4721                                    bool dio)
4722 {
4723         struct inode *inode = file_inode(iocb->ki_filp);
4724         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4725         const loff_t pos = iocb->ki_pos;
4726         const size_t count = iov_iter_count(iter);
4727         struct f2fs_map_blocks map = {};
4728         int flag;
4729         int ret;
4730
4731         /* If it will be an out-of-place direct write, don't bother. */
4732         if (dio && f2fs_lfs_mode(sbi))
4733                 return 0;
4734         /*
4735          * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
4736          * buffered IO, if DIO meets any holes.
4737          */
4738         if (dio && i_size_read(inode) &&
4739                 (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
4740                 return 0;
4741
4742         /* No-wait I/O can't allocate blocks. */
4743         if (iocb->ki_flags & IOCB_NOWAIT)
4744                 return 0;
4745
4746         /* If it will be a short write, don't bother. */
4747         if (fault_in_iov_iter_readable(iter, count))
4748                 return 0;
4749
4750         if (f2fs_has_inline_data(inode)) {
4751                 /* If the data will fit inline, don't bother. */
4752                 if (pos + count <= MAX_INLINE_DATA(inode))
4753                         return 0;
4754                 ret = f2fs_convert_inline_inode(inode);
4755                 if (ret)
4756                         return ret;
4757         }
4758
4759         /* Do not preallocate blocks that will be written partially in 4KB. */
4760         map.m_lblk = F2FS_BLK_ALIGN(pos);
4761         map.m_len = F2FS_BYTES_TO_BLK(pos + count);
4762         if (map.m_len > map.m_lblk)
4763                 map.m_len -= map.m_lblk;
4764         else
4765                 return 0;
4766
4767         map.m_may_create = true;
4768         if (dio) {
4769                 map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi,
4770                                                 inode->i_write_hint);
4771                 flag = F2FS_GET_BLOCK_PRE_DIO;
4772         } else {
4773                 map.m_seg_type = NO_CHECK_TYPE;
4774                 flag = F2FS_GET_BLOCK_PRE_AIO;
4775         }
4776
4777         ret = f2fs_map_blocks(inode, &map, flag);
4778         /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. */
4779         if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0))
4780                 return ret;
4781         if (ret == 0)
4782                 set_inode_flag(inode, FI_PREALLOCATED_ALL);
4783         return map.m_len;
4784 }
4785
4786 static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
4787                                         struct iov_iter *from)
4788 {
4789         struct file *file = iocb->ki_filp;
4790         struct inode *inode = file_inode(file);
4791         ssize_t ret;
4792
4793         if (iocb->ki_flags & IOCB_NOWAIT)
4794                 return -EOPNOTSUPP;
4795
4796         ret = generic_perform_write(iocb, from);
4797
4798         if (ret > 0) {
4799                 f2fs_update_iostat(F2FS_I_SB(inode), inode,
4800                                                 APP_BUFFERED_IO, ret);
4801         }
4802         return ret;
4803 }
4804
4805 static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
4806                                  unsigned int flags)
4807 {
4808         struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
4809
4810         dec_page_count(sbi, F2FS_DIO_WRITE);
4811         if (error)
4812                 return error;
4813         f2fs_update_time(sbi, REQ_TIME);
4814         f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size);
4815         return 0;
4816 }
4817
4818 static void f2fs_dio_write_submit_io(const struct iomap_iter *iter,
4819                                         struct bio *bio, loff_t file_offset)
4820 {
4821         struct inode *inode = iter->inode;
4822         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4823         int seg_type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint);
4824         enum temp_type temp = f2fs_get_segment_temp(seg_type);
4825
4826         bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp);
4827         submit_bio(bio);
4828 }
4829
4830 static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
4831         .end_io         = f2fs_dio_write_end_io,
4832         .submit_io      = f2fs_dio_write_submit_io,
4833 };
4834
4835 static void f2fs_flush_buffered_write(struct address_space *mapping,
4836                                       loff_t start_pos, loff_t end_pos)
4837 {
4838         int ret;
4839
4840         ret = filemap_write_and_wait_range(mapping, start_pos, end_pos);
4841         if (ret < 0)
4842                 return;
4843         invalidate_mapping_pages(mapping,
4844                                  start_pos >> PAGE_SHIFT,
4845                                  end_pos >> PAGE_SHIFT);
4846 }
4847
4848 static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
4849                                    bool *may_need_sync)
4850 {
4851         struct file *file = iocb->ki_filp;
4852         struct inode *inode = file_inode(file);
4853         struct f2fs_inode_info *fi = F2FS_I(inode);
4854         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4855         const bool do_opu = f2fs_lfs_mode(sbi);
4856         const loff_t pos = iocb->ki_pos;
4857         const ssize_t count = iov_iter_count(from);
4858         unsigned int dio_flags;
4859         struct iomap_dio *dio;
4860         ssize_t ret;
4861
4862         trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE);
4863
4864         if (iocb->ki_flags & IOCB_NOWAIT) {
4865                 /* f2fs_convert_inline_inode() and block allocation can block */
4866                 if (f2fs_has_inline_data(inode) ||
4867                     !f2fs_overwrite_io(inode, pos, count)) {
4868                         ret = -EAGAIN;
4869                         goto out;
4870                 }
4871
4872                 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
4873                         ret = -EAGAIN;
4874                         goto out;
4875                 }
4876                 if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
4877                         f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
4878                         ret = -EAGAIN;
4879                         goto out;
4880                 }
4881         } else {
4882                 ret = f2fs_convert_inline_inode(inode);
4883                 if (ret)
4884                         goto out;
4885
4886                 f2fs_down_read(&fi->i_gc_rwsem[WRITE]);
4887                 if (do_opu)
4888                         f2fs_down_read(&fi->i_gc_rwsem[READ]);
4889         }
4890
4891         /*
4892          * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
4893          * the higher-level function iomap_dio_rw() in order to ensure that the
4894          * F2FS_DIO_WRITE counter will be decremented correctly in all cases.
4895          */
4896         inc_page_count(sbi, F2FS_DIO_WRITE);
4897         dio_flags = 0;
4898         if (pos + count > inode->i_size)
4899                 dio_flags |= IOMAP_DIO_FORCE_WAIT;
4900         dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
4901                              &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0);
4902         if (IS_ERR_OR_NULL(dio)) {
4903                 ret = PTR_ERR_OR_ZERO(dio);
4904                 if (ret == -ENOTBLK)
4905                         ret = 0;
4906                 if (ret != -EIOCBQUEUED)
4907                         dec_page_count(sbi, F2FS_DIO_WRITE);
4908         } else {
4909                 ret = iomap_dio_complete(dio);
4910         }
4911
4912         if (do_opu)
4913                 f2fs_up_read(&fi->i_gc_rwsem[READ]);
4914         f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
4915
4916         if (ret < 0)
4917                 goto out;
4918         if (pos + ret > inode->i_size)
4919                 f2fs_i_size_write(inode, pos + ret);
4920         if (!do_opu)
4921                 set_inode_flag(inode, FI_UPDATE_WRITE);
4922
4923         if (iov_iter_count(from)) {
4924                 ssize_t ret2;
4925                 loff_t bufio_start_pos = iocb->ki_pos;
4926
4927                 /*
4928                  * The direct write was partial, so we need to fall back to a
4929                  * buffered write for the remainder.
4930                  */
4931
4932                 ret2 = f2fs_buffered_write_iter(iocb, from);
4933                 if (iov_iter_count(from))
4934                         f2fs_write_failed(inode, iocb->ki_pos);
4935                 if (ret2 < 0)
4936                         goto out;
4937
4938                 /*
4939                  * Ensure that the pagecache pages are written to disk and
4940                  * invalidated to preserve the expected O_DIRECT semantics.
4941                  */
4942                 if (ret2 > 0) {
4943                         loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;
4944
4945                         ret += ret2;
4946
4947                         f2fs_flush_buffered_write(file->f_mapping,
4948                                                   bufio_start_pos,
4949                                                   bufio_end_pos);
4950                 }
4951         } else {
4952                 /* iomap_dio_rw() already handled the generic_write_sync(). */
4953                 *may_need_sync = false;
4954         }
4955 out:
4956         trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
4957         return ret;
4958 }
4959
4960 static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
4961 {
4962         struct inode *inode = file_inode(iocb->ki_filp);
4963         const loff_t orig_pos = iocb->ki_pos;
4964         const size_t orig_count = iov_iter_count(from);
4965         loff_t target_size;
4966         bool dio;
4967         bool may_need_sync = true;
4968         int preallocated;
4969         const loff_t pos = iocb->ki_pos;
4970         const ssize_t count = iov_iter_count(from);
4971         ssize_t ret;
4972
4973         if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
4974                 ret = -EIO;
4975                 goto out;
4976         }
4977
4978         if (!f2fs_is_compress_backend_ready(inode)) {
4979                 ret = -EOPNOTSUPP;
4980                 goto out;
4981         }
4982
4983         if (iocb->ki_flags & IOCB_NOWAIT) {
4984                 if (!inode_trylock(inode)) {
4985                         ret = -EAGAIN;
4986                         goto out;
4987                 }
4988         } else {
4989                 inode_lock(inode);
4990         }
4991
4992         if (f2fs_is_pinned_file(inode) &&
4993             !f2fs_overwrite_io(inode, pos, count)) {
4994                 ret = -EIO;
4995                 goto out_unlock;
4996         }
4997
4998         ret = f2fs_write_checks(iocb, from);
4999         if (ret <= 0)
5000                 goto out_unlock;
5001
5002         /* Determine whether we will do a direct write or a buffered write. */
5003         dio = f2fs_should_use_dio(inode, iocb, from);
5004
5005         /* dio is not compatible w/ atomic write */
5006         if (dio && f2fs_is_atomic_file(inode)) {
5007                 ret = -EOPNOTSUPP;
5008                 goto out_unlock;
5009         }
5010
5011         /* Possibly preallocate the blocks for the write. */
5012         target_size = iocb->ki_pos + iov_iter_count(from);
5013         preallocated = f2fs_preallocate_blocks(iocb, from, dio);
5014         if (preallocated < 0) {
5015                 ret = preallocated;
5016         } else {
5017                 if (trace_f2fs_datawrite_start_enabled())
5018                         f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
5019                                                 orig_count, WRITE);
5020
5021                 /* Do the actual write. */
5022                 ret = dio ?
5023                         f2fs_dio_write_iter(iocb, from, &may_need_sync) :
5024                         f2fs_buffered_write_iter(iocb, from);
5025
5026                 if (trace_f2fs_datawrite_end_enabled())
5027                         trace_f2fs_datawrite_end(inode, orig_pos, ret);
5028         }
5029
5030         /* Don't leave any preallocated blocks around past i_size. */
5031         if (preallocated && i_size_read(inode) < target_size) {
5032                 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
5033                 filemap_invalidate_lock(inode->i_mapping);
5034                 if (!f2fs_truncate(inode))
5035                         file_dont_truncate(inode);
5036                 filemap_invalidate_unlock(inode->i_mapping);
5037                 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
5038         } else {
5039                 file_dont_truncate(inode);
5040         }
5041
5042         clear_inode_flag(inode, FI_PREALLOCATED_ALL);
5043 out_unlock:
5044         inode_unlock(inode);
5045 out:
5046         trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
5047
5048         if (ret > 0 && may_need_sync)
5049                 ret = generic_write_sync(iocb, ret);
5050
5051         /* If buffered IO was forced, flush and drop the data from
5052          * the page cache to preserve O_DIRECT semantics
5053          */
5054         if (ret > 0 && !dio && (iocb->ki_flags & IOCB_DIRECT))
5055                 f2fs_flush_buffered_write(iocb->ki_filp->f_mapping,
5056                                           orig_pos,
5057                                           orig_pos + ret - 1);
5058
5059         return ret;
5060 }
5061
5062 static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
5063                 int advice)
5064 {
5065         struct address_space *mapping;
5066         struct backing_dev_info *bdi;
5067         struct inode *inode = file_inode(filp);
5068         int err;
5069
5070         if (advice == POSIX_FADV_SEQUENTIAL) {
5071                 if (S_ISFIFO(inode->i_mode))
5072                         return -ESPIPE;
5073
5074                 mapping = filp->f_mapping;
5075                 if (!mapping || len < 0)
5076                         return -EINVAL;
5077
5078                 bdi = inode_to_bdi(mapping->host);
5079                 filp->f_ra.ra_pages = bdi->ra_pages *
5080                         F2FS_I_SB(inode)->seq_file_ra_mul;
5081                 spin_lock(&filp->f_lock);
5082                 filp->f_mode &= ~FMODE_RANDOM;
5083                 spin_unlock(&filp->f_lock);
5084                 return 0;
5085         } else if (advice == POSIX_FADV_WILLNEED && offset == 0) {
5086                 /* Load extent cache at the first readahead. */
5087                 f2fs_precache_extents(inode);
5088         }
5089
5090         err = generic_fadvise(filp, offset, len, advice);
5091         if (!err && advice == POSIX_FADV_DONTNEED &&
5092                 test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) &&
5093                 f2fs_compressed_file(inode))
5094                 f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino);
5095
5096         return err;
5097 }
5098
5099 #ifdef CONFIG_COMPAT
/*
 * Argument layout of the garbage-collect-range ioctl as seen by the compat
 * ioctl path.  f2fs_compat_ioc_gc_range() reads it from user space one
 * field at a time into a struct f2fs_gc_range.
 *
 * NOTE(review): the 64-bit members are declared compat_u64, presumably so
 * that their alignment matches the 32-bit ABI -- confirm before changing.
 */
struct compat_f2fs_gc_range {
        u32 sync;
        compat_u64 start;
        compat_u64 len;
};
/*
 * Compat command number: command 11 in F2FS_IOCTL_MAGIC, encoded with the
 * size of the compat structure above.
 */
#define F2FS_IOC32_GARBAGE_COLLECT_RANGE        _IOW(F2FS_IOCTL_MAGIC, 11,\
                                                struct compat_f2fs_gc_range)
5107
5108 static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg)
5109 {
5110         struct compat_f2fs_gc_range __user *urange;
5111         struct f2fs_gc_range range;
5112         int err;
5113
5114         urange = compat_ptr(arg);
5115         err = get_user(range.sync, &urange->sync);
5116         err |= get_user(range.start, &urange->start);
5117         err |= get_user(range.len, &urange->len);
5118         if (err)
5119                 return -EFAULT;
5120
5121         return __f2fs_ioc_gc_range(file, &range);
5122 }
5123
/*
 * Argument layout of the move-range ioctl as seen by the compat ioctl path.
 * f2fs_compat_ioc_move_range() reads it from user space one field at a time
 * into a struct f2fs_move_range.
 *
 * NOTE(review): the 64-bit members are declared compat_u64, presumably so
 * that their alignment matches the 32-bit ABI -- confirm before changing.
 */
struct compat_f2fs_move_range {
        u32 dst_fd;
        compat_u64 pos_in;
        compat_u64 pos_out;
        compat_u64 len;
};
/*
 * Compat command number: command 9 in F2FS_IOCTL_MAGIC, encoded with the
 * size of the compat structure above.
 */
#define F2FS_IOC32_MOVE_RANGE           _IOWR(F2FS_IOCTL_MAGIC, 9,      \
                                        struct compat_f2fs_move_range)
5132
5133 static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg)
5134 {
5135         struct compat_f2fs_move_range __user *urange;
5136         struct f2fs_move_range range;
5137         int err;
5138
5139         urange = compat_ptr(arg);
5140         err = get_user(range.dst_fd, &urange->dst_fd);
5141         err |= get_user(range.pos_in, &urange->pos_in);
5142         err |= get_user(range.pos_out, &urange->pos_out);
5143         err |= get_user(range.len, &urange->len);
5144         if (err)
5145                 return -EFAULT;
5146
5147         return __f2fs_ioc_move_range(file, &range);
5148 }
5149
5150 long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
5151 {
5152         if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
5153                 return -EIO;
5154         if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file))))
5155                 return -ENOSPC;
5156
5157         switch (cmd) {
5158         case FS_IOC32_GETVERSION:
5159                 cmd = FS_IOC_GETVERSION;
5160                 break;
5161         case F2FS_IOC32_GARBAGE_COLLECT_RANGE:
5162                 return f2fs_compat_ioc_gc_range(file, arg);
5163         case F2FS_IOC32_MOVE_RANGE:
5164                 return f2fs_compat_ioc_move_range(file, arg);
5165         case F2FS_IOC_START_ATOMIC_WRITE:
5166         case F2FS_IOC_START_ATOMIC_REPLACE:
5167         case F2FS_IOC_COMMIT_ATOMIC_WRITE:
5168         case F2FS_IOC_START_VOLATILE_WRITE:
5169         case F2FS_IOC_RELEASE_VOLATILE_WRITE:
5170         case F2FS_IOC_ABORT_ATOMIC_WRITE:
5171         case F2FS_IOC_SHUTDOWN:
5172         case FITRIM:
5173         case FS_IOC_SET_ENCRYPTION_POLICY:
5174         case FS_IOC_GET_ENCRYPTION_PWSALT:
5175         case FS_IOC_GET_ENCRYPTION_POLICY:
5176         case FS_IOC_GET_ENCRYPTION_POLICY_EX:
5177         case FS_IOC_ADD_ENCRYPTION_KEY:
5178         case FS_IOC_REMOVE_ENCRYPTION_KEY:
5179         case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
5180         case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
5181         case FS_IOC_GET_ENCRYPTION_NONCE:
5182         case F2FS_IOC_GARBAGE_COLLECT:
5183         case F2FS_IOC_WRITE_CHECKPOINT:
5184         case F2FS_IOC_DEFRAGMENT:
5185         case F2FS_IOC_FLUSH_DEVICE:
5186         case F2FS_IOC_GET_FEATURES:
5187         case F2FS_IOC_GET_PIN_FILE:
5188         case F2FS_IOC_SET_PIN_FILE:
5189         case F2FS_IOC_PRECACHE_EXTENTS:
5190         case F2FS_IOC_RESIZE_FS:
5191         case FS_IOC_ENABLE_VERITY:
5192         case FS_IOC_MEASURE_VERITY:
5193         case FS_IOC_READ_VERITY_METADATA:
5194         case FS_IOC_GETFSLABEL:
5195         case FS_IOC_SETFSLABEL:
5196         case F2FS_IOC_GET_COMPRESS_BLOCKS:
5197         case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
5198         case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
5199         case F2FS_IOC_SEC_TRIM_FILE:
5200         case F2FS_IOC_GET_COMPRESS_OPTION:
5201         case F2FS_IOC_SET_COMPRESS_OPTION:
5202         case F2FS_IOC_DECOMPRESS_FILE:
5203         case F2FS_IOC_COMPRESS_FILE:
5204                 break;
5205         default:
5206                 return -ENOIOCTLCMD;
5207         }
5208         return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
5209 }
5210 #endif
5211
5212 const struct file_operations f2fs_file_operations = {
5213         .llseek         = f2fs_llseek,
5214         .read_iter      = f2fs_file_read_iter,
5215         .write_iter     = f2fs_file_write_iter,
5216         .iopoll         = iocb_bio_iopoll,
5217         .open           = f2fs_file_open,
5218         .release        = f2fs_release_file,
5219         .mmap           = f2fs_file_mmap,
5220         .flush          = f2fs_file_flush,
5221         .fsync          = f2fs_sync_file,
5222         .fallocate      = f2fs_fallocate,
5223         .unlocked_ioctl = f2fs_ioctl,
5224 #ifdef CONFIG_COMPAT
5225         .compat_ioctl   = f2fs_compat_ioctl,
5226 #endif
5227         .splice_read    = f2fs_file_splice_read,
5228         .splice_write   = iter_file_splice_write,
5229         .fadvise        = f2fs_file_fadvise,
5230         .fop_flags      = FOP_BUFFER_RASYNC,
5231 };