Merge tag 'writeback_for_v5.9-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 28 Aug 2020 17:57:14 +0000 (10:57 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 28 Aug 2020 17:57:14 +0000 (10:57 -0700)
diff --combined fs/ext4/inode.c

index 3a196d81f59497afa7408af427122ad728c2e3d8,4db497f02ffbbb0971854f1fa1816f054ee3c7a3..bf596467c234ce052c4b784cea15c30cd7da118d
--- 1/fs/ext4/inode.c
--- 2/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@@ -383,7 -383,7 +383,7 @@@ void ext4_da_update_reserve_space(struc
          */
         if ((ei->i_reserved_data_blocks == 0) &&
             !inode_is_open_for_write(inode))
- -              ext4_discard_preallocations(inode);
+ +              ext4_discard_preallocations(inode, 0);
   }
   
   static int __check_block_validity(struct inode *inode, const char *func,
@@@ -394,7 -394,8 +394,7 @@@
             (inode->i_ino ==
              le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
                 return 0;
- -      if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
- -                                 map->m_len)) {
+ +      if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) {
                 ext4_error_inode(inode, func, line, map->m_pblk,
                                  "lblock %lu mapped to illegal pblock %llu "
                                  "(length %d)", (unsigned long) map->m_lblk,
@@@ -1095,7 -1096,7 +1095,7 @@@ static int ext4_block_write_begin(struc
         }
         if (unlikely(err)) {
                 page_zero_new_buffers(page, from, to);
- -      } else if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) {
+ +      } else if (fscrypt_inode_uses_fs_layer_crypto(inode)) {
                 for (i = 0; i < nr_wait; i++) {
                         int err2;
   
@@@ -3287,7 -3288,7 +3287,7 @@@ static int ext4_releasepage(struct pag
         if (PageChecked(page))
                 return 0;
         if (journal)
- -              return jbd2_journal_try_to_free_buffers(journal, page, wait);
+ +              return jbd2_journal_try_to_free_buffers(journal, page);
         else
                 return try_to_free_buffers(page);
   }
@@@ -3736,7 -3737,7 +3736,7 @@@ static int __ext4_block_zero_page_range
                 /* Uhhuh. Read error. Complain and punt. */
                 if (!buffer_uptodate(bh))
                         goto unlock;
- -              if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) {
+ +              if (fscrypt_inode_uses_fs_layer_crypto(inode)) {
                         /* We expect the key to be set. */
                         BUG_ON(!fscrypt_has_encryption_key(inode));
                         err = fscrypt_decrypt_pagecache_blocks(page, blocksize,
@@@ -4055,7 -4056,7 +4055,7 @@@ int ext4_punch_hole(struct inode *inode
         if (stop_block > first_block) {
   
                 down_write(&EXT4_I(inode)->i_data_sem);
- -              ext4_discard_preallocations(inode);
+ +              ext4_discard_preallocations(inode, 0);
   
                 ret = ext4_es_remove_extent(inode, first_block,
                                             stop_block - first_block);
@@@ -4162,7 -4163,7 +4162,7 @@@ int ext4_truncate(struct inode *inode
         trace_ext4_truncate_enter(inode);
   
         if (!ext4_can_truncate(inode))
- -              return 0;
+ +              goto out_trace;
   
         if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
                 ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
@@@ -4171,14 -4172,16 +4171,14 @@@
                 int has_inline = 1;
   
                 err = ext4_inline_data_truncate(inode, &has_inline);
- -              if (err)
- -                      return err;
- -              if (has_inline)
- -                      return 0;
+ +              if (err || has_inline)
+ +                      goto out_trace;
         }
   
         /* If we zero-out tail of the page, we have to create jinode for jbd2 */
         if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
                 if (ext4_inode_attach_jinode(inode) < 0)
- -                      return 0;
+ +                      goto out_trace;
         }
   
         if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@@ -4187,10 -4190,8 +4187,10 @@@
                 credits = ext4_blocks_for_truncate(inode);
   
         handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
- -      if (IS_ERR(handle))
- -              return PTR_ERR(handle);
+ +      if (IS_ERR(handle)) {
+ +              err = PTR_ERR(handle);
+ +              goto out_trace;
+ +      }
   
         if (inode->i_size & (inode->i_sb->s_blocksize - 1))
                 ext4_block_truncate_page(handle, mapping, inode->i_size);
@@@ -4210,7 -4211,7 +4210,7 @@@
   
         down_write(&EXT4_I(inode)->i_data_sem);
   
- -      ext4_discard_preallocations(inode);
+ +      ext4_discard_preallocations(inode, 0);
   
         if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                 err = ext4_ext_truncate(handle, inode);
@@@ -4241,7 -4242,6 +4241,7 @@@ out_stop
                 err = err2;
         ext4_journal_stop(handle);
   
+ +out_trace:
         trace_ext4_truncate_exit(inode);
         return err;
   }
@@@ -4403,11 -4403,9 +4403,11 @@@ int ext4_get_inode_loc(struct inode *in
                 !ext4_test_inode_state(inode, EXT4_STATE_XATTR));
   }
   
- -static bool ext4_should_use_dax(struct inode *inode)
+ +static bool ext4_should_enable_dax(struct inode *inode)
   {
- -      if (!test_opt(inode->i_sb, DAX))
+ +      struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ +
+ +      if (test_opt2(inode->i_sb, DAX_NEVER))
                 return false;
         if (!S_ISREG(inode->i_mode))
                 return false;
@@@ -4419,21 -4417,14 +4419,21 @@@
                 return false;
         if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
                 return false;
- -      return true;
+ +      if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags))
+ +              return false;
+ +      if (test_opt(inode->i_sb, DAX_ALWAYS))
+ +              return true;
+ +
+ +      return ext4_test_inode_flag(inode, EXT4_INODE_DAX);
   }
   
- -void ext4_set_inode_flags(struct inode *inode)
+ +void ext4_set_inode_flags(struct inode *inode, bool init)
   {
         unsigned int flags = EXT4_I(inode)->i_flags;
         unsigned int new_fl = 0;
   
+ +      WARN_ON_ONCE(IS_DAX(inode) && init);
+ +
         if (flags & EXT4_SYNC_FL)
                 new_fl |= S_SYNC;
         if (flags & EXT4_APPEND_FL)
@@@ -4444,13 -4435,8 +4444,13 @@@
                 new_fl |= S_NOATIME;
         if (flags & EXT4_DIRSYNC_FL)
                 new_fl |= S_DIRSYNC;
- -      if (ext4_should_use_dax(inode))
+ +
+ +      /* Because of the way inode_set_flags() works we must preserve S_DAX
+ +       * here if already set. */
+ +      new_fl |= (inode->i_flags & S_DAX);
+ +      if (init && ext4_should_enable_dax(inode))
                 new_fl |= S_DAX;
+ +
         if (flags & EXT4_ENCRYPT_FL)
                 new_fl |= S_ENCRYPTED;
         if (flags & EXT4_CASEFOLD_FL)
@@@ -4664,7 -4650,7 +4664,7 @@@ struct inode *__ext4_iget(struct super_
                  * not initialized on a new filesystem. */
         }
         ei->i_flags = le32_to_cpu(raw_inode->i_flags);
- -      ext4_set_inode_flags(inode);
+ +      ext4_set_inode_flags(inode, true);
         inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
         ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
         if (ext4_has_feature_64bit(sb))
@@@ -4760,7 -4746,7 +4760,7 @@@
   
         ret = 0;
         if (ei->i_file_acl &&
- -          !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
+ +          !ext4_inode_block_valid(inode, ei->i_file_acl, 1)) {
                 ext4_error_inode(inode, function, line, 0,
                                  "iget: bad extended attribute block %llu",
                                  ei->i_file_acl);
@@@ -4901,7 -4887,7 +4901,7 @@@ static void __ext4_update_other_inode_t
             (inode->i_state & I_DIRTY_TIME)) {
                 struct ext4_inode_info  *ei = EXT4_I(inode);
   
-               inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
+               inode->i_state &= ~I_DIRTY_TIME;
                 spin_unlock(&inode->i_lock);
   
                 spin_lock(&ei->i_raw_lock);
diff --combined fs/xfs/libxfs/xfs_trans_inode.c

index e15129647e00c97143dcfd5b40cf376e3fd8d20f,1b4df6636944c5d161cd0a01f436212a8d0bc0e4..b7e222befb085fb749bd728ab8559ea305498342
--- 1/fs/xfs/libxfs/xfs_trans_inode.c
--- 2/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@@ -8,8 -8,6 +8,8 @@@
   #include "xfs_shared.h"
   #include "xfs_format.h"
   #include "xfs_log_format.h"
+ +#include "xfs_trans_resv.h"
+ +#include "xfs_mount.h"
   #include "xfs_inode.h"
   #include "xfs_trans.h"
   #include "xfs_trans_priv.h"
@@@ -38,7 -36,6 +38,7 @@@ xfs_trans_ijoin
   
         ASSERT(iip->ili_lock_flags == 0);
         iip->ili_lock_flags = lock_flags;
+ +      ASSERT(!xfs_iflags_test(ip, XFS_ISTALE));
   
         /*
          * Get a log_item_desc to point at the new item.
@@@ -74,35 -71,24 +74,35 @@@ xfs_trans_ichgtime
   }
   
   /*
- - * This is called to mark the fields indicated in fieldmask as needing
- - * to be logged when the transaction is committed.  The inode must
- - * already be associated with the given transaction.
+ + * This is called to mark the fields indicated in fieldmask as needing to be
+ + * logged when the transaction is committed.  The inode must already be
+ + * associated with the given transaction.
    *
- - * The values for fieldmask are defined in xfs_inode_item.h.  We always
- - * log all of the core inode if any of it has changed, and we always log
- - * all of the inline data/extents/b-tree root if any of them has changed.
+ + * The values for fieldmask are defined in xfs_inode_item.h.  We always log all
+ + * of the core inode if any of it has changed, and we always log all of the
+ + * inline data/extents/b-tree root if any of them has changed.
+ + *
+ + * Grab and pin the cluster buffer associated with this inode to avoid RMW
+ + * cycles at inode writeback time. Avoid the need to add error handling to every
+ + * xfs_trans_log_inode() call by shutting down on read error.  This will cause
+ + * transactions to fail and everything to error out, just like if we return a
+ + * read error in a dirty transaction and cancel it.
    */
   void
   xfs_trans_log_inode(
- -      xfs_trans_t     *tp,
- -      xfs_inode_t     *ip,
- -      uint            flags)
+ +      struct xfs_trans        *tp,
+ +      struct xfs_inode        *ip,
+ +      uint                    flags)
   {
- -      struct inode    *inode = VFS_I(ip);
+ +      struct xfs_inode_log_item *iip = ip->i_itemp;
+ +      struct inode            *inode = VFS_I(ip);
+ +      uint                    iversion_flags = 0;
   
- -      ASSERT(ip->i_itemp != NULL);
+ +      ASSERT(iip);
         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ +      ASSERT(!xfs_iflags_test(ip, XFS_ISTALE));
+ +
+ +      tp->t_flags |= XFS_TRANS_DIRTY;
   
         /*
          * Don't bother with i_lock for the I_DIRTY_TIME check here, as races
@@@ -110,12 -96,21 +110,12 @@@
          * to log the timestamps, or will clear already cleared fields in the
          * worst case.
          */
-       if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) {
+       if (inode->i_state & I_DIRTY_TIME) {
                 spin_lock(&inode->i_lock);
-               inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
+               inode->i_state &= ~I_DIRTY_TIME;
                 spin_unlock(&inode->i_lock);
         }
   
- -      /*
- -       * Record the specific change for fdatasync optimisation. This
- -       * allows fdatasync to skip log forces for inodes that are only
- -       * timestamp dirty. We do this before the change count so that
- -       * the core being logged in this case does not impact on fdatasync
- -       * behaviour.
- -       */
- -      ip->i_itemp->ili_fsync_fields |= flags;
- -
         /*
          * First time we log the inode in a transaction, bump the inode change
          * counter if it is configured for this to occur. While we have the
@@@ -125,64 -120,23 +125,64 @@@
          * set however, then go ahead and bump the i_version counter
          * unconditionally.
          */
- -      if (!test_and_set_bit(XFS_LI_DIRTY, &ip->i_itemp->ili_item.li_flags) &&
- -          IS_I_VERSION(VFS_I(ip))) {
- -              if (inode_maybe_inc_iversion(VFS_I(ip), flags & XFS_ILOG_CORE))
- -                      flags |= XFS_ILOG_CORE;
+ +      if (!test_and_set_bit(XFS_LI_DIRTY, &iip->ili_item.li_flags)) {
+ +              if (IS_I_VERSION(inode) &&
+ +                  inode_maybe_inc_iversion(inode, flags & XFS_ILOG_CORE))
+ +                      iversion_flags = XFS_ILOG_CORE;
         }
   
- -      tp->t_flags |= XFS_TRANS_DIRTY;
+ +      /*
+ +       * Record the specific change for fdatasync optimisation. This allows
+ +       * fdatasync to skip log forces for inodes that are only timestamp
+ +       * dirty.
+ +       */
+ +      spin_lock(&iip->ili_lock);
+ +      iip->ili_fsync_fields |= flags;
+ +
+ +      if (!iip->ili_item.li_buf) {
+ +              struct xfs_buf  *bp;
+ +              int             error;
+ +
+ +              /*
+ +               * We hold the ILOCK here, so this inode is not going to be
+ +               * flushed while we are here. Further, because there is no
+ +               * buffer attached to the item, we know that there is no IO in
+ +               * progress, so nothing will clear the ili_fields while we read
+ +               * in the buffer. Hence we can safely drop the spin lock and
+ +               * read the buffer knowing that the state will not change from
+ +               * here.
+ +               */
+ +              spin_unlock(&iip->ili_lock);
+ +              error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, NULL,
+ +                                      &bp, 0);
+ +              if (error) {
+ +                      xfs_force_shutdown(ip->i_mount, SHUTDOWN_META_IO_ERROR);
+ +                      return;
+ +              }
+ +
+ +              /*
+ +               * We need an explicit buffer reference for the log item but
+ +               * don't want the buffer to remain attached to the transaction.
+ +               * Hold the buffer but release the transaction reference once
+ +               * we've attached the inode log item to the buffer log item
+ +               * list.
+ +               */
+ +              xfs_buf_hold(bp);
+ +              spin_lock(&iip->ili_lock);
+ +              iip->ili_item.li_buf = bp;
+ +              bp->b_flags |= _XBF_INODES;
+ +              list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list);
+ +              xfs_trans_brelse(tp, bp);
+ +      }
   
         /*
- -       * Always OR in the bits from the ili_last_fields field.
- -       * This is to coordinate with the xfs_iflush() and xfs_iflush_done()
- -       * routines in the eventual clearing of the ili_fields bits.
- -       * See the big comment in xfs_iflush() for an explanation of
- -       * this coordination mechanism.
+ +       * Always OR in the bits from the ili_last_fields field.  This is to
+ +       * coordinate with the xfs_iflush() and xfs_iflush_done() routines in
+ +       * the eventual clearing of the ili_fields bits.  See the big comment in
+ +       * xfs_iflush() for an explanation of this coordination mechanism.
          */
- -      flags |= ip->i_itemp->ili_last_fields;
- -      ip->i_itemp->ili_fields |= flags;
+ +      iip->ili_fields |= (flags | iip->ili_last_fields | iversion_flags);
+ +      spin_unlock(&iip->ili_lock);
   }
   
   int
diff --combined include/linux/fs.h

index e019ea2f1347e6ebddea6cfeab01047c6af00660,45eadf5bea5d72b9466adbf8ba72c9aceb125be1..7519ae003a082cfd3d87d7628a91bd8f3b3a9fda
--- 1/include/linux/fs.h
--- 2/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -175,9 -175,6 +175,9 @@@ typedef int (dio_iodone_t)(struct kioc
   /* File does not contribute to nr_files count */
   #define FMODE_NOACCOUNT               ((__force fmode_t)0x20000000)
   
+ +/* File supports async buffered reads */
+ +#define FMODE_BUF_RASYNC      ((__force fmode_t)0x40000000)
+ +
   /*
    * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
    * that indicates that they should check the contents of the iovec are
@@@ -318,9 -315,6 +318,9 @@@ enum rw_hint 
   #define IOCB_SYNC             (1 << 5)
   #define IOCB_WRITE            (1 << 6)
   #define IOCB_NOWAIT           (1 << 7)
+ +/* iocb->ki_waitq is valid */
+ +#define IOCB_WAITQ            (1 << 8)
+ +#define IOCB_NOIO             (1 << 9)
   
   struct kiocb {
         struct file             *ki_filp;
@@@ -334,10 -328,7 +334,10 @@@
         int                     ki_flags;
         u16                     ki_hint;
         u16                     ki_ioprio; /* See linux/ioprio.h */
- -      unsigned int            ki_cookie; /* for ->iopoll */
+ +      union {
+ +              unsigned int            ki_cookie; /* for ->iopoll */
+ +              struct wait_page_queue  *ki_waitq; /* for async buffered IO */
+ +      };
   
         randomized_struct_fields_end
   };
@@@ -479,6 -470,45 +479,6 @@@ struct address_space 
          * must be enforced here for CRIS, to let the least significant bit
          * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
          */
- -struct request_queue;
- -
- -struct block_device {
- -      dev_t                   bd_dev;  /* not a kdev_t - it's a search key */
- -      int                     bd_openers;
- -      struct inode *          bd_inode;       /* will die */
- -      struct super_block *    bd_super;
- -      struct mutex            bd_mutex;       /* open/close mutex */
- -      void *                  bd_claiming;
- -      void *                  bd_holder;
- -      int                     bd_holders;
- -      bool                    bd_write_holder;
- -#ifdef CONFIG_SYSFS
- -      struct list_head        bd_holder_disks;
- -#endif
- -      struct block_device *   bd_contains;
- -      unsigned                bd_block_size;
- -      u8                      bd_partno;
- -      struct hd_struct *      bd_part;
- -      /* number of times partitions within this device have been opened. */
- -      unsigned                bd_part_count;
- -      int                     bd_invalidated;
- -      struct gendisk *        bd_disk;
- -      struct request_queue *  bd_queue;
- -      struct backing_dev_info *bd_bdi;
- -      struct list_head        bd_list;
- -      /*
- -       * Private data.  You must have bd_claim'ed the block_device
- -       * to use this.  NOTE:  bd_claim allows an owner to claim
- -       * the same device multiple times, the owner must take special
- -       * care to not mess up bd_private for that case.
- -       */
- -      unsigned long           bd_private;
- -
- -      /* The counter of freeze processes */
- -      int                     bd_fsfreeze_count;
- -      /* Mutex for freeze */
- -      struct mutex            bd_fsfreeze_mutex;
- -} __randomize_layout;
   
   /* XArray tags, for tagging dirty and writeback pages in the pagecache. */
   #define PAGECACHE_TAG_DIRTY   XA_MARK_0
@@@ -518,16 -548,6 +518,16 @@@ static inline void i_mmap_unlock_read(s
         up_read(&mapping->i_mmap_rwsem);
   }
   
+ +static inline void i_mmap_assert_locked(struct address_space *mapping)
+ +{
+ +      lockdep_assert_held(&mapping->i_mmap_rwsem);
+ +}
+ +
+ +static inline void i_mmap_assert_write_locked(struct address_space *mapping)
+ +{
+ +      lockdep_assert_held_write(&mapping->i_mmap_rwsem);
+ +}
+ +
   /*
    * Might pages of this file be mapped into userspace?
    */
@@@ -538,7 -558,7 +538,7 @@@ static inline int mapping_mapped(struc
   
   /*
    * Might pages of this file have been modified in userspace?
- - * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
+ + * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap
    * marks vma as VM_SHARED if it is shared, and the file was opened for
    * writing i.e. vma may be mprotected writable even if now readonly.
    *
@@@ -887,6 -907,8 +887,6 @@@ static inline unsigned imajor(const str
         return MAJOR(inode->i_rdev);
   }
   
- -extern struct block_device *I_BDEV(struct inode *inode);
- -
   struct fown_struct {
         rwlock_t lock;          /* protects pid, uid, euid fields */
         struct pid *pid;        /* pid or -pgrp where SIGIO should be sent */
@@@ -1026,7 -1048,6 +1026,7 @@@ struct lock_manager_operations 
         bool (*lm_break)(struct file_lock *);
         int (*lm_change)(struct file_lock *, int, struct list_head *);
         void (*lm_setup)(struct file_lock *, void **);
+ +      bool (*lm_breaker_owns_lease)(struct file_lock *);
   };
   
   struct lock_manager {
@@@ -1358,7 -1379,6 +1358,7 @@@ extern int send_sigurg(struct fown_stru
   #define SB_NODIRATIME 2048    /* Do not update directory access times */
   #define SB_SILENT     32768
   #define SB_POSIXACL   (1<<16) /* VFS does not apply the umask */
+ +#define SB_INLINECRYPT        (1<<17) /* Use blk-crypto for encrypted files */
   #define SB_KERNMOUNT  (1<<22) /* this is a kern_mount call */
   #define SB_I_VERSION  (1<<23) /* Update inode I_version field */
   #define SB_LAZYTIME   (1<<25) /* Update the on-disk [acm]times lazily */
@@@ -1722,10 -1742,6 +1722,10 @@@ int vfs_mkobj(struct dentry *, umode_t
                 int (*f)(struct dentry *, umode_t, void *),
                 void *);
   
+ +int vfs_fchown(struct file *file, uid_t user, gid_t group);
+ +int vfs_fchmod(struct file *file, umode_t mode);
+ +int vfs_utimes(const struct path *path, struct timespec64 *times);
+ +
   extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
   
   #ifdef CONFIG_COMPAT
@@@ -1757,6 -1773,14 +1757,6 @@@ struct dir_context 
         loff_t pos;
   };
   
- -struct block_device_operations;
- -
- -/* These macros are for out of kernel modules to test that
- - * the kernel supports the unlocked_ioctl and compat_ioctl
- - * fields in struct file_operations. */
- -#define HAVE_COMPAT_IOCTL 1
- -#define HAVE_UNLOCKED_IOCTL 1
- -
   /*
    * These flags let !MMU mmap() govern direct device mapping vs immediate
    * copying more easily for MAP_PRIVATE, especially for ROM filesystems.
@@@ -1892,6 -1916,7 +1892,6 @@@ ssize_t rw_copy_check_uvector(int type
                               struct iovec *fast_pointer,
                               struct iovec **ret_pointer);
   
- -extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
   extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
   extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
   extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
@@@ -1956,27 -1981,27 +1956,27 @@@ struct super_operations 
   /*
    * Inode flags - they have no relation to superblock flags now
    */
- -#define S_SYNC                1       /* Writes are synced at once */
- -#define S_NOATIME     2       /* Do not update access times */
- -#define S_APPEND      4       /* Append-only file */
- -#define S_IMMUTABLE   8       /* Immutable file */
- -#define S_DEAD                16      /* removed, but still open directory */
- -#define S_NOQUOTA     32      /* Inode is not counted to quota */
- -#define S_DIRSYNC     64      /* Directory modifications are synchronous */
- -#define S_NOCMTIME    128     /* Do not update file c/mtime */
- -#define S_SWAPFILE    256     /* Do not truncate: swapon got its bmaps */
- -#define S_PRIVATE     512     /* Inode is fs-internal */
- -#define S_IMA         1024    /* Inode has an associated IMA struct */
- -#define S_AUTOMOUNT   2048    /* Automount/referral quasi-directory */
- -#define S_NOSEC               4096    /* no suid or xattr security attributes */
+ +#define S_SYNC                (1 << 0)  /* Writes are synced at once */
+ +#define S_NOATIME     (1 << 1)  /* Do not update access times */
+ +#define S_APPEND      (1 << 2)  /* Append-only file */
+ +#define S_IMMUTABLE   (1 << 3)  /* Immutable file */
+ +#define S_DEAD                (1 << 4)  /* removed, but still open directory */
+ +#define S_NOQUOTA     (1 << 5)  /* Inode is not counted to quota */
+ +#define S_DIRSYNC     (1 << 6)  /* Directory modifications are synchronous */
+ +#define S_NOCMTIME    (1 << 7)  /* Do not update file c/mtime */
+ +#define S_SWAPFILE    (1 << 8)  /* Do not truncate: swapon got its bmaps */
+ +#define S_PRIVATE     (1 << 9)  /* Inode is fs-internal */
+ +#define S_IMA         (1 << 10) /* Inode has an associated IMA struct */
+ +#define S_AUTOMOUNT   (1 << 11) /* Automount/referral quasi-directory */
+ +#define S_NOSEC               (1 << 12) /* no suid or xattr security attributes */
   #ifdef CONFIG_FS_DAX
- -#define S_DAX         8192    /* Direct Access, avoiding the page cache */
+ +#define S_DAX         (1 << 13) /* Direct Access, avoiding the page cache */
   #else
- -#define S_DAX         0       /* Make all the DAX code disappear */
+ +#define S_DAX         0         /* Make all the DAX code disappear */
   #endif
- -#define S_ENCRYPTED   16384   /* Encrypted file (using fs/crypto/) */
- -#define S_CASEFOLD    32768   /* Casefolded file */
- -#define S_VERITY      65536   /* Verity file (using fs/verity/) */
+ +#define S_ENCRYPTED   (1 << 14) /* Encrypted file (using fs/crypto/) */
+ +#define S_CASEFOLD    (1 << 15) /* Casefolded file */
+ +#define S_VERITY      (1 << 16) /* Verity file (using fs/verity/) */
   
   /*
    * Note that nosuid etc flags are inode-specific: setting some file-system
@@@ -2132,6 -2157,10 +2132,10 @@@ static inline void kiocb_clone(struct k
    *
    * I_DONTCACHE                Evict inode as soon as it is not used anymore.
    *
+  * I_SYNC_QUEUED      Inode is queued in b_io or b_more_io writeback lists.
+  *                    Used to detect that mark_inode_dirty() should not move
+  *                    inode between dirty lists.
+  *
    * Q: What is the difference between I_WILL_FREE and I_FREEING?
    */
   #define I_DIRTY_SYNC          (1 << 0)
@@@ -2149,12 -2178,11 +2153,11 @@@
   #define I_DIO_WAKEUP          (1 << __I_DIO_WAKEUP)
   #define I_LINKABLE            (1 << 10)
   #define I_DIRTY_TIME          (1 << 11)
- #define __I_DIRTY_TIME_EXPIRED        12
- #define I_DIRTY_TIME_EXPIRED  (1 << __I_DIRTY_TIME_EXPIRED)
   #define I_WB_SWITCH           (1 << 13)
   #define I_OVL_INUSE           (1 << 14)
   #define I_CREATING            (1 << 15)
   #define I_DONTCACHE           (1 << 16)
+ #define I_SYNC_QUEUED         (1 << 17)
   
   #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
   #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
@@@ -2238,9 -2266,18 +2241,9 @@@ struct file_system_type 
   
   #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
   
- -#ifdef CONFIG_BLOCK
   extern struct dentry *mount_bdev(struct file_system_type *fs_type,
         int flags, const char *dev_name, void *data,
         int (*fill_super)(struct super_block *, void *, int));
- -#else
- -static inline struct dentry *mount_bdev(struct file_system_type *fs_type,
- -      int flags, const char *dev_name, void *data,
- -      int (*fill_super)(struct super_block *, void *, int))
- -{
- -      return ERR_PTR(-ENODEV);
- -}
- -#endif
   extern struct dentry *mount_single(struct file_system_type *fs_type,
         int flags, void *data,
         int (*fill_super)(struct super_block *, void *, int));
@@@ -2249,7 -2286,14 +2252,7 @@@ extern struct dentry *mount_nodev(struc
         int (*fill_super)(struct super_block *, void *, int));
   extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
   void generic_shutdown_super(struct super_block *sb);
- -#ifdef CONFIG_BLOCK
   void kill_block_super(struct super_block *sb);
- -#else
- -static inline void kill_block_super(struct super_block *sb)
- -{
- -      BUG();
- -}
- -#endif
   void kill_anon_super(struct super_block *sb);
   void kill_litter_super(struct super_block *sb);
   void deactivate_super(struct super_block *sb);
@@@ -2539,16 -2583,95 +2542,16 @@@ extern struct kmem_cache *names_cachep
   #define __getname()           kmem_cache_alloc(names_cachep, GFP_KERNEL)
   #define __putname(name)               kmem_cache_free(names_cachep, (void *)(name))
   
- -#ifdef CONFIG_BLOCK
- -extern int register_blkdev(unsigned int, const char *);
- -extern void unregister_blkdev(unsigned int, const char *);
- -extern struct block_device *bdget(dev_t);
- -extern struct block_device *bdgrab(struct block_device *bdev);
- -extern void bd_set_size(struct block_device *, loff_t size);
- -extern void bd_forget(struct inode *inode);
- -extern void bdput(struct block_device *);
- -extern void invalidate_bdev(struct block_device *);
- -extern void iterate_bdevs(void (*)(struct block_device *, void *), void *);
- -extern int sync_blockdev(struct block_device *bdev);
- -extern void kill_bdev(struct block_device *);
- -extern struct super_block *freeze_bdev(struct block_device *);
- -extern void emergency_thaw_all(void);
- -extern void emergency_thaw_bdev(struct super_block *sb);
- -extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
- -extern int fsync_bdev(struct block_device *);
- -
   extern struct super_block *blockdev_superblock;
- -
   static inline bool sb_is_blkdev_sb(struct super_block *sb)
   {
- -      return sb == blockdev_superblock;
- -}
- -#else
- -static inline void bd_forget(struct inode *inode) {}
- -static inline int sync_blockdev(struct block_device *bdev) { return 0; }
- -static inline void kill_bdev(struct block_device *bdev) {}
- -static inline void invalidate_bdev(struct block_device *bdev) {}
- -
- -static inline struct super_block *freeze_bdev(struct block_device *sb)
- -{
- -      return NULL;
+ +      return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock;
   }
   
- -static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
- -{
- -      return 0;
- -}
- -
- -static inline int emergency_thaw_bdev(struct super_block *sb)
- -{
- -      return 0;
- -}
- -
- -static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg)
- -{
- -}
- -
- -static inline bool sb_is_blkdev_sb(struct super_block *sb)
- -{
- -      return false;
- -}
- -#endif
+ +void emergency_thaw_all(void);
   extern int sync_filesystem(struct super_block *);
   extern const struct file_operations def_blk_fops;
   extern const struct file_operations def_chr_fops;
- -#ifdef CONFIG_BLOCK
- -extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
- -extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
- -extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
- -extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
- -                                             void *holder);
- -extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
- -                                            void *holder);
- -extern struct block_device *bd_start_claiming(struct block_device *bdev,
- -                                            void *holder);
- -extern void bd_finish_claiming(struct block_device *bdev,
- -                             struct block_device *whole, void *holder);
- -extern void bd_abort_claiming(struct block_device *bdev,
- -                            struct block_device *whole, void *holder);
- -extern void blkdev_put(struct block_device *bdev, fmode_t mode);
- -
- -#ifdef CONFIG_SYSFS
- -extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
- -extern void bd_unlink_disk_holder(struct block_device *bdev,
- -                                struct gendisk *disk);
- -#else
- -static inline int bd_link_disk_holder(struct block_device *bdev,
- -                                    struct gendisk *disk)
- -{
- -      return 0;
- -}
- -static inline void bd_unlink_disk_holder(struct block_device *bdev,
- -                                       struct gendisk *disk)
- -{
- -}
- -#endif
- -#endif
   
   /* fs/char_dev.c */
   #define CHRDEV_MAJOR_MAX 512
@@@ -2579,12 -2702,31 +2582,12 @@@ static inline void unregister_chrdev(un
         __unregister_chrdev(major, 0, 256, name);
   }
   
- -/* fs/block_dev.c */
- -#define BDEVNAME_SIZE 32      /* Largest string for a blockdev identifier */
- -#define BDEVT_SIZE    10      /* Largest string for MAJ:MIN for blkdev */
- -
- -#ifdef CONFIG_BLOCK
- -#define BLKDEV_MAJOR_MAX      512
- -extern const char *bdevname(struct block_device *bdev, char *buffer);
- -extern struct block_device *lookup_bdev(const char *);
- -extern void blkdev_show(struct seq_file *,off_t);
- -
- -#else
- -#define BLKDEV_MAJOR_MAX      0
- -#endif
- -
   extern void init_special_inode(struct inode *, umode_t, dev_t);
   
   /* Invalid inode operations -- fs/bad_inode.c */
   extern void make_bad_inode(struct inode *);
   extern bool is_bad_inode(struct inode *);
   
- -#ifdef CONFIG_BLOCK
- -extern int revalidate_disk(struct gendisk *);
- -extern int check_disk_change(struct block_device *);
- -extern int __invalidate_device(struct block_device *, bool);
- -#endif
   unsigned long invalidate_mapping_pages(struct address_space *mapping,
                                         pgoff_t start, pgoff_t end);
   
@@@ -2660,7 -2802,7 +2663,7 @@@ static inline void filemap_set_wb_err(s
   }
   
   /**
- - * filemap_check_wb_error - has an error occurred since the mark was sampled?
+ + * filemap_check_wb_err - has an error occurred since the mark was sampled?
    * @mapping: mapping to check for writeback errors
    * @since: previously-sampled errseq_t
    *
@@@ -2689,7 -2831,7 +2692,7 @@@ static inline errseq_t filemap_sample_w
   
   /**
    * file_sample_sb_err - sample the current errseq_t to test for later errors
- - * @mapping: mapping to be sampled
+ + * @file: file pointer to be sampled
    *
    * Grab the most current superblock-level errseq_t value for the given
    * struct file.
@@@ -2895,7 -3037,6 +2898,7 @@@ extern int kernel_read_file_from_path_i
   extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t,
                                     enum kernel_read_file_id);
   extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *);
+ +ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos);
   extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *);
   extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *);
   extern struct file * open_exec(const char *);
@@@ -2960,21 -3101,6 +2963,21 @@@ extern void discard_new_inode(struct in
   extern unsigned int get_next_ino(void);
   extern void evict_inodes(struct super_block *sb);
   
+ +/*
+ + * Userspace may rely on the inode number being non-zero. For example, glibc
+ + * simply ignores files with zero i_ino in unlink() and other places.
+ + *
+ + * As an additional complication, if userspace was compiled with
+ + * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the
+ + * lower 32 bits, so we need to check that those aren't zero explicitly. With
+ + * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but
+ + * better safe than sorry.
+ + */
+ +static inline bool is_zero_ino(ino_t ino)
+ +{
+ +      return (u32)ino == 0;
+ +}
+ +
   extern void __iget(struct inode * inode);
   extern void iget_failed(struct inode *);
   extern void clear_inode(struct inode *);
@@@ -3000,6 -3126,10 +3003,6 @@@ static inline void remove_inode_hash(st
   
   extern void inode_sb_list_add(struct inode *inode);
   
- -#ifdef CONFIG_BLOCK
- -extern int bdev_read_only(struct block_device *);
- -#endif
- -extern int set_blocksize(struct block_device *, int);
   extern int sb_set_blocksize(struct super_block *, int);
   extern int sb_min_blocksize(struct super_block *, int);
   
@@@ -3076,8 -3206,6 +3079,8 @@@ enum 
         DIO_SKIP_HOLES  = 0x02,
   };
   
+ +void dio_end_io(struct bio *bio);
+ +
   ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
                              struct block_device *bdev, struct iov_iter *iter,
                              get_block_t get_block,
@@@ -3312,28 -3440,22 +3315,28 @@@ static inline int iocb_flags(struct fil
   
   static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
   {
+ +      int kiocb_flags = 0;
+ +
+ +      if (!flags)
+ +              return 0;
         if (unlikely(flags & ~RWF_SUPPORTED))
                 return -EOPNOTSUPP;
   
         if (flags & RWF_NOWAIT) {
                 if (!(ki->ki_filp->f_mode & FMODE_NOWAIT))
                         return -EOPNOTSUPP;
- -              ki->ki_flags |= IOCB_NOWAIT;
+ +              kiocb_flags |= IOCB_NOWAIT | IOCB_NOIO;
         }
         if (flags & RWF_HIPRI)
- -              ki->ki_flags |= IOCB_HIPRI;
+ +              kiocb_flags |= IOCB_HIPRI;
         if (flags & RWF_DSYNC)
- -              ki->ki_flags |= IOCB_DSYNC;
+ +              kiocb_flags |= IOCB_DSYNC;
         if (flags & RWF_SYNC)
- -              ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+ +              kiocb_flags |= (IOCB_DSYNC | IOCB_SYNC);
         if (flags & RWF_APPEND)
- -              ki->ki_flags |= IOCB_APPEND;
+ +              kiocb_flags |= IOCB_APPEND;
+ +
+ +      ki->ki_flags |= kiocb_flags;
         return 0;
   }
author	Linus Torvalds <[email protected]>
	Fri, 28 Aug 2020 17:57:14 +0000 (10:57 -0700)
committer	Linus Torvalds <[email protected]>
	Fri, 28 Aug 2020 17:57:14 +0000 (10:57 -0700)
		1	2
fs/ext4/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/libxfs/xfs_trans_inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fs.h	patch \|	diff1 \|	diff2 \|	blob \| history