Git Repo - linux.git/commitdiff
Merge tag 'xfs-5.15-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
authorLinus Torvalds <[email protected]>
Thu, 2 Sep 2021 15:26:03 +0000 (08:26 -0700)
committerLinus Torvalds <[email protected]>
Thu, 2 Sep 2021 15:26:03 +0000 (08:26 -0700)
Pull xfs updates from Darrick Wong:
 "There's a lot in this cycle.

  Starting with bug fixes: To avoid livelocks between the logging code
  and the quota code, we've disabled the ability of quotaoff to turn off
  quota accounting. (Admins can still disable quota enforcement, but
  truly turning off accounting requires a remount.) We've tried to do
  this in a careful enough way that there shouldn't be any user visible
  effects aside from quotaoff no longer randomly hanging the system.

  We've also fixed some bugs in runtime log behavior that could trip up
  log recovery if (otherwise unrelated) transactions manage to start and
  commit concurrently; some bugs in the GETFSMAP ioctl where we would
  incorrectly restrict the range of records output if the two xfs
  devices are of different sizes; a bug that resulted in fallocate
  funshare failing unnecessarily; and broken behavior in the xfs inode
  cache when DONTCACHE is in play.

  As for new features: we now batch inode inactivations in percpu
  background threads, which sharply decreases frontend thread wait time
  when performing file deletions and should improve overall directory
  tree deletion times. This eliminates both the problem where closing an
  unlinked file (especially on a frozen fs) can stall for a long time,
  and should also ease complaints about direct reclaim bogging down on
  unlinked file cleanup.

  Starting with this release, we've enabled pipelining of the XFS log.
  On workloads with high rates of metadata updates to different shards
  of the filesystem, multiple threads can be used to format committed
  log updates into log checkpoints.

  Lastly, with this release, two new features have graduated to
  supported status: inode btree counters (for faster mounts), and
  support for dates beyond Y2038. Expect these to be enabled by default
  in a future release of xfsprogs.

  Summary:

   - Fix a potential log livelock on busy filesystems when there's so
     much work going on that we can't finish a quotaoff before filling
     up the log by removing the ability to disable quota accounting.

   - Introduce the ability to use per-CPU data structures in XFS so that
     we can do a better job of maintaining CPU locality for certain
     operations.

   - Defer inode inactivation work to per-CPU lists, which will help us
     batch that processing. Deletions of large sparse files will
     *appear* to run faster, but all that means is that we've moved the
     work to the backend.

   - Drop the EXPERIMENTAL warnings from the y2038+ support and the
     inode btree counters, since it's been nearly a year and no
     complaints have come in.

   - Remove more of our bespoke kmem* variants in favor of using the
     standard Linux calls.

   - Prepare for the addition of log incompat features in upcoming
     cycles by actually adding code to support this.

   - Small cleanups of the xattr code in preparation for landing support
     for full logging of extended attribute updates in a future cycle.

   - Replace the various log shutdown state and flag code all over xfs
     with a single atomic bit flag.

   - Fix a serious log recovery bug where log item replay can be skipped
     based on the start lsn of a transaction even though the transaction
     commit lsn is the key data point for that by enforcing start lsns
     to appear in the log in the same order as commit lsns.

   - Enable pipelining in the code that pushes log items to disk.

   - Drop ->writepage.

   - Fix some bugs in GETFSMAP where the last fsmap record reported for
     a device could extend beyond the end of the device, and a separate
     bug where query keys for one device could be applied to another.

   - Don't let GETFSMAP query functions edit their input parameters.

   - Small cleanups to the scrub code's handling of perag structures.

   - Small cleanups to the incore inode tree walk code.

   - Constify btree function parameters that aren't changed, so that
     there will never again be confusion about range query functions
     changing their input parameters.

   - Standardize the format and names of tracepoint data attributes.

   - Clean up all the mount state and feature flags to use wrapped
     bitset functions instead of inconsistently open-coded flag checks.

   - Fix some confusion between xfs_buf hash table key variable vs.
     block number.

   - Fix a mis-interaction with iomap where we reported shared delalloc
     cow fork extents to iomap, which would cause the iomap unshare
     operation to return IO errors unnecessarily.

   - Fix DONTCACHE behavior"

* tag 'xfs-5.15-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (103 commits)
  xfs: fix I_DONTCACHE
  xfs: only set IOMAP_F_SHARED when providing a srcmap to a write
  xfs: fix perag structure refcounting error when scrub fails
  xfs: rename buffer cache index variable b_bn
  xfs: convert bp->b_bn references to xfs_buf_daddr()
  xfs: introduce xfs_buf_daddr()
  xfs: kill xfs_sb_version_has_v3inode()
  xfs: introduce xfs_sb_is_v5 helper
  xfs: remove unused xfs_sb_version_has wrappers
  xfs: convert xfs_sb_version_has checks to use mount features
  xfs: convert scrub to use mount-based feature checks
  xfs: open code sb verifier feature checks
  xfs: convert xfs_fs_geometry to use mount feature checks
  xfs: replace XFS_FORCED_SHUTDOWN with xfs_is_shutdown
  xfs: convert remaining mount flags to state flags
  xfs: convert mount flags to features
  xfs: consolidate mount option features in m_features
  xfs: replace xfs_sb_version checks with feature flag checks
  xfs: reflect sb features in xfs_mount
  xfs: rework attr2 feature and mount options
  ...

1  2 
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_file.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_super.c
include/linux/cpuhotplug.h

diff --combined fs/xfs/xfs_bmap_util.c
index 1cd3f940fa6aedce0f2912b42fb7b5cc5914b082,674c078c6e9e71a039be7e2c5d3e807a35190108..73a36b7be3bd109894e410be719c4fc774c17de7
@@@ -731,7 -731,7 +731,7 @@@ xfs_free_eofblocks
  
        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
        if (error) {
-               ASSERT(XFS_FORCED_SHUTDOWN(mp));
+               ASSERT(xfs_is_shutdown(mp));
                return error;
        }
  
@@@ -789,7 -789,7 +789,7 @@@ xfs_alloc_file_space
  
        trace_xfs_alloc_file_space(ip);
  
-       if (XFS_FORCED_SHUTDOWN(mp))
+       if (xfs_is_shutdown(mp))
                return -EIO;
  
        error = xfs_qm_dqattach(ip);
@@@ -1282,7 -1282,7 +1282,7 @@@ xfs_swap_extents_check_format
         * If we have to use the (expensive) rmap swap method, we can
         * handle any number of extents and any format.
         */
-       if (xfs_sb_version_hasrmapbt(&ip->i_mount->m_sb))
+       if (xfs_has_rmapbt(ip->i_mount))
                return 0;
  
        /*
@@@ -1516,7 -1516,7 +1516,7 @@@ xfs_swap_extent_forks
         * event of a crash. Set the owner change log flags now and leave the
         * bmbt scan as the last step.
         */
-       if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
+       if (xfs_has_v3inodes(ip->i_mount)) {
                if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE)
                        (*target_log_flags) |= XFS_ILOG_DOWNER;
                if (tip->i_df.if_format == XFS_DINODE_FMT_BTREE)
                (*src_log_flags) |= XFS_ILOG_DEXT;
                break;
        case XFS_DINODE_FMT_BTREE:
-               ASSERT(!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb) ||
+               ASSERT(!xfs_has_v3inodes(ip->i_mount) ||
                       (*src_log_flags & XFS_ILOG_DOWNER));
                (*src_log_flags) |= XFS_ILOG_DBROOT;
                break;
                break;
        case XFS_DINODE_FMT_BTREE:
                (*target_log_flags) |= XFS_ILOG_DBROOT;
-               ASSERT(!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb) ||
+               ASSERT(!xfs_has_v3inodes(ip->i_mount) ||
                       (*target_log_flags & XFS_ILOG_DOWNER));
                break;
        }
@@@ -1626,6 -1626,7 +1626,6 @@@ xfs_swap_extents
        struct xfs_bstat        *sbp = &sxp->sx_stat;
        int                     src_log_flags, target_log_flags;
        int                     error = 0;
 -      int                     lock_flags;
        uint64_t                f;
        int                     resblks = 0;
        unsigned int            flags = 0;
         * do the rest of the checks.
         */
        lock_two_nondirectories(VFS_I(ip), VFS_I(tip));
 -      lock_flags = XFS_MMAPLOCK_EXCL;
 -      xfs_lock_two_inodes(ip, XFS_MMAPLOCK_EXCL, tip, XFS_MMAPLOCK_EXCL);
 +      filemap_invalidate_lock_two(VFS_I(ip)->i_mapping,
 +                                  VFS_I(tip)->i_mapping);
  
        /* Verify that both files have the same format */
        if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
         * a block reservation because it's really just a remap operation
         * performed with log redo items!
         */
-       if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+       if (xfs_has_rmapbt(mp)) {
                int             w = XFS_DATA_FORK;
                uint32_t        ipnext = ip->i_df.if_nextents;
                uint32_t        tipnext = tip->i_df.if_nextents;
         * or cancel will unlock the inodes from this point onwards.
         */
        xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL);
 -      lock_flags |= XFS_ILOCK_EXCL;
        xfs_trans_ijoin(tp, ip, 0);
        xfs_trans_ijoin(tp, tip, 0);
  
        src_log_flags = XFS_ILOG_CORE;
        target_log_flags = XFS_ILOG_CORE;
  
-       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+       if (xfs_has_rmapbt(mp))
                error = xfs_swap_extent_rmap(&tp, ip, tip);
        else
                error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags,
        }
  
        /* Swap the cow forks. */
-       if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+       if (xfs_has_reflink(mp)) {
                ASSERT(!ip->i_cowfp ||
                       ip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS);
                ASSERT(!tip->i_cowfp ||
         * If this is a synchronous mount, make sure that the
         * transaction goes to disk before returning to the user.
         */
-       if (mp->m_flags & XFS_MOUNT_WSYNC)
+       if (xfs_has_wsync(mp))
                xfs_trans_set_sync(tp);
  
        error = xfs_trans_commit(tp);
        trace_xfs_swap_extent_after(ip, 0);
        trace_xfs_swap_extent_after(tip, 1);
  
 +out_unlock_ilock:
 +      xfs_iunlock(ip, XFS_ILOCK_EXCL);
 +      xfs_iunlock(tip, XFS_ILOCK_EXCL);
  out_unlock:
 -      xfs_iunlock(ip, lock_flags);
 -      xfs_iunlock(tip, lock_flags);
 +      filemap_invalidate_unlock_two(VFS_I(ip)->i_mapping,
 +                                    VFS_I(tip)->i_mapping);
        unlock_two_nondirectories(VFS_I(ip), VFS_I(tip));
        return error;
  
  out_trans_cancel:
        xfs_trans_cancel(tp);
 -      goto out_unlock;
 +      goto out_unlock_ilock;
  }
diff --combined fs/xfs/xfs_buf.c
index 3ab73567a0f5b2921e6e4971688ce743bf80e9d6,047bd6e3f389fb68dc25104c0cd601703cb90809..5fa6cd947dd41adda5961f904b044a952812c7f0
@@@ -251,7 -251,7 +251,7 @@@ _xfs_buf_alloc
                return error;
        }
  
-       bp->b_bn = map[0].bm_bn;
+       bp->b_rhash_key = map[0].bm_bn;
        bp->b_length = 0;
        for (i = 0; i < nmaps; i++) {
                bp->b_maps[i].bm_bn = map[i].bm_bn;
@@@ -315,7 -315,6 +315,6 @@@ xfs_buf_alloc_kmem
        struct xfs_buf  *bp,
        xfs_buf_flags_t flags)
  {
-       int             align_mask = xfs_buftarg_dma_alignment(bp->b_target);
        xfs_km_flags_t  kmflag_mask = KM_NOFS;
        size_t          size = BBTOB(bp->b_length);
  
        if (!(flags & XBF_READ))
                kmflag_mask |= KM_ZERO;
  
-       bp->b_addr = kmem_alloc_io(size, align_mask, kmflag_mask);
+       bp->b_addr = kmem_alloc(size, kmflag_mask);
        if (!bp->b_addr)
                return -ENOMEM;
  
@@@ -460,7 -459,7 +459,7 @@@ _xfs_buf_obj_cmp
         */
        BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0);
  
-       if (bp->b_bn != map->bm_bn)
+       if (bp->b_rhash_key != map->bm_bn)
                return 1;
  
        if (unlikely(bp->b_length != map->bm_len)) {
@@@ -482,7 -481,7 +481,7 @@@ static const struct rhashtable_params x
        .min_size               = 32,   /* empty AGs have minimal footprint */
        .nelem_hint             = 16,
        .key_len                = sizeof(xfs_daddr_t),
-       .key_offset             = offsetof(struct xfs_buf, b_bn),
+       .key_offset             = offsetof(struct xfs_buf, b_rhash_key),
        .head_offset            = offsetof(struct xfs_buf, b_rhash_head),
        .automatic_shrinking    = true,
        .obj_cmpfn              = _xfs_buf_obj_cmp,
@@@ -814,7 -813,7 +813,7 @@@ xfs_buf_read_map
         * buffer.
         */
        if (error) {
-               if (!XFS_FORCED_SHUTDOWN(target->bt_mount))
+               if (!xfs_is_shutdown(target->bt_mount))
                        xfs_buf_ioerror_alert(bp, fa);
  
                bp->b_flags &= ~XBF_DONE;
@@@ -844,7 -843,7 +843,7 @@@ xfs_buf_readahead_map
  {
        struct xfs_buf          *bp;
  
 -      if (bdi_read_congested(target->bt_bdev->bd_bdi))
 +      if (bdi_read_congested(target->bt_bdev->bd_disk->bdi))
                return;
  
        xfs_buf_read_map(target, map, nmaps,
  
  /*
   * Read an uncached buffer from disk. Allocates and returns a locked
-  * buffer containing the disk contents or nothing.
+  * buffer containing the disk contents or nothing. Uncached buffers always have
+  * a cache index of XFS_BUF_DADDR_NULL so we can easily determine if the buffer
+  * is cached or uncached during fault diagnosis.
   */
  int
  xfs_buf_read_uncached(
  
        /* set up the buffer for a read IO */
        ASSERT(bp->b_map_count == 1);
-       bp->b_bn = XFS_BUF_DADDR_NULL;  /* always null for uncached buffers */
+       bp->b_rhash_key = XFS_BUF_DADDR_NULL;
        bp->b_maps[0].bm_bn = daddr;
        bp->b_flags |= XBF_READ;
        bp->b_ops = ops;
@@@ -1145,7 -1146,7 +1146,7 @@@ xfs_buf_ioerror_permanent
                return true;
  
        /* At unmount we may treat errors differently */
-       if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
+       if (xfs_is_unmounting(mp) && mp->m_fail_unmount)
                return true;
  
        return false;
@@@ -1179,7 -1180,7 +1180,7 @@@ xfs_buf_ioend_handle_error
         * If we've already decided to shutdown the filesystem because of I/O
         * errors, there's no point in giving this a retry.
         */
-       if (XFS_FORCED_SHUTDOWN(mp))
+       if (xfs_is_shutdown(mp))
                goto out_stale;
  
        xfs_buf_ioerror_alert_ratelimited(bp);
@@@ -1336,7 -1337,7 +1337,7 @@@ xfs_buf_ioerror_alert
  {
        xfs_buf_alert_ratelimited(bp, "XFS: metadata IO error",
                "metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
-                                 func, (uint64_t)XFS_BUF_ADDR(bp),
+                                 func, (uint64_t)xfs_buf_daddr(bp),
                                  bp->b_length, -bp->b_error);
  }
  
@@@ -1514,17 -1515,18 +1515,18 @@@ _xfs_buf_ioapply
                                                   SHUTDOWN_CORRUPT_INCORE);
                                return;
                        }
-               } else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
+               } else if (bp->b_rhash_key != XFS_BUF_DADDR_NULL) {
                        struct xfs_mount *mp = bp->b_mount;
  
                        /*
                         * non-crc filesystems don't attach verifiers during
                         * log recovery, so don't warn for such filesystems.
                         */
-                       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+                       if (xfs_has_crc(mp)) {
                                xfs_warn(mp,
                                        "%s: no buf ops on daddr 0x%llx len %d",
-                                       __func__, bp->b_bn, bp->b_length);
+                                       __func__, xfs_buf_daddr(bp),
+                                       bp->b_length);
                                xfs_hex_dump(bp->b_addr,
                                                XFS_CORRUPTION_DUMP_LEN);
                                dump_stack();
@@@ -1592,7 -1594,7 +1594,7 @@@ __xfs_buf_submit
        ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
  
        /* on shutdown we stale and complete the buffer immediately */
-       if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
+       if (xfs_is_shutdown(bp->b_mount)) {
                xfs_buf_ioend_fail(bp);
                return -EIO;
        }
@@@ -1794,7 -1796,7 +1796,7 @@@ xfs_buftarg_drain
                                xfs_buf_alert_ratelimited(bp,
                                        "XFS: Corruption Alert",
  "Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
-                                       (long long)bp->b_bn);
+                                       (long long)xfs_buf_daddr(bp));
                        }
                        xfs_buf_rele(bp);
                }
         * down the fs.
         */
        if (write_fail) {
-               ASSERT(XFS_FORCED_SHUTDOWN(btp->bt_mount));
+               ASSERT(xfs_is_shutdown(btp->bt_mount));
                xfs_alert(btp->bt_mount,
              "Please run xfs_repair to determine the extent of the problem.");
        }
@@@ -2302,7 -2304,7 +2304,7 @@@ xfs_verify_magic
        struct xfs_mount        *mp = bp->b_mount;
        int                     idx;
  
-       idx = xfs_sb_version_hascrc(&mp->m_sb);
+       idx = xfs_has_crc(mp);
        if (WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))
                return false;
        return dmagic == bp->b_ops->magic[idx];
@@@ -2320,7 -2322,7 +2322,7 @@@ xfs_verify_magic16
        struct xfs_mount        *mp = bp->b_mount;
        int                     idx;
  
-       idx = xfs_sb_version_hascrc(&mp->m_sb);
+       idx = xfs_has_crc(mp);
        if (WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx]))
                return false;
        return dmagic == bp->b_ops->magic16[idx];
diff --combined fs/xfs/xfs_file.c
index 3dfbdcdb0d1ce5c3ae036cbb8b041f24588bbecf,f9a88cc33c7d71ba30d0cf52126cb889e6cdacff..7aa943edfc02fd515173988047c6536efbe19952
@@@ -185,7 -185,7 +185,7 @@@ xfs_file_fsync
        if (error)
                return error;
  
-       if (XFS_FORCED_SHUTDOWN(mp))
+       if (xfs_is_shutdown(mp))
                return -EIO;
  
        xfs_iflags_clear(ip, XFS_ITRUNCATED);
@@@ -318,7 -318,7 +318,7 @@@ xfs_file_read_iter
  
        XFS_STATS_INC(mp, xs_read_calls);
  
-       if (XFS_FORCED_SHUTDOWN(mp))
+       if (xfs_is_shutdown(mp))
                return -EIO;
  
        if (IS_DAX(inode))
@@@ -462,7 -462,7 +462,7 @@@ xfs_dio_write_end_io
  
        trace_xfs_end_io_direct_write(ip, offset, size);
  
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+       if (xfs_is_shutdown(ip->i_mount))
                return -EIO;
  
        if (error)
@@@ -814,7 -814,7 +814,7 @@@ xfs_file_write_iter
        if (ocount == 0)
                return 0;
  
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+       if (xfs_is_shutdown(ip->i_mount))
                return -EIO;
  
        if (IS_DAX(inode))
@@@ -1122,7 -1122,7 +1122,7 @@@ static inline bool xfs_file_sync_writes
  {
        struct xfs_inode        *ip = XFS_I(file_inode(filp));
  
-       if (ip->i_mount->m_flags & XFS_MOUNT_WSYNC)
+       if (xfs_has_wsync(ip->i_mount))
                return true;
        if (filp->f_flags & (__O_SYNC | O_DSYNC))
                return true;
@@@ -1153,10 -1153,10 +1153,10 @@@ xfs_file_remap_range
        if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
                return -EINVAL;
  
-       if (!xfs_sb_version_hasreflink(&mp->m_sb))
+       if (!xfs_has_reflink(mp))
                return -EOPNOTSUPP;
  
-       if (XFS_FORCED_SHUTDOWN(mp))
+       if (xfs_is_shutdown(mp))
                return -EIO;
  
        /* Prepare and then clone file data. */
@@@ -1205,7 -1205,7 +1205,7 @@@ xfs_file_open
  {
        if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
                return -EFBIG;
-       if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
+       if (xfs_is_shutdown(XFS_M(inode->i_sb)))
                return -EIO;
        file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
        return 0;
@@@ -1277,7 -1277,7 +1277,7 @@@ xfs_file_llseek
  {
        struct inode            *inode = file->f_mapping->host;
  
-       if (XFS_FORCED_SHUTDOWN(XFS_I(inode)->i_mount))
+       if (xfs_is_shutdown(XFS_I(inode)->i_mount))
                return -EIO;
  
        switch (whence) {
   *
   * mmap_lock (MM)
   *   sb_start_pagefault(vfs, freeze)
 - *     i_mmaplock (XFS - truncate serialisation)
 + *     invalidate_lock (vfs/XFS_MMAPLOCK - truncate serialisation)
   *       page_lock (MM)
   *         i_lock (XFS - extent map serialisation)
   */
@@@ -1323,27 -1323,24 +1323,27 @@@ __xfs_filemap_fault
                file_update_time(vmf->vma->vm_file);
        }
  
 -      xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
        if (IS_DAX(inode)) {
                pfn_t pfn;
  
 +              xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
                ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL,
                                (write_fault && !vmf->cow_page) ?
                                 &xfs_direct_write_iomap_ops :
                                 &xfs_read_iomap_ops);
                if (ret & VM_FAULT_NEEDDSYNC)
                        ret = dax_finish_sync_fault(vmf, pe_size, pfn);
 +              xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
        } else {
 -              if (write_fault)
 +              if (write_fault) {
 +                      xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
                        ret = iomap_page_mkwrite(vmf,
                                        &xfs_buffered_write_iomap_ops);
 -              else
 +                      xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 +              } else {
                        ret = filemap_fault(vmf);
 +              }
        }
 -      xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
  
        if (write_fault)
                sb_end_pagefault(inode->i_sb);
diff --combined fs/xfs/xfs_inode.c
index f00145e1a976ef60ccdbd6249351d8f65ff6c908,719694fa53af6bbbb4872ca31ef6383fc5b118ed..a4f6f034fb8131e865c01c54a14d8e2dc7ff0d4b
@@@ -132,7 -132,7 +132,7 @@@ xfs_ilock_attr_map_shared
  
  /*
   * In addition to i_rwsem in the VFS inode, the xfs inode contains 2
 - * multi-reader locks: i_mmap_lock and the i_lock.  This routine allows
 + * multi-reader locks: invalidate_lock and the i_lock.  This routine allows
   * various combinations of the locks to be obtained.
   *
   * The 3 locks should always be ordered so that the IO lock is obtained first,
   *
   * Basic locking order:
   *
 - * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock
 + * i_rwsem -> invalidate_lock -> page_lock -> i_ilock
   *
   * mmap_lock locking order:
   *
   * i_rwsem -> page lock -> mmap_lock
 - * mmap_lock -> i_mmap_lock -> page_lock
 + * mmap_lock -> invalidate_lock -> page_lock
   *
   * The difference in mmap_lock locking order mean that we cannot hold the
 - * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
 - * fault in pages during copy in/out (for buffered IO) or require the mmap_lock
 - * in get_user_pages() to map the user pages into the kernel address space for
 - * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because
 - * page faults already hold the mmap_lock.
 + * invalidate_lock over syscall based read(2)/write(2) based IO. These IO paths
 + * can fault in pages during copy in/out (for buffered IO) or require the
 + * mmap_lock in get_user_pages() to map the user pages into the kernel address
 + * space for direct IO. Similarly the i_rwsem cannot be taken inside a page
 + * fault because page faults already hold the mmap_lock.
   *
   * Hence to serialise fully against both syscall and mmap based IO, we need to
 - * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both
 - * taken in places where we need to invalidate the page cache in a race
 + * take both the i_rwsem and the invalidate_lock. These locks should *only* be
 + * both taken in places where we need to invalidate the page cache in a race
   * free manner (e.g. truncate, hole punch and other extent manipulation
   * functions).
   */
@@@ -188,13 -188,10 +188,13 @@@ xfs_ilock
                                 XFS_IOLOCK_DEP(lock_flags));
        }
  
 -      if (lock_flags & XFS_MMAPLOCK_EXCL)
 -              mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
 -      else if (lock_flags & XFS_MMAPLOCK_SHARED)
 -              mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
 +      if (lock_flags & XFS_MMAPLOCK_EXCL) {
 +              down_write_nested(&VFS_I(ip)->i_mapping->invalidate_lock,
 +                                XFS_MMAPLOCK_DEP(lock_flags));
 +      } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
 +              down_read_nested(&VFS_I(ip)->i_mapping->invalidate_lock,
 +                               XFS_MMAPLOCK_DEP(lock_flags));
 +      }
  
        if (lock_flags & XFS_ILOCK_EXCL)
                mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
@@@ -243,10 -240,10 +243,10 @@@ xfs_ilock_nowait
        }
  
        if (lock_flags & XFS_MMAPLOCK_EXCL) {
 -              if (!mrtryupdate(&ip->i_mmaplock))
 +              if (!down_write_trylock(&VFS_I(ip)->i_mapping->invalidate_lock))
                        goto out_undo_iolock;
        } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
 -              if (!mrtryaccess(&ip->i_mmaplock))
 +              if (!down_read_trylock(&VFS_I(ip)->i_mapping->invalidate_lock))
                        goto out_undo_iolock;
        }
  
  
  out_undo_mmaplock:
        if (lock_flags & XFS_MMAPLOCK_EXCL)
 -              mrunlock_excl(&ip->i_mmaplock);
 +              up_write(&VFS_I(ip)->i_mapping->invalidate_lock);
        else if (lock_flags & XFS_MMAPLOCK_SHARED)
 -              mrunlock_shared(&ip->i_mmaplock);
 +              up_read(&VFS_I(ip)->i_mapping->invalidate_lock);
  out_undo_iolock:
        if (lock_flags & XFS_IOLOCK_EXCL)
                up_write(&VFS_I(ip)->i_rwsem);
@@@ -310,9 -307,9 +310,9 @@@ xfs_iunlock
                up_read(&VFS_I(ip)->i_rwsem);
  
        if (lock_flags & XFS_MMAPLOCK_EXCL)
 -              mrunlock_excl(&ip->i_mmaplock);
 +              up_write(&VFS_I(ip)->i_mapping->invalidate_lock);
        else if (lock_flags & XFS_MMAPLOCK_SHARED)
 -              mrunlock_shared(&ip->i_mmaplock);
 +              up_read(&VFS_I(ip)->i_mapping->invalidate_lock);
  
        if (lock_flags & XFS_ILOCK_EXCL)
                mrunlock_excl(&ip->i_lock);
@@@ -338,7 -335,7 +338,7 @@@ xfs_ilock_demote
        if (lock_flags & XFS_ILOCK_EXCL)
                mrdemote(&ip->i_lock);
        if (lock_flags & XFS_MMAPLOCK_EXCL)
 -              mrdemote(&ip->i_mmaplock);
 +              downgrade_write(&VFS_I(ip)->i_mapping->invalidate_lock);
        if (lock_flags & XFS_IOLOCK_EXCL)
                downgrade_write(&VFS_I(ip)->i_rwsem);
  
  }
  
  #if defined(DEBUG) || defined(XFS_WARN)
 -int
 +static inline bool
 +__xfs_rwsem_islocked(
 +      struct rw_semaphore     *rwsem,
 +      bool                    shared)
 +{
 +      if (!debug_locks)
 +              return rwsem_is_locked(rwsem);
 +
 +      if (!shared)
 +              return lockdep_is_held_type(rwsem, 0);
 +
 +      /*
 +       * We are checking that the lock is held at least in shared
 +       * mode but don't care that it might be held exclusively
 +       * (i.e. shared | excl). Hence we check if the lock is held
 +       * in any mode rather than an explicit shared mode.
 +       */
 +      return lockdep_is_held_type(rwsem, -1);
 +}
 +
 +bool
  xfs_isilocked(
 -      xfs_inode_t             *ip,
 +      struct xfs_inode        *ip,
        uint                    lock_flags)
  {
        if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
        }
  
        if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
 -              if (!(lock_flags & XFS_MMAPLOCK_SHARED))
 -                      return !!ip->i_mmaplock.mr_writer;
 -              return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
 +              return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem,
 +                              (lock_flags & XFS_IOLOCK_SHARED));
        }
  
 -      if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
 -              if (!(lock_flags & XFS_IOLOCK_SHARED))
 -                      return !debug_locks ||
 -                              lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0);
 -              return rwsem_is_locked(&VFS_I(ip)->i_rwsem);
 +      if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) {
 +              return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem,
 +                              (lock_flags & XFS_IOLOCK_SHARED));
        }
  
        ASSERT(0);
 -      return 0;
 +      return false;
  }
  #endif
  
@@@ -552,10 -532,12 +552,10 @@@ again
  }
  
  /*
 - * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
 - * the mmaplock or the ilock, but not more than one type at a time. If we lock
 - * more than one at a time, lockdep will report false positives saying we have
 - * violated locking orders.  The iolock must be double-locked separately since
 - * we use i_rwsem for that.  We now support taking one lock EXCL and the other
 - * SHARED.
 + * xfs_lock_two_inodes() can only be used to lock ilock. The iolock and
 + * mmaplock must be double-locked separately since we use i_rwsem and
 + * invalidate_lock for that. We now support taking one lock EXCL and the
 + * other SHARED.
   */
  void
  xfs_lock_two_inodes(
        ASSERT(hweight32(ip1_mode) == 1);
        ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
        ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
 -      ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
 -             !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
 -      ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
 -             !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
 -      ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
 -             !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
 -      ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
 -             !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
 -
 +      ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
 +      ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
        ASSERT(ip0->i_ino != ip1->i_ino);
  
        if (ip0->i_ino > ip1->i_ino) {
@@@ -674,7 -663,7 +674,7 @@@ xfs_lookup
  
        trace_xfs_lookup(dp, name);
  
-       if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+       if (xfs_is_shutdown(dp->i_mount))
                return -EIO;
  
        error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
@@@ -716,7 -705,7 +716,7 @@@ xfs_inode_inherit_flags
                        di_flags |= XFS_DIFLAG_PROJINHERIT;
        } else if (S_ISREG(mode)) {
                if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
-                   xfs_sb_version_hasrealtime(&ip->i_mount->m_sb))
+                   xfs_has_realtime(ip->i_mount))
                        di_flags |= XFS_DIFLAG_REALTIME;
                if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
                        di_flags |= XFS_DIFLAG_EXTSIZE;
@@@ -837,8 -826,7 +837,7 @@@ xfs_init_new_inode
        inode->i_rdev = rdev;
        ip->i_projid = prid;
  
-       if (dir && !(dir->i_mode & S_ISGID) &&
-           (mp->m_flags & XFS_MOUNT_GRPID)) {
+       if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
                inode_fsuid_set(inode, mnt_userns);
                inode->i_gid = dir->i_gid;
                inode->i_mode = mode;
        ip->i_extsize = 0;
        ip->i_diflags = 0;
  
-       if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
+       if (xfs_has_v3inodes(mp)) {
                inode_set_iversion(inode, 1);
                ip->i_cowextsize = 0;
                ip->i_crtime = tv;
         * this saves us from needing to run a separate transaction to set the
         * fork offset in the immediate future.
         */
-       if (init_xattrs && xfs_sb_version_hasattr(&mp->m_sb)) {
+       if (init_xattrs && xfs_has_attr(mp)) {
                ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
                ip->i_afp = xfs_ifork_alloc(XFS_DINODE_FMT_EXTENTS, 0);
        }
@@@ -987,7 -975,7 +986,7 @@@ xfs_create
  
        trace_xfs_create(dp, name);
  
-       if (XFS_FORCED_SHUTDOWN(mp))
+       if (xfs_is_shutdown(mp))
                return -EIO;
  
        prid = xfs_get_initial_prid(dp);
         * create transaction goes to disk before returning to
         * the user.
         */
-       if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+       if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
                xfs_trans_set_sync(tp);
  
        /*
@@@ -1141,7 -1129,7 +1140,7 @@@ xfs_create_tmpfile
        uint                    resblks;
        xfs_ino_t               ino;
  
-       if (XFS_FORCED_SHUTDOWN(mp))
+       if (xfs_is_shutdown(mp))
                return -EIO;
  
        prid = xfs_get_initial_prid(dp);
        if (error)
                goto out_trans_cancel;
  
-       if (mp->m_flags & XFS_MOUNT_WSYNC)
+       if (xfs_has_wsync(mp))
                xfs_trans_set_sync(tp);
  
        /*
@@@ -1231,7 -1219,7 +1230,7 @@@ xfs_link
  
        ASSERT(!S_ISDIR(VFS_I(sip)->i_mode));
  
-       if (XFS_FORCED_SHUTDOWN(mp))
+       if (xfs_is_shutdown(mp))
                return -EIO;
  
        error = xfs_qm_dqattach(sip);
         * link transaction goes to disk before returning to
         * the user.
         */
-       if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+       if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
                xfs_trans_set_sync(tp);
  
        return xfs_trans_commit(tp);
@@@ -1446,10 -1434,10 +1445,10 @@@ xfs_release
                return 0;
  
        /* If this is a read-only mount, don't do this (would generate I/O) */
-       if (mp->m_flags & XFS_MOUNT_RDONLY)
+       if (xfs_is_readonly(mp))
                return 0;
  
-       if (!XFS_FORCED_SHUTDOWN(mp)) {
+       if (!xfs_is_shutdown(mp)) {
                int truncated;
  
                /*
@@@ -1532,7 -1520,7 +1531,7 @@@ xfs_inactive_truncate
  
        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
        if (error) {
-               ASSERT(XFS_FORCED_SHUTDOWN(mp));
+               ASSERT(xfs_is_shutdown(mp));
                return error;
        }
        xfs_ilock(ip, XFS_ILOCK_EXCL);
@@@ -1603,7 -1591,7 +1602,7 @@@ xfs_inactive_ifree
                        "Failed to remove inode(s) from unlinked list. "
                        "Please free space, unmount and run xfs_repair.");
                } else {
-                       ASSERT(XFS_FORCED_SHUTDOWN(mp));
+                       ASSERT(xfs_is_shutdown(mp));
                }
                return error;
        }
                 * might do that, we need to make sure.  Otherwise the
                 * inode might be lost for a long time or forever.
                 */
-               if (!XFS_FORCED_SHUTDOWN(mp)) {
+               if (!xfs_is_shutdown(mp)) {
                        xfs_notice(mp, "%s: xfs_ifree returned error %d",
                                __func__, error);
                        xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
        return 0;
  }
  
+ /*
+  * Returns true if we need to update the on-disk metadata before we can free
+  * the memory used by this inode.  Updates include freeing post-eof
+  * preallocations; freeing COW staging extents; and marking the inode free in
+  * the inobt if it is on the unlinked list.
+  */
+ bool
+ xfs_inode_needs_inactive(
+       struct xfs_inode        *ip)
+ {
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       /*
+        * If the inode is already free, then there can be nothing
+        * to clean up here.
+        */
+       if (VFS_I(ip)->i_mode == 0)
+               return false;
+       /* If this is a read-only mount, don't do this (would generate I/O) */
+       if (xfs_is_readonly(mp))
+               return false;
+       /* If the log isn't running, push inodes straight to reclaim. */
+       if (xfs_is_shutdown(mp) || xfs_has_norecovery(mp))
+               return false;
+       /* Metadata inodes require explicit resource cleanup. */
+       if (xfs_is_metadata_inode(ip))
+               return false;
+       /* Want to clean out the cow blocks if there are any. */
+       if (cow_ifp && cow_ifp->if_bytes > 0)
+               return true;
+       /* Unlinked files must be freed. */
+       if (VFS_I(ip)->i_nlink == 0)
+               return true;
+       /*
+        * This file isn't being freed, so check if there are post-eof blocks
+        * to free.  @force is true because we are evicting an inode from the
+        * cache.  Post-eof blocks must be freed, lest we end up with broken
+        * free space accounting.
+        *
+        * Note: don't bother with iolock here since lockdep complains about
+        * acquiring it in reclaim context. We have the only reference to the
+        * inode at this point anyways.
+        */
+       return xfs_can_free_eofblocks(ip, true);
+ }
  /*
   * xfs_inactive
   *
@@@ -1694,7 -1735,7 +1746,7 @@@ xfs_inactive
        ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY));
  
        /* If this is a read-only mount, don't do this (would generate I/O) */
-       if (mp->m_flags & XFS_MOUNT_RDONLY)
+       if (xfs_is_readonly(mp))
                goto out;
  
        /* Metadata inodes require explicit resource cleanup. */
@@@ -1969,7 -2010,7 +2021,7 @@@ xfs_iunlink_destroy
        rhashtable_free_and_destroy(&pag->pagi_unlinked_hash,
                        xfs_iunlink_free_item, &freed_anything);
  
-       ASSERT(freed_anything == false || XFS_FORCED_SHUTDOWN(pag->pag_mount));
+       ASSERT(freed_anything == false || xfs_is_shutdown(pag->pag_mount));
  }
  
  /*
@@@ -2714,7 -2755,7 +2766,7 @@@ xfs_remove
  
        trace_xfs_remove(dp, name);
  
-       if (XFS_FORCED_SHUTDOWN(mp))
+       if (xfs_is_shutdown(mp))
                return -EIO;
  
        error = xfs_qm_dqattach(dp);
         * remove transaction goes to disk before returning to
         * the user.
         */
-       if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+       if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
                xfs_trans_set_sync(tp);
  
        error = xfs_trans_commit(tp);
@@@ -2890,7 -2931,7 +2942,7 @@@ xfs_finish_rename
         * If this is a synchronous mount, make sure that the rename transaction
         * goes to disk before returning to the user.
         */
-       if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+       if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp))
                xfs_trans_set_sync(tp);
  
        return xfs_trans_commit(tp);
@@@ -3473,7 -3514,7 +3525,7 @@@ xfs_iflush
         * happen but we need to still do it to ensure backwards compatibility
         * with old kernels that predate logging all inode changes.
         */
-       if (!xfs_sb_version_has_v3inode(&mp->m_sb))
+       if (!xfs_has_v3inodes(mp))
                ip->i_flushiter++;
  
        /*
        xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
  
        /* Wrap, we never let the log put out DI_MAX_FLUSH */
-       if (!xfs_sb_version_has_v3inode(&mp->m_sb)) {
+       if (!xfs_has_v3inodes(mp)) {
                if (ip->i_flushiter == DI_MAX_FLUSH)
                        ip->i_flushiter = 0;
        }
@@@ -3614,7 -3655,7 +3666,7 @@@ xfs_iflush_cluster
                 * AIL, leaving a dirty/unpinned inode attached to the buffer
                 * that otherwise looks like it should be flushed.
                 */
-               if (XFS_FORCED_SHUTDOWN(mp)) {
+               if (xfs_is_shutdown(mp)) {
                        xfs_iunpin_wait(ip);
                        xfs_iflush_abort(ip);
                        xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@@ -3752,8 -3793,11 +3804,8 @@@ xfs_ilock2_io_mmap
        ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2));
        if (ret)
                return ret;
 -      if (ip1 == ip2)
 -              xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
 -      else
 -              xfs_lock_two_inodes(ip1, XFS_MMAPLOCK_EXCL,
 -                                  ip2, XFS_MMAPLOCK_EXCL);
 +      filemap_invalidate_lock_two(VFS_I(ip1)->i_mapping,
 +                                  VFS_I(ip2)->i_mapping);
        return 0;
  }
  
@@@ -3763,9 -3807,12 +3815,9 @@@ xfs_iunlock2_io_mmap
        struct xfs_inode        *ip1,
        struct xfs_inode        *ip2)
  {
 -      bool                    same_inode = (ip1 == ip2);
 -
 -      xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
 -      if (!same_inode)
 -              xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
 +      filemap_invalidate_unlock_two(VFS_I(ip1)->i_mapping,
 +                                    VFS_I(ip2)->i_mapping);
        inode_unlock(VFS_I(ip2));
 -      if (!same_inode)
 +      if (ip1 != ip2)
                inode_unlock(VFS_I(ip1));
  }
diff --combined fs/xfs/xfs_inode.h
index e0ae905554e25519dad1be21556ef6537d469c13,768f0d743158fdeb0fd53ef0bea163ca50d0ef25..b21b177832d16277df83f98c78124a34889adfed
@@@ -40,7 -40,9 +40,8 @@@ typedef struct xfs_inode 
        /* Transaction and locking information. */
        struct xfs_inode_log_item *i_itemp;     /* logging information */
        mrlock_t                i_lock;         /* inode lock */
 -      mrlock_t                i_mmaplock;     /* inode mmap IO lock */
        atomic_t                i_pincount;     /* inode pin count */
+       struct llist_node       i_gclist;       /* deferred inactivation list */
  
        /*
         * Bitsets of inode metadata that have been checked and/or are sick.
@@@ -239,6 -241,7 +240,7 @@@ static inline bool xfs_inode_has_bigtim
  #define __XFS_IPINNED_BIT     8        /* wakeup key for zero pin count */
  #define XFS_IPINNED           (1 << __XFS_IPINNED_BIT)
  #define XFS_IEOFBLOCKS                (1 << 9) /* has the preallocblocks tag set */
+ #define XFS_NEED_INACTIVE     (1 << 10) /* see XFS_INACTIVATING below */
  /*
   * If this unlinked inode is in the middle of recovery, don't let drop_inode
   * truncate and free the inode.  This can happen if we iget the inode during
  #define XFS_IRECOVERY         (1 << 11)
  #define XFS_ICOWBLOCKS                (1 << 12)/* has the cowblocks tag set */
  
+ /*
+  * If we need to update on-disk metadata before this IRECLAIMABLE inode can be
+  * freed, then NEED_INACTIVE will be set.  Once we start the updates, the
+  * INACTIVATING bit will be set to keep iget away from this inode.  After the
+  * inactivation completes, both flags will be cleared and the inode is a
+  * plain old IRECLAIMABLE inode.
+  */
+ #define XFS_INACTIVATING      (1 << 13)
+ /* All inode state flags related to inode reclaim. */
+ #define XFS_ALL_IRECLAIM_FLAGS        (XFS_IRECLAIMABLE | \
+                                XFS_IRECLAIM | \
+                                XFS_NEED_INACTIVE | \
+                                XFS_INACTIVATING)
  /*
   * Per-lifetime flags need to be reset when re-using a reclaimable inode during
   * inode lookup. This prevents unintended behaviour on the new inode from
   */
  #define XFS_IRECLAIM_RESET_FLAGS      \
        (XFS_IRECLAIMABLE | XFS_IRECLAIM | \
-        XFS_IDIRTY_RELEASE | XFS_ITRUNCATED)
+        XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \
+        XFS_INACTIVATING)
  
  /*
   * Flags for inode locking.
@@@ -381,8 -400,7 +399,7 @@@ enum layout_break_reason 
   * new subdirectory gets S_ISGID bit from parent.
   */
  #define XFS_INHERIT_GID(pip)  \
-       (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \
-        (VFS_I(pip)->i_mode & S_ISGID))
+       (xfs_has_grpid((pip)->i_mount) || (VFS_I(pip)->i_mode & S_ISGID))
  
  int           xfs_release(struct xfs_inode *ip);
  void          xfs_inactive(struct xfs_inode *ip);
@@@ -409,7 -427,7 +426,7 @@@ void               xfs_ilock(xfs_inode_t *, uint)
  int           xfs_ilock_nowait(xfs_inode_t *, uint);
  void          xfs_iunlock(xfs_inode_t *, uint);
  void          xfs_ilock_demote(xfs_inode_t *, uint);
 -int           xfs_isilocked(xfs_inode_t *, uint);
 +bool          xfs_isilocked(struct xfs_inode *, uint);
  uint          xfs_ilock_data_map_shared(struct xfs_inode *);
  uint          xfs_ilock_attr_map_shared(struct xfs_inode *);
  
@@@ -492,6 -510,8 +509,8 @@@ extern struct kmem_zone    *xfs_inode_zone
  /* The default CoW extent size hint. */
  #define XFS_DEFAULT_COWEXTSZ_HINT 32
  
+ bool xfs_inode_needs_inactive(struct xfs_inode *ip);
  int xfs_iunlink_init(struct xfs_perag *pag);
  void xfs_iunlink_destroy(struct xfs_perag *pag);
  
diff --combined fs/xfs/xfs_super.c
index 102cbd6066331e73eccf90636919cc8a141dfcd3,5e73ac78bf2f2797f73bbe72e564b1f9019c7387..9a86d3ec2cb612d1b3dd48bc4a0146bfd28873b4
@@@ -49,6 -49,28 +49,28 @@@ static struct kset *xfs_kset;               /* top-l
  static struct xfs_kobj xfs_dbg_kobj;  /* global debug sysfs attrs */
  #endif
  
+ #ifdef CONFIG_HOTPLUG_CPU
+ static LIST_HEAD(xfs_mount_list);
+ static DEFINE_SPINLOCK(xfs_mount_list_lock);
+ static inline void xfs_mount_list_add(struct xfs_mount *mp)
+ {
+       spin_lock(&xfs_mount_list_lock);
+       list_add(&mp->m_mount_list, &xfs_mount_list);
+       spin_unlock(&xfs_mount_list_lock);
+ }
+ static inline void xfs_mount_list_del(struct xfs_mount *mp)
+ {
+       spin_lock(&xfs_mount_list_lock);
+       list_del(&mp->m_mount_list);
+       spin_unlock(&xfs_mount_list_lock);
+ }
+ #else /* !CONFIG_HOTPLUG_CPU */
+ static inline void xfs_mount_list_add(struct xfs_mount *mp) {}
+ static inline void xfs_mount_list_del(struct xfs_mount *mp) {}
+ #endif
  enum xfs_dax_mode {
        XFS_DAX_INODE = 0,
        XFS_DAX_ALWAYS = 1,
@@@ -62,15 -84,15 +84,15 @@@ xfs_mount_set_dax_mode
  {
        switch (mode) {
        case XFS_DAX_INODE:
-               mp->m_flags &= ~(XFS_MOUNT_DAX_ALWAYS | XFS_MOUNT_DAX_NEVER);
+               mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER);
                break;
        case XFS_DAX_ALWAYS:
-               mp->m_flags |= XFS_MOUNT_DAX_ALWAYS;
-               mp->m_flags &= ~XFS_MOUNT_DAX_NEVER;
+               mp->m_features |= XFS_FEAT_DAX_ALWAYS;
+               mp->m_features &= ~XFS_FEAT_DAX_NEVER;
                break;
        case XFS_DAX_NEVER:
-               mp->m_flags |= XFS_MOUNT_DAX_NEVER;
-               mp->m_flags &= ~XFS_MOUNT_DAX_ALWAYS;
+               mp->m_features |= XFS_FEAT_DAX_NEVER;
+               mp->m_features &= ~XFS_FEAT_DAX_ALWAYS;
                break;
        }
  }
@@@ -154,33 -176,32 +176,32 @@@ xfs_fs_show_options
  {
        static struct proc_xfs_info xfs_info_set[] = {
                /* the few simple ones we can get from the mount struct */
-               { XFS_MOUNT_IKEEP,              ",ikeep" },
-               { XFS_MOUNT_WSYNC,              ",wsync" },
-               { XFS_MOUNT_NOALIGN,            ",noalign" },
-               { XFS_MOUNT_SWALLOC,            ",swalloc" },
-               { XFS_MOUNT_NOUUID,             ",nouuid" },
-               { XFS_MOUNT_NORECOVERY,         ",norecovery" },
-               { XFS_MOUNT_ATTR2,              ",attr2" },
-               { XFS_MOUNT_FILESTREAMS,        ",filestreams" },
-               { XFS_MOUNT_GRPID,              ",grpid" },
-               { XFS_MOUNT_DISCARD,            ",discard" },
-               { XFS_MOUNT_LARGEIO,            ",largeio" },
-               { XFS_MOUNT_DAX_ALWAYS,         ",dax=always" },
-               { XFS_MOUNT_DAX_NEVER,          ",dax=never" },
+               { XFS_FEAT_IKEEP,               ",ikeep" },
+               { XFS_FEAT_WSYNC,               ",wsync" },
+               { XFS_FEAT_NOALIGN,             ",noalign" },
+               { XFS_FEAT_SWALLOC,             ",swalloc" },
+               { XFS_FEAT_NOUUID,              ",nouuid" },
+               { XFS_FEAT_NORECOVERY,          ",norecovery" },
+               { XFS_FEAT_ATTR2,               ",attr2" },
+               { XFS_FEAT_FILESTREAMS,         ",filestreams" },
+               { XFS_FEAT_GRPID,               ",grpid" },
+               { XFS_FEAT_DISCARD,             ",discard" },
+               { XFS_FEAT_LARGE_IOSIZE,        ",largeio" },
+               { XFS_FEAT_DAX_ALWAYS,          ",dax=always" },
+               { XFS_FEAT_DAX_NEVER,           ",dax=never" },
                { 0, NULL }
        };
        struct xfs_mount        *mp = XFS_M(root->d_sb);
        struct proc_xfs_info    *xfs_infop;
  
        for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
-               if (mp->m_flags & xfs_infop->flag)
+               if (mp->m_features & xfs_infop->flag)
                        seq_puts(m, xfs_infop->str);
        }
  
-       seq_printf(m, ",inode%d",
-               (mp->m_flags & XFS_MOUNT_SMALL_INUMS) ? 32 : 64);
+       seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64);
  
-       if (mp->m_flags & XFS_MOUNT_ALLOCSIZE)
+       if (xfs_has_allocsize(mp))
                seq_printf(m, ",allocsize=%dk",
                           (1 << mp->m_allocsize_log) >> 10);
  
                seq_printf(m, ",swidth=%d",
                                (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
  
-       if (mp->m_qflags & XFS_UQUOTA_ACCT) {
-               if (mp->m_qflags & XFS_UQUOTA_ENFD)
-                       seq_puts(m, ",usrquota");
-               else
-                       seq_puts(m, ",uqnoenforce");
-       }
+       if (mp->m_qflags & XFS_UQUOTA_ENFD)
+               seq_puts(m, ",usrquota");
+       else if (mp->m_qflags & XFS_UQUOTA_ACCT)
+               seq_puts(m, ",uqnoenforce");
  
-       if (mp->m_qflags & XFS_PQUOTA_ACCT) {
-               if (mp->m_qflags & XFS_PQUOTA_ENFD)
-                       seq_puts(m, ",prjquota");
-               else
-                       seq_puts(m, ",pqnoenforce");
-       }
-       if (mp->m_qflags & XFS_GQUOTA_ACCT) {
-               if (mp->m_qflags & XFS_GQUOTA_ENFD)
-                       seq_puts(m, ",grpquota");
-               else
-                       seq_puts(m, ",gqnoenforce");
-       }
+       if (mp->m_qflags & XFS_PQUOTA_ENFD)
+               seq_puts(m, ",prjquota");
+       else if (mp->m_qflags & XFS_PQUOTA_ACCT)
+               seq_puts(m, ",pqnoenforce");
+       if (mp->m_qflags & XFS_GQUOTA_ENFD)
+               seq_puts(m, ",grpquota");
+       else if (mp->m_qflags & XFS_GQUOTA_ACCT)
+               seq_puts(m, ",gqnoenforce");
  
        if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
                seq_puts(m, ",noquota");
  /*
   * Set parameters for inode allocation heuristics, taking into account
   * filesystem size and inode32/inode64 mount options; i.e. specifically
-  * whether or not XFS_MOUNT_SMALL_INUMS is set.
+  * whether or not XFS_FEAT_SMALL_INUMS is set.
   *
   * Inode allocation patterns are altered only if inode32 is requested
-  * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
-  * If altered, XFS_MOUNT_32BITINODES is set as well.
+  * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large.
+  * If altered, XFS_OPSTATE_INODE32 is set as well.
   *
   * An agcount independent of that in the mount structure is provided
   * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
@@@ -276,13 -292,13 +292,13 @@@ xfs_set_inode_alloc
  
        /*
         * If user asked for no more than 32-bit inodes, and the fs is
-        * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
+        * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
         * the allocator to accommodate the request.
         */
-       if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
-               mp->m_flags |= XFS_MOUNT_32BITINODES;
+       if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
+               set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
        else
-               mp->m_flags &= ~XFS_MOUNT_32BITINODES;
+               clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
  
        for (index = 0; index < agcount; index++) {
                struct xfs_perag        *pag;
  
                pag = xfs_perag_get(mp, index);
  
-               if (mp->m_flags & XFS_MOUNT_32BITINODES) {
+               if (xfs_is_inode32(mp)) {
                        if (ino > XFS_MAXINUMBER_32) {
                                pag->pagi_inodeok = 0;
                                pag->pagf_metadata = 0;
                xfs_perag_put(pag);
        }
  
-       return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
+       return xfs_is_inode32(mp) ? maxagi : agcount;
  }
  
  STATIC int
@@@ -468,7 -484,7 +484,7 @@@ xfs_setup_devices
        if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
                unsigned int    log_sector_size = BBSIZE;
  
-               if (xfs_sb_version_hassector(&mp->m_sb))
+               if (xfs_has_sector(mp))
                        log_sector_size = mp->m_sb.sb_logsectsize;
                error = xfs_setsize_buftarg(mp->m_logdev_targp,
                                            log_sector_size);
@@@ -501,37 -517,37 +517,37 @@@ xfs_init_mount_workqueues
        if (!mp->m_unwritten_workqueue)
                goto out_destroy_buf;
  
-       mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
-                       XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_UNBOUND),
-                       0, mp->m_super->s_id);
-       if (!mp->m_cil_workqueue)
-               goto out_destroy_unwritten;
        mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
                        XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
                        0, mp->m_super->s_id);
        if (!mp->m_reclaim_workqueue)
-               goto out_destroy_cil;
+               goto out_destroy_unwritten;
  
-       mp->m_gc_workqueue = alloc_workqueue("xfs-gc/%s",
-                       WQ_SYSFS | WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM,
+       mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s",
+                       XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM),
                        0, mp->m_super->s_id);
-       if (!mp->m_gc_workqueue)
+       if (!mp->m_blockgc_wq)
                goto out_destroy_reclaim;
  
+       mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
+                       XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+                       1, mp->m_super->s_id);
+       if (!mp->m_inodegc_wq)
+               goto out_destroy_blockgc;
        mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
                        XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
        if (!mp->m_sync_workqueue)
-               goto out_destroy_eofb;
+               goto out_destroy_inodegc;
  
        return 0;
  
- out_destroy_eofb:
-       destroy_workqueue(mp->m_gc_workqueue);
+ out_destroy_inodegc:
+       destroy_workqueue(mp->m_inodegc_wq);
+ out_destroy_blockgc:
+       destroy_workqueue(mp->m_blockgc_wq);
  out_destroy_reclaim:
        destroy_workqueue(mp->m_reclaim_workqueue);
- out_destroy_cil:
-       destroy_workqueue(mp->m_cil_workqueue);
  out_destroy_unwritten:
        destroy_workqueue(mp->m_unwritten_workqueue);
  out_destroy_buf:
@@@ -545,9 -561,9 +561,9 @@@ xfs_destroy_mount_workqueues
        struct xfs_mount        *mp)
  {
        destroy_workqueue(mp->m_sync_workqueue);
-       destroy_workqueue(mp->m_gc_workqueue);
+       destroy_workqueue(mp->m_blockgc_wq);
+       destroy_workqueue(mp->m_inodegc_wq);
        destroy_workqueue(mp->m_reclaim_workqueue);
-       destroy_workqueue(mp->m_cil_workqueue);
        destroy_workqueue(mp->m_unwritten_workqueue);
        destroy_workqueue(mp->m_buf_workqueue);
  }
@@@ -596,32 -612,6 +612,6 @@@ xfs_fs_alloc_inode
        return NULL;
  }
  
- #ifdef DEBUG
- static void
- xfs_check_delalloc(
-       struct xfs_inode        *ip,
-       int                     whichfork)
- {
-       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
-       struct xfs_bmbt_irec    got;
-       struct xfs_iext_cursor  icur;
-       if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got))
-               return;
-       do {
-               if (isnullstartblock(got.br_startblock)) {
-                       xfs_warn(ip->i_mount,
-       "ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]",
-                               ip->i_ino,
-                               whichfork == XFS_DATA_FORK ? "data" : "cow",
-                               got.br_startoff, got.br_blockcount);
-               }
-       } while (xfs_iext_next_extent(ifp, &icur, &got));
- }
- #else
- #define xfs_check_delalloc(ip, whichfork)     do { } while (0)
- #endif
  /*
   * Now that the generic code is guaranteed not to be accessing
   * the linux inode, we can inactivate and reclaim the inode.
@@@ -637,30 -627,6 +627,6 @@@ xfs_fs_destroy_inode
        ASSERT(!rwsem_is_locked(&inode->i_rwsem));
        XFS_STATS_INC(ip->i_mount, vn_rele);
        XFS_STATS_INC(ip->i_mount, vn_remove);
-       xfs_inactive(ip);
-       if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) {
-               xfs_check_delalloc(ip, XFS_DATA_FORK);
-               xfs_check_delalloc(ip, XFS_COW_FORK);
-               ASSERT(0);
-       }
-       XFS_STATS_INC(ip->i_mount, vn_reclaim);
-       /*
-        * We should never get here with one of the reclaim flags already set.
-        */
-       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
-       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
-       /*
-        * We always use background reclaim here because even if the inode is
-        * clean, it still may be under IO and hence we have wait for IO
-        * completion to occur before we can reclaim the inode. The background
-        * reclaim path handles this more efficiently than we can here, so
-        * simply let background reclaim tear down all inodes.
-        */
        xfs_inode_mark_reclaimable(ip);
  }
  
@@@ -709,6 -675,8 +675,6 @@@ xfs_fs_inode_init_once
        atomic_set(&ip->i_pincount, 0);
        spin_lock_init(&ip->i_flags_lock);
  
 -      mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
 -                   "xfsino", ip->i_ino);
        mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
                     "xfsino", ip->i_ino);
  }
@@@ -732,7 -700,7 +698,7 @@@ xfs_fs_drop_inode
         * that.  See the comment for this inode flag.
         */
        if (ip->i_flags & XFS_IRECOVERY) {
-               ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED);
+               ASSERT(xlog_recovery_needed(ip->i_mount->m_log));
                return 0;
        }
  
@@@ -755,6 -723,8 +721,8 @@@ xfs_fs_sync_fs
  {
        struct xfs_mount        *mp = XFS_M(sb);
  
+       trace_xfs_fs_sync_fs(mp, __return_address);
        /*
         * Doing anything during the async pass would be counterproductive.
         */
                flush_delayed_work(&mp->m_log->l_work);
        }
  
+       /*
+        * If we are called with page faults frozen out, it means we are about
+        * to freeze the transaction subsystem. Take the opportunity to shut
+        * down inodegc because once SB_FREEZE_FS is set it's too late to
+        * prevent inactivation races with freeze. The fs doesn't get called
+        * again by the freezing process until after SB_FREEZE_FS has been set,
+        * so it's now or never.  Same logic applies to speculative allocation
+        * garbage collection.
+        *
+        * We don't care if this is a normal syncfs call that does this or
+        * freeze that does this - we can run this multiple times without issue
+        * and we won't race with a restart because a restart can only occur
+        * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE.
+        */
+       if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
+               xfs_inodegc_stop(mp);
+               xfs_blockgc_stop(mp);
+       }
        return 0;
  }
  
@@@ -789,6 -778,9 +776,9 @@@ xfs_fs_statfs
        xfs_extlen_t            lsize;
        int64_t                 ffree;
  
+       /* Wait for whatever inactivations are in progress. */
+       xfs_inodegc_flush(mp);
        statp->f_type = XFS_SUPER_MAGIC;
        statp->f_namelen = MAXNAMELEN - 1;
  
@@@ -884,10 -876,22 +874,22 @@@ xfs_fs_freeze
         * set a GFP_NOFS context here to avoid recursion deadlocks.
         */
        flags = memalloc_nofs_save();
-       xfs_blockgc_stop(mp);
        xfs_save_resvblks(mp);
        ret = xfs_log_quiesce(mp);
        memalloc_nofs_restore(flags);
+       /*
+        * For read-write filesystems, we need to restart the inodegc on error
+        * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not
+        * going to be run to restart it now.  We are at SB_FREEZE_FS level
+        * here, so we can restart safely without racing with a stop in
+        * xfs_fs_sync_fs().
+        */
+       if (ret && !xfs_is_readonly(mp)) {
+               xfs_blockgc_start(mp);
+               xfs_inodegc_start(mp);
+       }
        return ret;
  }
  
@@@ -899,7 -903,18 +901,18 @@@ xfs_fs_unfreeze
  
        xfs_restore_resvblks(mp);
        xfs_log_work_queue(mp);
-       xfs_blockgc_start(mp);
+       /*
+        * Don't reactivate the inodegc worker on a readonly filesystem because
+        * inodes are sent directly to reclaim.  Don't reactivate the blockgc
+        * worker because there are no speculative preallocations on a readonly
+        * filesystem.
+        */
+       if (!xfs_is_readonly(mp)) {
+               xfs_blockgc_start(mp);
+               xfs_inodegc_start(mp);
+       }
        return 0;
  }
  
@@@ -911,10 -926,8 +924,8 @@@ STATIC in
  xfs_finish_flags(
        struct xfs_mount        *mp)
  {
-       int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
        /* Fail a mount where the logbuf is smaller than the log stripe */
-       if (xfs_sb_version_haslogv2(&mp->m_sb)) {
+       if (xfs_has_logv2(mp)) {
                if (mp->m_logbsize <= 0 &&
                    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
                        mp->m_logbsize = mp->m_sb.sb_logsunit;
        /*
         * V5 filesystems always use attr2 format for attributes.
         */
-       if (xfs_sb_version_hascrc(&mp->m_sb) &&
-           (mp->m_flags & XFS_MOUNT_NOATTR2)) {
+       if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) {
                xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
                             "attr2 is always enabled for V5 filesystems.");
                return -EINVAL;
        }
  
-       /*
-        * mkfs'ed attr2 will turn on attr2 mount unless explicitly
-        * told by noattr2 to turn it off
-        */
-       if (xfs_sb_version_hasattr2(&mp->m_sb) &&
-           !(mp->m_flags & XFS_MOUNT_NOATTR2))
-               mp->m_flags |= XFS_MOUNT_ATTR2;
        /*
         * prohibit r/w mounts of read-only filesystems
         */
-       if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
+       if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
                xfs_warn(mp,
                        "cannot mount a read-only filesystem as read-write");
                return -EROFS;
        }
  
-       if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
-           (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
-           !xfs_sb_version_has_pquotino(&mp->m_sb)) {
+       if ((mp->m_qflags & XFS_GQUOTA_ACCT) &&
+           (mp->m_qflags & XFS_PQUOTA_ACCT) &&
+           !xfs_has_pquotino(mp)) {
                xfs_warn(mp,
                  "Super block does not support project and group quota together");
                return -EINVAL;
@@@ -1020,11 -1024,40 +1022,40 @@@ xfs_destroy_percpu_counters
        percpu_counter_destroy(&mp->m_icount);
        percpu_counter_destroy(&mp->m_ifree);
        percpu_counter_destroy(&mp->m_fdblocks);
-       ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
+       ASSERT(xfs_is_shutdown(mp) ||
               percpu_counter_sum(&mp->m_delalloc_blks) == 0);
        percpu_counter_destroy(&mp->m_delalloc_blks);
  }
  
+ static int
+ xfs_inodegc_init_percpu(
+       struct xfs_mount        *mp)
+ {
+       struct xfs_inodegc      *gc;
+       int                     cpu;
+       mp->m_inodegc = alloc_percpu(struct xfs_inodegc);
+       if (!mp->m_inodegc)
+               return -ENOMEM;
+       for_each_possible_cpu(cpu) {
+               gc = per_cpu_ptr(mp->m_inodegc, cpu);
+               init_llist_head(&gc->list);
+               gc->items = 0;
+               INIT_WORK(&gc->work, xfs_inodegc_worker);
+       }
+       return 0;
+ }
+ static void
+ xfs_inodegc_free_percpu(
+       struct xfs_mount        *mp)
+ {
+       if (!mp->m_inodegc)
+               return;
+       free_percpu(mp->m_inodegc);
+ }
  static void
  xfs_fs_put_super(
        struct super_block      *sb)
  
        xfs_freesb(mp);
        free_percpu(mp->m_stats.xs_stats);
+       xfs_mount_list_del(mp);
+       xfs_inodegc_free_percpu(mp);
        xfs_destroy_percpu_counters(mp);
        xfs_destroy_mount_workqueues(mp);
        xfs_close_devices(mp);
@@@ -1129,7 -1164,7 +1162,7 @@@ xfs_fs_warn_deprecated
         * already had the flag set
         */
        if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) &&
-                       !!(XFS_M(fc->root->d_sb)->m_flags & flag) == value)
+             !!(XFS_M(fc->root->d_sb)->m_features & flag) == value)
                return;
        xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
  }
@@@ -1177,27 -1212,27 +1210,27 @@@ xfs_fs_parse_param
                if (suffix_kstrtoint(param->string, 10, &size))
                        return -EINVAL;
                parsing_mp->m_allocsize_log = ffs(size) - 1;
-               parsing_mp->m_flags |= XFS_MOUNT_ALLOCSIZE;
+               parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE;
                return 0;
        case Opt_grpid:
        case Opt_bsdgroups:
-               parsing_mp->m_flags |= XFS_MOUNT_GRPID;
+               parsing_mp->m_features |= XFS_FEAT_GRPID;
                return 0;
        case Opt_nogrpid:
        case Opt_sysvgroups:
-               parsing_mp->m_flags &= ~XFS_MOUNT_GRPID;
+               parsing_mp->m_features &= ~XFS_FEAT_GRPID;
                return 0;
        case Opt_wsync:
-               parsing_mp->m_flags |= XFS_MOUNT_WSYNC;
+               parsing_mp->m_features |= XFS_FEAT_WSYNC;
                return 0;
        case Opt_norecovery:
-               parsing_mp->m_flags |= XFS_MOUNT_NORECOVERY;
+               parsing_mp->m_features |= XFS_FEAT_NORECOVERY;
                return 0;
        case Opt_noalign:
-               parsing_mp->m_flags |= XFS_MOUNT_NOALIGN;
+               parsing_mp->m_features |= XFS_FEAT_NOALIGN;
                return 0;
        case Opt_swalloc:
-               parsing_mp->m_flags |= XFS_MOUNT_SWALLOC;
+               parsing_mp->m_features |= XFS_FEAT_SWALLOC;
                return 0;
        case Opt_sunit:
                parsing_mp->m_dalign = result.uint_32;
                parsing_mp->m_swidth = result.uint_32;
                return 0;
        case Opt_inode32:
-               parsing_mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+               parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS;
                return 0;
        case Opt_inode64:
-               parsing_mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
+               parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
                return 0;
        case Opt_nouuid:
-               parsing_mp->m_flags |= XFS_MOUNT_NOUUID;
+               parsing_mp->m_features |= XFS_FEAT_NOUUID;
                return 0;
        case Opt_largeio:
-               parsing_mp->m_flags |= XFS_MOUNT_LARGEIO;
+               parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE;
                return 0;
        case Opt_nolargeio:
-               parsing_mp->m_flags &= ~XFS_MOUNT_LARGEIO;
+               parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE;
                return 0;
        case Opt_filestreams:
-               parsing_mp->m_flags |= XFS_MOUNT_FILESTREAMS;
+               parsing_mp->m_features |= XFS_FEAT_FILESTREAMS;
                return 0;
        case Opt_noquota:
                parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
                parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
-               parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
                return 0;
        case Opt_quota:
        case Opt_uquota:
        case Opt_usrquota:
-               parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
-                                XFS_UQUOTA_ENFD);
+               parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
                return 0;
        case Opt_qnoenforce:
        case Opt_uqnoenforce:
-               parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+               parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
                parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
                return 0;
        case Opt_pquota:
        case Opt_prjquota:
-               parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
-                                XFS_PQUOTA_ENFD);
+               parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
                return 0;
        case Opt_pqnoenforce:
-               parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+               parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
                parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
                return 0;
        case Opt_gquota:
        case Opt_grpquota:
-               parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
-                                XFS_GQUOTA_ENFD);
+               parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
                return 0;
        case Opt_gqnoenforce:
-               parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+               parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
                parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
                return 0;
        case Opt_discard:
-               parsing_mp->m_flags |= XFS_MOUNT_DISCARD;
+               parsing_mp->m_features |= XFS_FEAT_DISCARD;
                return 0;
        case Opt_nodiscard:
-               parsing_mp->m_flags &= ~XFS_MOUNT_DISCARD;
+               parsing_mp->m_features &= ~XFS_FEAT_DISCARD;
                return 0;
  #ifdef CONFIG_FS_DAX
        case Opt_dax:
  #endif
        /* Following mount options will be removed in September 2025 */
        case Opt_ikeep:
-               xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, true);
-               parsing_mp->m_flags |= XFS_MOUNT_IKEEP;
+               xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true);
+               parsing_mp->m_features |= XFS_FEAT_IKEEP;
                return 0;
        case Opt_noikeep:
-               xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, false);
-               parsing_mp->m_flags &= ~XFS_MOUNT_IKEEP;
+               xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false);
+               parsing_mp->m_features &= ~XFS_FEAT_IKEEP;
                return 0;
        case Opt_attr2:
-               xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_ATTR2, true);
-               parsing_mp->m_flags |= XFS_MOUNT_ATTR2;
+               xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true);
+               parsing_mp->m_features |= XFS_FEAT_ATTR2;
                return 0;
        case Opt_noattr2:
-               xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_NOATTR2, true);
-               parsing_mp->m_flags &= ~XFS_MOUNT_ATTR2;
-               parsing_mp->m_flags |= XFS_MOUNT_NOATTR2;
+               xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true);
+               parsing_mp->m_features |= XFS_FEAT_NOATTR2;
                return 0;
        default:
                xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
@@@ -1301,17 -1331,23 +1329,23 @@@ static in
  xfs_fs_validate_params(
        struct xfs_mount        *mp)
  {
+       /* No recovery flag requires a read-only mount */
+       if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
+               xfs_warn(mp, "no-recovery mounts must be read-only.");
+               return -EINVAL;
+       }
        /*
-        * no recovery flag requires a read-only mount
+        * We have not read the superblock at this point, so only the attr2
+        * mount option can set the attr2 feature by this stage.
         */
-       if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
-           !(mp->m_flags & XFS_MOUNT_RDONLY)) {
-               xfs_warn(mp, "no-recovery mounts must be read-only.");
+       if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) {
+               xfs_warn(mp, "attr2 and noattr2 cannot both be specified.");
                return -EINVAL;
        }
  
-       if ((mp->m_flags & XFS_MOUNT_NOALIGN) &&
-           (mp->m_dalign || mp->m_swidth)) {
+       if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
                xfs_warn(mp,
        "sunit and swidth options incompatible with the noalign option");
                return -EINVAL;
                return -EINVAL;
        }
  
-       if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) &&
+       if (xfs_has_allocsize(mp) &&
            (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
             mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
                xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
@@@ -1416,11 -1452,22 +1450,22 @@@ xfs_fs_fill_super
        if (error)
                goto out_destroy_workqueues;
  
+       error = xfs_inodegc_init_percpu(mp);
+       if (error)
+               goto out_destroy_counters;
+       /*
+        * All percpu data structures requiring cleanup when a cpu goes offline
+        * must be allocated before adding this @mp to the cpu-dead handler's
+        * mount list.
+        */
+       xfs_mount_list_add(mp);
        /* Allocate stats memory before we do operations that might use it */
        mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
        if (!mp->m_stats.xs_stats) {
                error = -ENOMEM;
-               goto out_destroy_counters;
+               goto out_destroy_inodegc;
        }
  
        error = xfs_readsb(mp, flags);
                goto out_free_sb;
  
        /* V4 support is undergoing deprecation. */
-       if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+       if (!xfs_has_crc(mp)) {
  #ifdef CONFIG_XFS_SUPPORT_V4
                xfs_warn_once(mp,
        "Deprecated V4 format (crc=0) will not be supported after September 2030.");
        }
  
        /* Filesystem claims it needs repair, so refuse the mount. */
-       if (xfs_sb_version_needsrepair(&mp->m_sb)) {
+       if (xfs_has_needsrepair(mp)) {
                xfs_warn(mp, "Filesystem needs repair.  Please run xfs_repair.");
                error = -EFSCORRUPTED;
                goto out_free_sb;
        sb->s_maxbytes = MAX_LFS_FILESIZE;
        sb->s_max_links = XFS_MAXLINK;
        sb->s_time_gran = 1;
-       if (xfs_sb_version_hasbigtime(&mp->m_sb)) {
+       if (xfs_has_bigtime(mp)) {
                sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
                sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
        } else {
        set_posix_acl_flag(sb);
  
        /* version 5 superblocks support inode version counters. */
-       if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
+       if (xfs_has_crc(mp))
                sb->s_flags |= SB_I_VERSION;
  
-       if (xfs_sb_version_hasbigtime(&mp->m_sb))
-               xfs_warn(mp,
-  "EXPERIMENTAL big timestamp feature in use. Use at your own risk!");
-       if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS) {
+       if (xfs_has_dax_always(mp)) {
                bool rtdev_is_dax = false, datadev_is_dax;
  
                xfs_warn(mp,
                        "DAX unsupported by block device. Turning off DAX.");
                        xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
                }
-               if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+               if (xfs_has_reflink(mp)) {
                        xfs_alert(mp,
                "DAX and reflink cannot be used together!");
                        error = -EINVAL;
                }
        }
  
-       if (mp->m_flags & XFS_MOUNT_DISCARD) {
+       if (xfs_has_discard(mp)) {
                struct request_queue *q = bdev_get_queue(sb->s_bdev);
  
                if (!blk_queue_discard(q)) {
                        xfs_warn(mp, "mounting with \"discard\" option, but "
                                        "the device does not support discard");
-                       mp->m_flags &= ~XFS_MOUNT_DISCARD;
+                       mp->m_features &= ~XFS_FEAT_DISCARD;
                }
        }
  
-       if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+       if (xfs_has_reflink(mp)) {
                if (mp->m_sb.sb_rblocks) {
                        xfs_alert(mp,
        "reflink not compatible with realtime device!");
                }
        }
  
-       if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) {
+       if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) {
                xfs_alert(mp,
        "reverse mapping btree not compatible with realtime device!");
                error = -EINVAL;
                goto out_filestream_unmount;
        }
  
-       if (xfs_sb_version_hasinobtcounts(&mp->m_sb))
-               xfs_warn(mp,
-  "EXPERIMENTAL inode btree counters feature in use. Use at your own risk!");
        error = xfs_mountfs(mp);
        if (error)
                goto out_filestream_unmount;
        xfs_freesb(mp);
   out_free_stats:
        free_percpu(mp->m_stats.xs_stats);
+  out_destroy_inodegc:
+       xfs_mount_list_del(mp);
+       xfs_inodegc_free_percpu(mp);
   out_destroy_counters:
        xfs_destroy_percpu_counters(mp);
   out_destroy_workqueues:
@@@ -1654,13 -1696,13 +1694,13 @@@ xfs_remount_rw
        struct xfs_sb           *sbp = &mp->m_sb;
        int error;
  
-       if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
+       if (xfs_has_norecovery(mp)) {
                xfs_warn(mp,
                        "ro->rw transition prohibited on norecovery mount");
                return -EINVAL;
        }
  
-       if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+       if (xfs_sb_is_v5(sbp) &&
            xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
                xfs_warn(mp,
        "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
                return -EINVAL;
        }
  
-       mp->m_flags &= ~XFS_MOUNT_RDONLY;
+       clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
  
        /*
         * If this is the first remount to writeable state we might have some
        if (error && error != -ENOSPC)
                return error;
  
+       /* Re-enable the background inode inactivation worker. */
+       xfs_inodegc_start(mp);
        return 0;
  }
  
@@@ -1728,6 -1773,15 +1771,15 @@@ xfs_remount_ro
                return error;
        }
  
+       /*
+        * Stop the inodegc background worker.  xfs_fs_reconfigure already
+        * flushed all pending inodegc work when it sync'd the filesystem.
+        * The VFS holds s_umount, so we know that inodes cannot enter
+        * xfs_fs_destroy_inode during a remount operation.  In readonly mode
+        * we send inodes straight to reclaim, so no inodes will be queued.
+        */
+       xfs_inodegc_stop(mp);
        /* Free the per-AG metadata reservation pool. */
        error = xfs_fs_unreserve_ag_blocks(mp);
        if (error) {
        xfs_save_resvblks(mp);
  
        xfs_log_clean(mp);
-       mp->m_flags |= XFS_MOUNT_RDONLY;
+       set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
  
        return 0;
  }
@@@ -1768,12 -1822,11 +1820,11 @@@ xfs_fs_reconfigure
  {
        struct xfs_mount        *mp = XFS_M(fc->root->d_sb);
        struct xfs_mount        *new_mp = fc->s_fs_info;
-       xfs_sb_t                *sbp = &mp->m_sb;
        int                     flags = fc->sb_flags;
        int                     error;
  
        /* version 5 superblocks always support version counters. */
-       if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
+       if (xfs_has_crc(mp))
                fc->sb_flags |= SB_I_VERSION;
  
        error = xfs_fs_validate_params(new_mp);
        sync_filesystem(mp->m_super);
  
        /* inode32 -> inode64 */
-       if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) &&
-           !(new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) {
-               mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
-               mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
+       if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
+               mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
+               mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
        }
  
        /* inode64 -> inode32 */
-       if (!(mp->m_flags & XFS_MOUNT_SMALL_INUMS) &&
-           (new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) {
-               mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-               mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
+       if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) {
+               mp->m_features |= XFS_FEAT_SMALL_INUMS;
+               mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
        }
  
        /* ro -> rw */
-       if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(flags & SB_RDONLY)) {
+       if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) {
                error = xfs_remount_rw(mp);
                if (error)
                        return error;
        }
  
        /* rw -> ro */
-       if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (flags & SB_RDONLY)) {
+       if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) {
                error = xfs_remount_ro(mp);
                if (error)
                        return error;
@@@ -1871,11 -1922,11 +1920,11 @@@ static int xfs_init_fs_context
         * Copy binary VFS mount flags we are interested in.
         */
        if (fc->sb_flags & SB_RDONLY)
-               mp->m_flags |= XFS_MOUNT_RDONLY;
+               set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
        if (fc->sb_flags & SB_DIRSYNC)
-               mp->m_flags |= XFS_MOUNT_DIRSYNC;
+               mp->m_features |= XFS_FEAT_DIRSYNC;
        if (fc->sb_flags & SB_SYNCHRONOUS)
-               mp->m_flags |= XFS_MOUNT_WSYNC;
+               mp->m_features |= XFS_FEAT_WSYNC;
  
        fc->s_fs_info = mp;
        fc->ops = &xfs_context_ops;
@@@ -2118,6 -2169,48 +2167,48 @@@ xfs_destroy_workqueues(void
        destroy_workqueue(xfs_alloc_wq);
  }
  
+ #ifdef CONFIG_HOTPLUG_CPU
+ static int
+ xfs_cpu_dead(
+       unsigned int            cpu)
+ {
+       struct xfs_mount        *mp, *n;
+       spin_lock(&xfs_mount_list_lock);
+       list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) {
+               spin_unlock(&xfs_mount_list_lock);
+               xfs_inodegc_cpu_dead(mp, cpu);
+               spin_lock(&xfs_mount_list_lock);
+       }
+       spin_unlock(&xfs_mount_list_lock);
+       return 0;
+ }
+ static int __init
+ xfs_cpu_hotplug_init(void)
+ {
+       int     error;
+       error = cpuhp_setup_state_nocalls(CPUHP_XFS_DEAD, "xfs:dead", NULL,
+                       xfs_cpu_dead);
+       if (error < 0)
+               xfs_alert(NULL,
+ "Failed to initialise CPU hotplug, error %d. XFS is non-functional.",
+                       error);
+       return error;
+ }
+ static void
+ xfs_cpu_hotplug_destroy(void)
+ {
+       cpuhp_remove_state_nocalls(CPUHP_XFS_DEAD);
+ }
+ #else /* !CONFIG_HOTPLUG_CPU */
+ static inline int xfs_cpu_hotplug_init(void) { return 0; }
+ static inline void xfs_cpu_hotplug_destroy(void) {}
+ #endif
  STATIC int __init
  init_xfs_fs(void)
  {
  
        xfs_dir_startup();
  
-       error = xfs_init_zones();
+       error = xfs_cpu_hotplug_init();
        if (error)
                goto out;
  
+       error = xfs_init_zones();
+       if (error)
+               goto out_destroy_hp;
        error = xfs_init_workqueues();
        if (error)
                goto out_destroy_zones;
        xfs_destroy_workqueues();
   out_destroy_zones:
        xfs_destroy_zones();
+  out_destroy_hp:
+       xfs_cpu_hotplug_destroy();
   out:
        return error;
  }
@@@ -2235,6 -2334,7 +2332,7 @@@ exit_xfs_fs(void
        xfs_destroy_workqueues();
        xfs_destroy_zones();
        xfs_uuid_table_free();
+       xfs_cpu_hotplug_destroy();
  }
  
  module_init(init_xfs_fs);
index 95f88edc8f0951ab666a87f8f34e8e014f4b0def,439adc05be4e18f89d8566106e96fccb6b327a55..39cf84a30b9f5a9eec899d96dd41ac12d63d2e6a
@@@ -46,13 -46,13 +46,14 @@@ enum cpuhp_state 
        CPUHP_ARM_OMAP_WAKE_DEAD,
        CPUHP_IRQ_POLL_DEAD,
        CPUHP_BLOCK_SOFTIRQ_DEAD,
 +      CPUHP_BIO_DEAD,
        CPUHP_ACPI_CPUDRV_DEAD,
        CPUHP_S390_PFAULT_DEAD,
        CPUHP_BLK_MQ_DEAD,
        CPUHP_FS_BUFF_DEAD,
        CPUHP_PRINTK_DEAD,
        CPUHP_MM_MEMCQ_DEAD,
+       CPUHP_XFS_DEAD,
        CPUHP_PERCPU_CNT_DEAD,
        CPUHP_RADIX_DEAD,
        CPUHP_PAGE_ALLOC,
@@@ -400,7 -400,7 +401,7 @@@ static inline int cpuhp_state_remove_in
  
  /**
   * cpuhp_state_remove_instance_nocalls - Remove hotplug instance from state
 - *                                     without invoking the reatdown callback
 + *                                     without invoking the teardown callback
   * @state:    The state from which the instance is removed
   * @node:     The node for this individual state.
   *
This page took 0.173021 seconds and 4 git commands to generate.