Merge tag 'xfs-perag-conv-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc...

author Darrick J. Wong <[email protected]>
Tue, 8 Jun 2021 16:13:13 +0000 (09:13 -0700)
committer Darrick J. Wong <[email protected]>
Tue, 8 Jun 2021 16:13:13 +0000 (09:13 -0700)
diff --combined fs/xfs/libxfs/xfs_ag.c

index be0087825ae06b2709f4c024362f92cfd3a81184,29c42698aa90f790131bdcce637e2dbbac852590..5315e3f572072054a0fa9dc0e2ad9111cea708ae
--- 1/fs/xfs/libxfs/xfs_ag.c
--- 2/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@@ -27,6 -27,276 +27,276 @@@
   #include "xfs_defer.h"
   #include "xfs_log_format.h"
   #include "xfs_trans.h"
+ #include "xfs_trace.h"
+ #include "xfs_inode.h"
+ #include "xfs_icache.h"
+ 
+ 
+ /*
+  * Passive reference counting access wrappers to the perag structures.  If the
+  * per-ag structure is to be freed, the freeing code is responsible for cleaning
+  * up objects with passive references before freeing the structure. This is
+  * things like cached buffers.
+  */
+ struct xfs_perag *
+ xfs_perag_get(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno)
+ {
+       struct xfs_perag        *pag;
+       int                     ref = 0;
+ 
+       rcu_read_lock();
+       pag = radix_tree_lookup(&mp->m_perag_tree, agno);
+       if (pag) {
+               ASSERT(atomic_read(&pag->pag_ref) >= 0);
+               ref = atomic_inc_return(&pag->pag_ref);
+       }
+       rcu_read_unlock();
+       trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
+       return pag;
+ }
+ 
+ /*
+  * search from @first to find the next perag with the given tag set.
+  */
+ struct xfs_perag *
+ xfs_perag_get_tag(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          first,
+       int                     tag)
+ {
+       struct xfs_perag        *pag;
+       int                     found;
+       int                     ref;
+ 
+       rcu_read_lock();
+       found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
+                                       (void **)&pag, first, 1, tag);
+       if (found <= 0) {
+               rcu_read_unlock();
+               return NULL;
+       }
+       ref = atomic_inc_return(&pag->pag_ref);
+       rcu_read_unlock();
+       trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
+       return pag;
+ }
+ 
+ void
+ xfs_perag_put(
+       struct xfs_perag        *pag)
+ {
+       int     ref;
+ 
+       ASSERT(atomic_read(&pag->pag_ref) > 0);
+       ref = atomic_dec_return(&pag->pag_ref);
+       trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
+ }
+ 
+ /*
+  * xfs_initialize_perag_data
+  *
+  * Read in each per-ag structure so we can count up the number of
+  * allocated inodes, free inodes and used filesystem blocks as this
+  * information is no longer persistent in the superblock. Once we have
+  * this information, write it into the in-core superblock structure.
+  */
+ int
+ xfs_initialize_perag_data(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agcount)
+ {
+       xfs_agnumber_t          index;
+       struct xfs_perag        *pag;
+       struct xfs_sb           *sbp = &mp->m_sb;
+       uint64_t                ifree = 0;
+       uint64_t                ialloc = 0;
+       uint64_t                bfree = 0;
+       uint64_t                bfreelst = 0;
+       uint64_t                btree = 0;
+       uint64_t                fdblocks;
+       int                     error = 0;
+ 
+       for (index = 0; index < agcount; index++) {
+               /*
+                * read the agf, then the agi. This gets us
+                * all the information we need and populates the
+                * per-ag structures for us.
+                */
+               error = xfs_alloc_pagf_init(mp, NULL, index, 0);
+               if (error)
+                       return error;
+ 
+               error = xfs_ialloc_pagi_init(mp, NULL, index);
+               if (error)
+                       return error;
+               pag = xfs_perag_get(mp, index);
+               ifree += pag->pagi_freecount;
+               ialloc += pag->pagi_count;
+               bfree += pag->pagf_freeblks;
+               bfreelst += pag->pagf_flcount;
+               btree += pag->pagf_btreeblks;
+               xfs_perag_put(pag);
+       }
+       fdblocks = bfree + bfreelst + btree;
+ 
+       /*
+        * If the new summary counts are obviously incorrect, fail the
+        * mount operation because that implies the AGFs are also corrupt.
+        * Clear FS_COUNTERS so that we don't unmount with a dirty log, which
+        * will prevent xfs_repair from fixing anything.
+        */
+       if (fdblocks > sbp->sb_dblocks || ifree > ialloc) {
+               xfs_alert(mp, "AGF corruption. Please run xfs_repair.");
+               error = -EFSCORRUPTED;
+               goto out;
+       }
+ 
+       /* Overwrite incore superblock counters with just-read data */
+       spin_lock(&mp->m_sb_lock);
+       sbp->sb_ifree = ifree;
+       sbp->sb_icount = ialloc;
+       sbp->sb_fdblocks = fdblocks;
+       spin_unlock(&mp->m_sb_lock);
+ 
+       xfs_reinit_percpu_counters(mp);
+ out:
+       xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS);
+       return error;
+ }
+ 
+ STATIC void
+ __xfs_free_perag(
+       struct rcu_head *head)
+ {
+       struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
+ 
+       ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
+       ASSERT(atomic_read(&pag->pag_ref) == 0);
+       kmem_free(pag);
+ }
+ 
+ /*
+  * Free up the per-ag resources associated with the mount structure.
+  */
+ void
+ xfs_free_perag(
+       struct xfs_mount        *mp)
+ {
+       struct xfs_perag        *pag;
+       xfs_agnumber_t          agno;
+ 
+       for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+               spin_lock(&mp->m_perag_lock);
+               pag = radix_tree_delete(&mp->m_perag_tree, agno);
+               spin_unlock(&mp->m_perag_lock);
+               ASSERT(pag);
+               ASSERT(atomic_read(&pag->pag_ref) == 0);
+ 
+               cancel_delayed_work_sync(&pag->pag_blockgc_work);
+               xfs_iunlink_destroy(pag);
+               xfs_buf_hash_destroy(pag);
+ 
+               call_rcu(&pag->rcu_head, __xfs_free_perag);
+       }
+ }
+ 
+ int
+ xfs_initialize_perag(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agcount,
+       xfs_agnumber_t          *maxagi)
+ {
+       struct xfs_perag        *pag;
+       xfs_agnumber_t          index;
+       xfs_agnumber_t          first_initialised = NULLAGNUMBER;
+       int                     error;
+ 
+       /*
+        * Walk the current per-ag tree so we don't try to initialise AGs
+        * that already exist (growfs case). Allocate and insert all the
+        * AGs we don't find ready for initialisation.
+        */
+       for (index = 0; index < agcount; index++) {
+               pag = xfs_perag_get(mp, index);
+               if (pag) {
+                       xfs_perag_put(pag);
+                       continue;
+               }
+ 
+               pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
+               if (!pag) {
+                       error = -ENOMEM;
+                       goto out_unwind_new_pags;
+               }
+               pag->pag_agno = index;
+               pag->pag_mount = mp;
+ 
+               error = radix_tree_preload(GFP_NOFS);
+               if (error)
+                       goto out_free_pag;
+ 
+               spin_lock(&mp->m_perag_lock);
+               if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
+                       WARN_ON_ONCE(1);
+                       spin_unlock(&mp->m_perag_lock);
+                       radix_tree_preload_end();
+                       error = -EEXIST;
+                       goto out_free_pag;
+               }
+               spin_unlock(&mp->m_perag_lock);
+               radix_tree_preload_end();
+ 
+               /* Place kernel structure only init below this point. */
+               spin_lock_init(&pag->pag_ici_lock);
+               spin_lock_init(&pag->pagb_lock);
+               spin_lock_init(&pag->pag_state_lock);
+               INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
+               INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
+               init_waitqueue_head(&pag->pagb_wait);
+               pag->pagb_count = 0;
+               pag->pagb_tree = RB_ROOT;
+ 
+               error = xfs_buf_hash_init(pag);
+               if (error)
+                       goto out_remove_pag;
+ 
+               error = xfs_iunlink_init(pag);
+               if (error)
+                       goto out_hash_destroy;
+ 
+               /* first new pag is fully initialized */
+               if (first_initialised == NULLAGNUMBER)
+                       first_initialised = index;
+       }
+ 
+       index = xfs_set_inode_alloc(mp, agcount);
+ 
+       if (maxagi)
+               *maxagi = index;
+ 
+       mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
+       return 0;
+ 
+ out_hash_destroy:
+       xfs_buf_hash_destroy(pag);
+ out_remove_pag:
+       radix_tree_delete(&mp->m_perag_tree, index);
+ out_free_pag:
+       kmem_free(pag);
+ out_unwind_new_pags:
+       /* unwind any prior newly initialized pags */
+       for (index = first_initialised; index < agcount; index++) {
+               pag = radix_tree_delete(&mp->m_perag_tree, index);
+               if (!pag)
+                       break;
+               xfs_buf_hash_destroy(pag);
+               xfs_iunlink_destroy(pag);
+               kmem_free(pag);
+       }
+       return error;
+ }
   
   static int
   xfs_get_aghdr_buf(
@@@ -43,6 -313,7 +313,6 @@@
         if (error)
                 return error;
   
- -      xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
         bp->b_bn = blkno;
         bp->b_maps[0].bm_bn = blkno;
         bp->b_ops = ops;
@@@ -645,7 -916,7 +915,7 @@@ xfs_ag_extend_space
          * XFS_RMAP_OINFO_SKIP_UPDATE is used here to tell the rmap btree that
          * this doesn't actually exist in the rmap btree.
          */
-       error = xfs_rmap_free(tp, bp, id->agno,
+       error = xfs_rmap_free(tp, bp, bp->b_pag,
                                 be32_to_cpu(agf->agf_length) - len,
                                 len, &XFS_RMAP_OINFO_SKIP_UPDATE);
         if (error)
diff --combined fs/xfs/libxfs/xfs_ag_resv.c

index bbfea8022a3b96c5bc25bb012b07a90f9241032b,f7394a8ecf6b911461bf27b9ba91735f36c3312c..f6f868020e2ed69c0a4999cf6da386ca8deb4c27
--- 1/fs/xfs/libxfs/xfs_ag_resv.c
--- 2/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@@ -19,7 -19,7 +19,7 @@@
   #include "xfs_btree.h"
   #include "xfs_refcount_btree.h"
   #include "xfs_ialloc_btree.h"
- #include "xfs_sb.h"
+ #include "xfs_ag.h"
   #include "xfs_ag_resv.h"
   
   /*
@@@ -250,7 -250,6 +250,6 @@@ xfs_ag_resv_init
         struct xfs_trans                *tp)
   {
         struct xfs_mount                *mp = pag->pag_mount;
-       xfs_agnumber_t                  agno = pag->pag_agno;
         xfs_extlen_t                    ask;
         xfs_extlen_t                    used;
         int                             error = 0, error2;
@@@ -260,11 -259,11 +259,11 @@@
         if (pag->pag_meta_resv.ar_asked == 0) {
                 ask = used = 0;
   
-               error = xfs_refcountbt_calc_reserves(mp, tp, agno, &ask, &used);
+               error = xfs_refcountbt_calc_reserves(mp, tp, pag, &ask, &used);
                 if (error)
                         goto out;
   
-               error = xfs_finobt_calc_reserves(mp, tp, agno, &ask, &used);
+               error = xfs_finobt_calc_reserves(mp, tp, pag, &ask, &used);
                 if (error)
                         goto out;
   
@@@ -282,7 -281,7 +281,7 @@@
   
                         mp->m_finobt_nores = true;
   
-                       error = xfs_refcountbt_calc_reserves(mp, tp, agno, &ask,
+                       error = xfs_refcountbt_calc_reserves(mp, tp, pag, &ask,
                                         &used);
                         if (error)
                                 goto out;
@@@ -300,7 -299,7 +299,7 @@@
         if (pag->pag_rmapbt_resv.ar_asked == 0) {
                 ask = used = 0;
   
-               error = xfs_rmapbt_calc_reserves(mp, tp, agno, &ask, &used);
+               error = xfs_rmapbt_calc_reserves(mp, tp, pag, &ask, &used);
                 if (error)
                         goto out;
   
@@@ -325,22 -324,10 +324,22 @@@ out
                 error2 = xfs_alloc_pagf_init(mp, tp, pag->pag_agno, 0);
                 if (error2)
                         return error2;
- -              ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
- -                     xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <=
- -                     pag->pagf_freeblks + pag->pagf_flcount);
+ +
+ +              /*
+ +               * If there isn't enough space in the AG to satisfy the
+ +               * reservation, let the caller know that there wasn't enough
+ +               * space.  Callers are responsible for deciding what to do
+ +               * next, since (in theory) we can stumble along with
+ +               * insufficient reservation if data blocks are being freed to
+ +               * replenish the AG's free space.
+ +               */
+ +              if (!error &&
+ +                  xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
+ +                  xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved >
+ +                  pag->pagf_freeblks + pag->pagf_flcount)
+ +                      error = -ENOSPC;
         }
+ +
         return error;
   }
   
diff --combined fs/xfs/libxfs/xfs_bmap.c

index a3e0e6f672d63ad29ec04e0704eb036cb8a12172,2086c55b67bdd2e24f238a46c13180c067d9a04b..7ae826cfe5a5fbdf90969c34bb9662e54a8447f9
--- 1/fs/xfs/libxfs/xfs_bmap.c
--- 2/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@@ -31,6 -31,7 +31,7 @@@
   #include "xfs_attr_leaf.h"
   #include "xfs_filestream.h"
   #include "xfs_rmap.h"
+ #include "xfs_ag.h"
   #include "xfs_ag_resv.h"
   #include "xfs_refcount.h"
   #include "xfs_icache.h"
@@@ -605,6 -606,7 +606,6 @@@ xfs_bmap_btree_to_extents
   
         ASSERT(cur);
         ASSERT(whichfork != XFS_COW_FORK);
- -      ASSERT(!xfs_need_iread_extents(ifp));
         ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
         ASSERT(be16_to_cpu(rblock->bb_level) == 1);
         ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
@@@ -5349,6 -5351,7 +5350,6 @@@ __xfs_bunmapi
         xfs_fsblock_t           sum;
         xfs_filblks_t           len = *rlen;    /* length to unmap in file */
         xfs_fileoff_t           max_len;
- -      xfs_agnumber_t          prev_agno = NULLAGNUMBER, agno;
         xfs_fileoff_t           end;
         struct xfs_iext_cursor  icur;
         bool                    done = false;
@@@ -5440,6 -5443,16 +5441,6 @@@
                 del = got;
                 wasdel = isnullstartblock(del.br_startblock);
   
- -              /*
- -               * Make sure we don't touch multiple AGF headers out of order
- -               * in a single transaction, as that could cause AB-BA deadlocks.
- -               */
- -              if (!wasdel && !isrt) {
- -                      agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
- -                      if (prev_agno != NULLAGNUMBER && prev_agno > agno)
- -                              break;
- -                      prev_agno = agno;
- -              }
                 if (got.br_startoff < start) {
                         del.br_startoff = start;
                         del.br_blockcount -= start - got.br_startoff;
diff --combined fs/xfs/scrub/common.c

index be38c960da85863ca6f530b2db3e52fc4f22cc80,64c3b9b78d0d435f7af1260bcc8208ebc650ef0c..cadfd57999092df41e1678fe7f88d7b0656413c9
--- 1/fs/xfs/scrub/common.c
--- 2/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@@ -12,7 -12,6 +12,6 @@@
   #include "xfs_btree.h"
   #include "xfs_log_format.h"
   #include "xfs_trans.h"
- #include "xfs_sb.h"
   #include "xfs_inode.h"
   #include "xfs_icache.h"
   #include "xfs_alloc.h"
@@@ -26,6 -25,7 +25,7 @@@
   #include "xfs_trans_priv.h"
   #include "xfs_attr.h"
   #include "xfs_reflink.h"
+ #include "xfs_ag.h"
   #include "scrub/scrub.h"
   #include "scrub/common.h"
   #include "scrub/trace.h"
@@@ -74,9 -74,7 +74,9 @@@ __xchk_process_error
                 return true;
         case -EDEADLOCK:
                 /* Used to restart an op with deadlock avoidance. */
- -              trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
+ +              trace_xchk_deadlock_retry(
+ +                              sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
+ +                              sc->sm, *error);
                 break;
         case -EFSBADCRC:
         case -EFSCORRUPTED:
@@@ -460,49 -458,48 +460,48 @@@ xchk_ag_btcur_init
         struct xchk_ag          *sa)
   {
         struct xfs_mount        *mp = sc->mp;
-       xfs_agnumber_t          agno = sa->agno;
   
         xchk_perag_get(sc->mp, sa);
         if (sa->agf_bp &&
             xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_BNO)) {
                 /* Set up a bnobt cursor for cross-referencing. */
                 sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
-                               agno, XFS_BTNUM_BNO);
+                               sa->pag, XFS_BTNUM_BNO);
         }
   
         if (sa->agf_bp &&
             xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_CNT)) {
                 /* Set up a cntbt cursor for cross-referencing. */
                 sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
-                               agno, XFS_BTNUM_CNT);
+                               sa->pag, XFS_BTNUM_CNT);
         }
   
         /* Set up a inobt cursor for cross-referencing. */
         if (sa->agi_bp &&
             xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) {
                 sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
-                                       agno, XFS_BTNUM_INO);
+                               sa->pag, XFS_BTNUM_INO);
         }
   
         /* Set up a finobt cursor for cross-referencing. */
         if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb) &&
             xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) {
                 sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
-                               agno, XFS_BTNUM_FINO);
+                               sa->pag, XFS_BTNUM_FINO);
         }
   
         /* Set up a rmapbt cursor for cross-referencing. */
         if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb) &&
             xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) {
                 sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
-                               agno);
+                               sa->pag);
         }
   
         /* Set up a refcountbt cursor for cross-referencing. */
         if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb) &&
             xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) {
                 sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
-                               sa->agf_bp, agno);
+                               sa->agf_bp, sa->pag);
         }
   }
   
diff --combined fs/xfs/xfs_buf.c

index a55471612150cfe70b21cad53ec36f5f879d94d9,5788b92fc0d2692044a40f3b483b4872bc94e69d..b4ee9d3532f038fcabf055838dfd87d1a32e3b5f
--- 1/fs/xfs/xfs_buf.c
--- 2/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@@ -10,7 -10,6 +10,6 @@@
   #include "xfs_format.h"
   #include "xfs_log_format.h"
   #include "xfs_trans_resv.h"
- #include "xfs_sb.h"
   #include "xfs_mount.h"
   #include "xfs_trace.h"
   #include "xfs_log.h"
@@@ -19,9 -18,13 +18,10 @@@
   #include "xfs_buf_item.h"
   #include "xfs_errortag.h"
   #include "xfs_error.h"
+ #include "xfs_ag.h"
   
   static kmem_zone_t *xfs_buf_zone;
   
- -#define xb_to_gfp(flags) \
- -      ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)
- -
   /*
    * Locking orders
    *
@@@ -76,7 -79,7 +76,7 @@@ static inline in
   xfs_buf_vmap_len(
         struct xfs_buf  *bp)
   {
- -      return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
+ +      return (bp->b_page_count * PAGE_SIZE);
   }
   
   /*
@@@ -269,30 -272,51 +269,30 @@@ _xfs_buf_alloc
         return 0;
   }
   
- -/*
- - *    Allocate a page array capable of holding a specified number
- - *    of pages, and point the page buf at it.
- - */
- -STATIC int
- -_xfs_buf_get_pages(
- -      struct xfs_buf          *bp,
- -      int                     page_count)
+ +static void
+ +xfs_buf_free_pages(
+ +      struct xfs_buf  *bp)
   {
- -      /* Make sure that we have a page list */
- -      if (bp->b_pages == NULL) {
- -              bp->b_page_count = page_count;
- -              if (page_count <= XB_PAGES) {
- -                      bp->b_pages = bp->b_page_array;
- -              } else {
- -                      bp->b_pages = kmem_alloc(sizeof(struct page *) *
- -                                               page_count, KM_NOFS);
- -                      if (bp->b_pages == NULL)
- -                              return -ENOMEM;
- -              }
- -              memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
+ +      uint            i;
+ +
+ +      ASSERT(bp->b_flags & _XBF_PAGES);
+ +
+ +      if (xfs_buf_is_vmapped(bp))
+ +              vm_unmap_ram(bp->b_addr, bp->b_page_count);
+ +
+ +      for (i = 0; i < bp->b_page_count; i++) {
+ +              if (bp->b_pages[i])
+ +                      __free_page(bp->b_pages[i]);
         }
- -      return 0;
- -}
+ +      if (current->reclaim_state)
+ +              current->reclaim_state->reclaimed_slab += bp->b_page_count;
   
- -/*
- - *    Frees b_pages if it was allocated.
- - */
- -STATIC void
- -_xfs_buf_free_pages(
- -      struct xfs_buf  *bp)
- -{
- -      if (bp->b_pages != bp->b_page_array) {
+ +      if (bp->b_pages != bp->b_page_array)
                 kmem_free(bp->b_pages);
- -              bp->b_pages = NULL;
- -      }
+ +      bp->b_pages = NULL;
+ +      bp->b_flags &= ~_XBF_PAGES;
   }
   
- -/*
- - *    Releases the specified buffer.
- - *
- - *    The modification state of any associated pages is left unchanged.
- - *    The buffer must not be on any hash - use xfs_buf_rele instead for
- - *    hashed and refcounted buffers
- - */
   static void
   xfs_buf_free(
         struct xfs_buf          *bp)
@@@ -301,103 -325,137 +301,103 @@@
   
         ASSERT(list_empty(&bp->b_lru));
   
- -      if (bp->b_flags & _XBF_PAGES) {
- -              uint            i;
- -
- -              if (xfs_buf_is_vmapped(bp))
- -                      vm_unmap_ram(bp->b_addr - bp->b_offset,
- -                                      bp->b_page_count);
- -
- -              for (i = 0; i < bp->b_page_count; i++) {
- -                      struct page     *page = bp->b_pages[i];
- -
- -                      __free_page(page);
- -              }
- -              if (current->reclaim_state)
- -                      current->reclaim_state->reclaimed_slab +=
- -                                                      bp->b_page_count;
- -      } else if (bp->b_flags & _XBF_KMEM)
+ +      if (bp->b_flags & _XBF_PAGES)
+ +              xfs_buf_free_pages(bp);
+ +      else if (bp->b_flags & _XBF_KMEM)
                 kmem_free(bp->b_addr);
- -      _xfs_buf_free_pages(bp);
+ +
         xfs_buf_free_maps(bp);
         kmem_cache_free(xfs_buf_zone, bp);
   }
   
- -/*
- - * Allocates all the pages for buffer in question and builds it's page list.
- - */
- -STATIC int
- -xfs_buf_allocate_memory(
- -      struct xfs_buf          *bp,
- -      uint                    flags)
+ +static int
+ +xfs_buf_alloc_kmem(
+ +      struct xfs_buf  *bp,
+ +      xfs_buf_flags_t flags)
   {
- -      size_t                  size;
- -      size_t                  nbytes, offset;
- -      gfp_t                   gfp_mask = xb_to_gfp(flags);
- -      unsigned short          page_count, i;
- -      xfs_off_t               start, end;
- -      int                     error;
- -      xfs_km_flags_t          kmflag_mask = 0;
+ +      int             align_mask = xfs_buftarg_dma_alignment(bp->b_target);
+ +      xfs_km_flags_t  kmflag_mask = KM_NOFS;
+ +      size_t          size = BBTOB(bp->b_length);
   
- -      /*
- -       * assure zeroed buffer for non-read cases.
- -       */
- -      if (!(flags & XBF_READ)) {
+ +      /* Assure zeroed buffer for non-read cases. */
+ +      if (!(flags & XBF_READ))
                 kmflag_mask |= KM_ZERO;
- -              gfp_mask |= __GFP_ZERO;
- -      }
   
- -      /*
- -       * for buffers that are contained within a single page, just allocate
- -       * the memory from the heap - there's no need for the complexity of
- -       * page arrays to keep allocation down to order 0.
- -       */
- -      size = BBTOB(bp->b_length);
- -      if (size < PAGE_SIZE) {
- -              int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
- -              bp->b_addr = kmem_alloc_io(size, align_mask,
- -                                         KM_NOFS | kmflag_mask);
- -              if (!bp->b_addr) {
- -                      /* low memory - use alloc_page loop instead */
- -                      goto use_alloc_page;
- -              }
+ +      bp->b_addr = kmem_alloc_io(size, align_mask, kmflag_mask);
+ +      if (!bp->b_addr)
+ +              return -ENOMEM;
   
- -              if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
- -                  ((unsigned long)bp->b_addr & PAGE_MASK)) {
- -                      /* b_addr spans two pages - use alloc_page instead */
- -                      kmem_free(bp->b_addr);
- -                      bp->b_addr = NULL;
- -                      goto use_alloc_page;
- -              }
- -              bp->b_offset = offset_in_page(bp->b_addr);
- -              bp->b_pages = bp->b_page_array;
- -              bp->b_pages[0] = kmem_to_page(bp->b_addr);
- -              bp->b_page_count = 1;
- -              bp->b_flags |= _XBF_KMEM;
- -              return 0;
+ +      if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
+ +          ((unsigned long)bp->b_addr & PAGE_MASK)) {
+ +              /* b_addr spans two pages - use alloc_page instead */
+ +              kmem_free(bp->b_addr);
+ +              bp->b_addr = NULL;
+ +              return -ENOMEM;
         }
+ +      bp->b_offset = offset_in_page(bp->b_addr);
+ +      bp->b_pages = bp->b_page_array;
+ +      bp->b_pages[0] = kmem_to_page(bp->b_addr);
+ +      bp->b_page_count = 1;
+ +      bp->b_flags |= _XBF_KMEM;
+ +      return 0;
+ +}
   
- -use_alloc_page:
- -      start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
- -      end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
- -                                                              >> PAGE_SHIFT;
- -      page_count = end - start;
- -      error = _xfs_buf_get_pages(bp, page_count);
- -      if (unlikely(error))
- -              return error;
+ +static int
+ +xfs_buf_alloc_pages(
+ +      struct xfs_buf  *bp,
+ +      xfs_buf_flags_t flags)
+ +{
+ +      gfp_t           gfp_mask = __GFP_NOWARN;
+ +      long            filled = 0;
   
- -      offset = bp->b_offset;
+ +      if (flags & XBF_READ_AHEAD)
+ +              gfp_mask |= __GFP_NORETRY;
+ +      else
+ +              gfp_mask |= GFP_NOFS;
+ +
+ +      /* Make sure that we have a page list */
+ +      bp->b_page_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE);
+ +      if (bp->b_page_count <= XB_PAGES) {
+ +              bp->b_pages = bp->b_page_array;
+ +      } else {
+ +              bp->b_pages = kzalloc(sizeof(struct page *) * bp->b_page_count,
+ +                                      gfp_mask);
+ +              if (!bp->b_pages)
+ +                      return -ENOMEM;
+ +      }
         bp->b_flags |= _XBF_PAGES;
   
- -      for (i = 0; i < bp->b_page_count; i++) {
- -              struct page     *page;
- -              uint            retries = 0;
- -retry:
- -              page = alloc_page(gfp_mask);
- -              if (unlikely(page == NULL)) {
- -                      if (flags & XBF_READ_AHEAD) {
- -                              bp->b_page_count = i;
- -                              error = -ENOMEM;
- -                              goto out_free_pages;
- -                      }
+ +      /* Assure zeroed buffer for non-read cases. */
+ +      if (!(flags & XBF_READ))
+ +              gfp_mask |= __GFP_ZERO;
   
- -                      /*
- -                       * This could deadlock.
- -                       *
- -                       * But until all the XFS lowlevel code is revamped to
- -                       * handle buffer allocation failures we can't do much.
- -                       */
- -                      if (!(++retries % 100))
- -                              xfs_err(NULL,
- -              "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
- -                                      current->comm, current->pid,
- -                                      __func__, gfp_mask);
- -
- -                      XFS_STATS_INC(bp->b_mount, xb_page_retries);
- -                      congestion_wait(BLK_RW_ASYNC, HZ/50);
- -                      goto retry;
+ +      /*
+ +       * Bulk filling of pages can take multiple calls. Not filling the entire
+ +       * array is not an allocation failure, so don't back off if we get at
+ +       * least one extra page.
+ +       */
+ +      for (;;) {
+ +              long    last = filled;
+ +
+ +              filled = alloc_pages_bulk_array(gfp_mask, bp->b_page_count,
+ +                                              bp->b_pages);
+ +              if (filled == bp->b_page_count) {
+ +                      XFS_STATS_INC(bp->b_mount, xb_page_found);
+ +                      break;
                 }
   
- -              XFS_STATS_INC(bp->b_mount, xb_page_found);
+ +              if (filled != last)
+ +                      continue;
   
- -              nbytes = min_t(size_t, size, PAGE_SIZE - offset);
- -              size -= nbytes;
- -              bp->b_pages[i] = page;
- -              offset = 0;
+ +              if (flags & XBF_READ_AHEAD) {
+ +                      xfs_buf_free_pages(bp);
+ +                      return -ENOMEM;
+ +              }
+ +
+ +              XFS_STATS_INC(bp->b_mount, xb_page_retries);
+ +              congestion_wait(BLK_RW_ASYNC, HZ / 50);
         }
         return 0;
- -
- -out_free_pages:
- -      for (i = 0; i < bp->b_page_count; i++)
- -              __free_page(bp->b_pages[i]);
- -      bp->b_flags &= ~_XBF_PAGES;
- -      return error;
   }
   
   /*
@@@ -411,7 -469,7 +411,7 @@@ _xfs_buf_map_pages
         ASSERT(bp->b_flags & _XBF_PAGES);
         if (bp->b_page_count == 1) {
                 /* A single page buffer is always mappable */
- -              bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
+ +              bp->b_addr = page_address(bp->b_pages[0]);
         } else if (flags & XBF_UNMAPPED) {
                 bp->b_addr = NULL;
         } else {
@@@ -438,6 -496,7 +438,6 @@@
   
                 if (!bp->b_addr)
                         return -ENOMEM;
- -              bp->b_addr += bp->b_offset;
         }
   
         return 0;
@@@ -661,22 -720,17 +661,22 @@@ xfs_buf_get_map
         if (error)
                 return error;
   
- -      error = xfs_buf_allocate_memory(new_bp, flags);
- -      if (error) {
- -              xfs_buf_free(new_bp);
- -              return error;
+ +      /*
+ +       * For buffers that fit entirely within a single page, first attempt to
+ +       * allocate the memory from the heap to minimise memory usage. If we
+ +       * can't get heap memory for these small buffers, we fall back to using
+ +       * the page allocator.
+ +       */
+ +      if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
+ +          xfs_buf_alloc_kmem(new_bp, flags) < 0) {
+ +              error = xfs_buf_alloc_pages(new_bp, flags);
+ +              if (error)
+ +                      goto out_free_buf;
         }
   
         error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
- -      if (error) {
- -              xfs_buf_free(new_bp);
- -              return error;
- -      }
+ +      if (error)
+ +              goto out_free_buf;
   
         if (bp != new_bp)
                 xfs_buf_free(new_bp);
@@@ -704,9 -758,6 +704,9 @@@ found
         trace_xfs_buf_get(bp, flags, _RET_IP_);
         *bpp = bp;
         return 0;
+ +out_free_buf:
+ +      xfs_buf_free(new_bp);
+ +      return error;
   }
   
   int
@@@ -899,7 -950,8 +899,7 @@@ xfs_buf_get_uncached
         int                     flags,
         struct xfs_buf          **bpp)
   {
- -      unsigned long           page_count;
- -      int                     error, i;
+ +      int                     error;
         struct xfs_buf          *bp;
         DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
   
@@@ -908,25 -960,41 +908,25 @@@
         /* flags might contain irrelevant bits, pass only what we care about */
         error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
         if (error)
- -              goto fail;
+ +              return error;
   
- -      page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
- -      error = _xfs_buf_get_pages(bp, page_count);
+ +      error = xfs_buf_alloc_pages(bp, flags);
         if (error)
                 goto fail_free_buf;
   
- -      for (i = 0; i < page_count; i++) {
- -              bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
- -              if (!bp->b_pages[i]) {
- -                      error = -ENOMEM;
- -                      goto fail_free_mem;
- -              }
- -      }
- -      bp->b_flags |= _XBF_PAGES;
- -
         error = _xfs_buf_map_pages(bp, 0);
         if (unlikely(error)) {
                 xfs_warn(target->bt_mount,
                         "%s: failed to map pages", __func__);
- -              goto fail_free_mem;
+ +              goto fail_free_buf;
         }
   
         trace_xfs_buf_get_uncached(bp, _RET_IP_);
         *bpp = bp;
         return 0;
   
- - fail_free_mem:
- -      while (--i >= 0)
- -              __free_page(bp->b_pages[i]);
- -      _xfs_buf_free_pages(bp);
- - fail_free_buf:
- -      xfs_buf_free_maps(bp);
- -      kmem_cache_free(xfs_buf_zone, bp);
- - fail:
+ +fail_free_buf:
+ +      xfs_buf_free(bp);
         return error;
   }
   
@@@ -1654,6 -1722,7 +1654,6 @@@ xfs_buf_offset
         if (bp->b_addr)
                 return bp->b_addr + offset;
   
- -      offset += bp->b_offset;
         page = bp->b_pages[offset >> PAGE_SHIFT];
         return page_address(page) + (offset & (PAGE_SIZE-1));
   }
diff --combined fs/xfs/xfs_inode.c

index e4c2da4566f13f1748de5c8c5e54c683dd7be866,336c350206a809ca0f7b5aebb71ec51166b7e35b..3cdcfa67b2902b0db5c33d5007f52e7461eb7011
--- 1/fs/xfs/xfs_inode.c
--- 2/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@@ -11,7 -11,6 +11,6 @@@
   #include "xfs_format.h"
   #include "xfs_log_format.h"
   #include "xfs_trans_resv.h"
- #include "xfs_sb.h"
   #include "xfs_mount.h"
   #include "xfs_defer.h"
   #include "xfs_inode.h"
@@@ -35,6 -34,7 +34,7 @@@
   #include "xfs_log.h"
   #include "xfs_bmap_btree.h"
   #include "xfs_reflink.h"
+ #include "xfs_ag.h"
   
   kmem_zone_t *xfs_inode_zone;
   
@@@ -45,7 -45,8 +45,8 @@@
   #define       XFS_ITRUNC_MAX_EXTENTS  2
   
   STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
- STATIC int xfs_iunlink_remove(struct xfs_trans *, struct xfs_inode *);
+ STATIC int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag,
+       struct xfs_inode *);
   
   /*
    * helper function to extract extent size hint from inode
@@@ -690,7 -691,6 +691,7 @@@ xfs_inode_inherit_flags
         const struct xfs_inode  *pip)
   {
         unsigned int            di_flags = 0;
+ +      xfs_failaddr_t          failaddr;
         umode_t                 mode = VFS_I(ip)->i_mode;
   
         if (S_ISDIR(mode)) {
@@@ -730,24 -730,6 +731,24 @@@
                 di_flags |= XFS_DIFLAG_FILESTREAM;
   
         ip->i_diflags |= di_flags;
+ +
+ +      /*
+ +       * Inode verifiers on older kernels only check that the extent size
+ +       * hint is an integer multiple of the rt extent size on realtime files.
+ +       * They did not check the hint alignment on a directory with both
+ +       * rtinherit and extszinherit flags set.  If the misaligned hint is
+ +       * propagated from a directory into a new realtime file, new file
+ +       * allocations will fail due to math errors in the rt allocator and/or
+ +       * trip the verifiers.  Validate the hint settings in the new file so
+ +       * that we don't let broken hints propagate.
+ +       */
+ +      failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize,
+ +                      VFS_I(ip)->i_mode, ip->i_diflags);
+ +      if (failaddr) {
+ +              ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
+ +                                 XFS_DIFLAG_EXTSZINHERIT);
+ +              ip->i_extsize = 0;
+ +      }
   }
   
   /* Propagate di_flags2 from a parent inode to a child inode. */
@@@ -756,29 -738,19 +757,29 @@@ xfs_inode_inherit_flags2
         struct xfs_inode        *ip,
         const struct xfs_inode  *pip)
   {
+ +      xfs_failaddr_t          failaddr;
+ +
         if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
                 ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
                 ip->i_cowextsize = pip->i_cowextsize;
         }
         if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
                 ip->i_diflags2 |= XFS_DIFLAG2_DAX;
+ +
+ +      /* Don't let invalid cowextsize hints propagate. */
+ +      failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
+ +                      VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2);
+ +      if (failaddr) {
+ +              ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
+ +              ip->i_cowextsize = 0;
+ +      }
   }
   
   /*
    * Initialise a newly allocated inode and return the in-core inode to the
    * caller locked exclusively.
    */
- static int
+ int
   xfs_init_new_inode(
         struct user_namespace   *mnt_userns,
         struct xfs_trans        *tp,
@@@ -914,57 -886,6 +915,6 @@@
         return 0;
   }
   
- /*
-  * Allocates a new inode from disk and return a pointer to the incore copy. This
-  * routine will internally commit the current transaction and allocate a new one
-  * if we needed to allocate more on-disk free inodes to perform the requested
-  * operation.
-  *
-  * If we are allocating quota inodes, we do not have a parent inode to attach to
-  * or associate with (i.e. dp == NULL) because they are not linked into the
-  * directory structure - they are attached directly to the superblock - and so
-  * have no parent.
-  */
- int
- xfs_dir_ialloc(
-       struct user_namespace   *mnt_userns,
-       struct xfs_trans        **tpp,
-       struct xfs_inode        *dp,
-       umode_t                 mode,
-       xfs_nlink_t             nlink,
-       dev_t                   rdev,
-       prid_t                  prid,
-       bool                    init_xattrs,
-       struct xfs_inode        **ipp)
- {
-       struct xfs_buf          *agibp;
-       xfs_ino_t               parent_ino = dp ? dp->i_ino : 0;
-       xfs_ino_t               ino;
-       int                     error;
- 
-       ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);
- 
-       /*
-        * Call the space management code to pick the on-disk inode to be
-        * allocated.
-        */
-       error = xfs_dialloc_select_ag(tpp, parent_ino, mode, &agibp);
-       if (error)
-               return error;
- 
-       if (!agibp)
-               return -ENOSPC;
- 
-       /* Allocate an inode from the selected AG */
-       error = xfs_dialloc_ag(*tpp, agibp, parent_ino, &ino);
-       if (error)
-               return error;
-       ASSERT(ino != NULLFSINO);
- 
-       return xfs_init_new_inode(mnt_userns, *tpp, dp, ino, mode, nlink, rdev,
-                                 prid, init_xattrs, ipp);
- }
- 
   /*
    * Decrement the link count on an inode & log the change.  If this causes the
    * link count to go to zero, move the inode to AGI unlinked list so that it can
@@@ -1022,6 -943,7 +972,7 @@@ xfs_create
         struct xfs_dquot        *pdqp = NULL;
         struct xfs_trans_res    *tres;
         uint                    resblks;
+       xfs_ino_t               ino;
   
         trace_xfs_create(dp, name);
   
@@@ -1078,14 -1000,16 +1029,16 @@@
          * entry pointing to them, but a directory also the "." entry
          * pointing to itself.
          */
-       error = xfs_dir_ialloc(mnt_userns, &tp, dp, mode, is_dir ? 2 : 1, rdev,
-                              prid, init_xattrs, &ip);
+       error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
+       if (!error)
+               error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode,
+                               is_dir ? 2 : 1, rdev, prid, init_xattrs, &ip);
         if (error)
                 goto out_trans_cancel;
   
         /*
          * Now we join the directory inode to the transaction.  We do not do it
-        * earlier because xfs_dir_ialloc might commit the previous transaction
+        * earlier because xfs_dialloc might commit the previous transaction
          * (and release all the locks).  An error from here on will result in
          * the transaction cancel unlocking dp so don't do it explicitly in the
          * error path.
@@@ -1175,6 -1099,7 +1128,7 @@@ xfs_create_tmpfile
         struct xfs_dquot        *pdqp = NULL;
         struct xfs_trans_res    *tres;
         uint                    resblks;
+       xfs_ino_t               ino;
   
         if (XFS_FORCED_SHUTDOWN(mp))
                 return -EIO;
@@@ -1199,8 -1124,10 +1153,10 @@@
         if (error)
                 goto out_release_dquots;
   
-       error = xfs_dir_ialloc(mnt_userns, &tp, dp, mode, 0, 0, prid,
-                               false, &ip);
+       error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
+       if (!error)
+               error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode,
+                               0, 0, prid, false, &ip);
         if (error)
                 goto out_trans_cancel;
   
@@@ -1315,7 -1242,11 +1271,11 @@@ xfs_link
          * Handle initial link state of O_TMPFILE inode
          */
         if (VFS_I(sip)->i_nlink == 0) {
-               error = xfs_iunlink_remove(tp, sip);
+               struct xfs_perag        *pag;
+ 
+               pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sip->i_ino));
+               error = xfs_iunlink_remove(tp, pag, sip);
+               xfs_perag_put(pag);
                 if (error)
                         goto error_return;
         }
@@@ -2008,7 -1939,7 +1968,7 @@@ xfs_iunlink_destroy
   STATIC int
   xfs_iunlink_update_bucket(
         struct xfs_trans        *tp,
-       xfs_agnumber_t          agno,
+       struct xfs_perag        *pag,
         struct xfs_buf          *agibp,
         unsigned int            bucket_index,
         xfs_agino_t             new_agino)
@@@ -2017,10 -1948,10 +1977,10 @@@
         xfs_agino_t             old_value;
         int                     offset;
   
-       ASSERT(xfs_verify_agino_or_null(tp->t_mountp, agno, new_agino));
+       ASSERT(xfs_verify_agino_or_null(tp->t_mountp, pag->pag_agno, new_agino));
   
         old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
-       trace_xfs_iunlink_update_bucket(tp->t_mountp, agno, bucket_index,
+       trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index,
                         old_value, new_agino);
   
         /*
@@@ -2044,7 -1975,7 +2004,7 @@@
   STATIC void
   xfs_iunlink_update_dinode(
         struct xfs_trans        *tp,
-       xfs_agnumber_t          agno,
+       struct xfs_perag        *pag,
         xfs_agino_t             agino,
         struct xfs_buf          *ibp,
         struct xfs_dinode       *dip,
@@@ -2054,9 -1985,9 +2014,9 @@@
         struct xfs_mount        *mp = tp->t_mountp;
         int                     offset;
   
-       ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino));
+       ASSERT(xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino));
   
-       trace_xfs_iunlink_update_dinode(mp, agno, agino,
+       trace_xfs_iunlink_update_dinode(mp, pag->pag_agno, agino,
                         be32_to_cpu(dip->di_next_unlinked), next_agino);
   
         dip->di_next_unlinked = cpu_to_be32(next_agino);
@@@ -2074,7 -2005,7 +2034,7 @@@ STATIC in
   xfs_iunlink_update_inode(
         struct xfs_trans        *tp,
         struct xfs_inode        *ip,
-       xfs_agnumber_t          agno,
+       struct xfs_perag        *pag,
         xfs_agino_t             next_agino,
         xfs_agino_t             *old_next_agino)
   {
@@@ -2084,7 -2015,7 +2044,7 @@@
         xfs_agino_t             old_value;
         int                     error;
   
-       ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino));
+       ASSERT(xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino));
   
         error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &ibp);
         if (error)
@@@ -2093,7 -2024,7 +2053,7 @@@
   
         /* Make sure the old pointer isn't garbage. */
         old_value = be32_to_cpu(dip->di_next_unlinked);
-       if (!xfs_verify_agino_or_null(mp, agno, old_value)) {
+       if (!xfs_verify_agino_or_null(mp, pag->pag_agno, old_value)) {
                 xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
                                 sizeof(*dip), __this_address);
                 error = -EFSCORRUPTED;
@@@ -2116,7 -2047,7 +2076,7 @@@
         }
   
         /* Ok, update the new pointer. */
-       xfs_iunlink_update_dinode(tp, agno, XFS_INO_TO_AGINO(mp, ip->i_ino),
+       xfs_iunlink_update_dinode(tp, pag, XFS_INO_TO_AGINO(mp, ip->i_ino),
                         ibp, dip, &ip->i_imap, next_agino);
         return 0;
   out:
@@@ -2137,10 -2068,10 +2097,10 @@@ xfs_iunlink
         struct xfs_inode        *ip)
   {
         struct xfs_mount        *mp = tp->t_mountp;
+       struct xfs_perag        *pag;
         struct xfs_agi          *agi;
         struct xfs_buf          *agibp;
         xfs_agino_t             next_agino;
-       xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
         xfs_agino_t             agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
         short                   bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
         int                     error;
@@@ -2149,10 -2080,12 +2109,12 @@@
         ASSERT(VFS_I(ip)->i_mode != 0);
         trace_xfs_iunlink(ip);
   
+       pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+ 
         /* Get the agi buffer first.  It ensures lock ordering on the list. */
-       error = xfs_read_agi(mp, tp, agno, &agibp);
+       error = xfs_read_agi(mp, tp, pag->pag_agno, &agibp);
         if (error)
-               return error;
+               goto out;
         agi = agibp->b_addr;
   
         /*
@@@ -2162,9 -2095,10 +2124,10 @@@
          */
         next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
         if (next_agino == agino ||
-           !xfs_verify_agino_or_null(mp, agno, next_agino)) {
+           !xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino)) {
                 xfs_buf_mark_corrupt(agibp);
-               return -EFSCORRUPTED;
+               error = -EFSCORRUPTED;
+               goto out;
         }
   
         if (next_agino != NULLAGINO) {
@@@ -2174,23 -2108,26 +2137,26 @@@
                  * There is already another inode in the bucket, so point this
                  * inode to the current head of the list.
                  */
-               error = xfs_iunlink_update_inode(tp, ip, agno, next_agino,
+               error = xfs_iunlink_update_inode(tp, ip, pag, next_agino,
                                 &old_agino);
                 if (error)
-                       return error;
+                       goto out;
                 ASSERT(old_agino == NULLAGINO);
   
                 /*
                  * agino has been unlinked, add a backref from the next inode
                  * back to agino.
                  */
-               error = xfs_iunlink_add_backref(agibp->b_pag, agino, next_agino);
+               error = xfs_iunlink_add_backref(pag, agino, next_agino);
                 if (error)
-                       return error;
+                       goto out;
         }
   
         /* Point the head of the list to point to this inode. */
-       return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index, agino);
+       error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
+ out:
+       xfs_perag_put(pag);
+       return error;
   }
   
   /* Return the imap, dinode pointer, and buffer for an inode. */
@@@ -2238,14 -2175,13 +2204,13 @@@ xfs_iunlink_map_ino
   STATIC int
   xfs_iunlink_map_prev(
         struct xfs_trans        *tp,
-       xfs_agnumber_t          agno,
+       struct xfs_perag        *pag,
         xfs_agino_t             head_agino,
         xfs_agino_t             target_agino,
         xfs_agino_t             *agino,
         struct xfs_imap         *imap,
         struct xfs_dinode       **dipp,
-       struct xfs_buf          **bpp,
-       struct xfs_perag        *pag)
+       struct xfs_buf          **bpp)
   {
         struct xfs_mount        *mp = tp->t_mountp;
         xfs_agino_t             next_agino;
@@@ -2257,7 -2193,8 +2222,8 @@@
         /* See if our backref cache can find it faster. */
         *agino = xfs_iunlink_lookup_backref(pag, target_agino);
         if (*agino != NULLAGINO) {
-               error = xfs_iunlink_map_ino(tp, agno, *agino, imap, dipp, bpp);
+               error = xfs_iunlink_map_ino(tp, pag->pag_agno, *agino, imap,
+                               dipp, bpp);
                 if (error)
                         return error;
   
@@@ -2273,7 -2210,7 +2239,7 @@@
                 WARN_ON_ONCE(1);
         }
   
-       trace_xfs_iunlink_map_prev_fallback(mp, agno);
+       trace_xfs_iunlink_map_prev_fallback(mp, pag->pag_agno);
   
         /* Otherwise, walk the entire bucket until we find it. */
         next_agino = head_agino;
@@@ -2284,8 -2221,8 +2250,8 @@@
                         xfs_trans_brelse(tp, *bpp);
   
                 *agino = next_agino;
-               error = xfs_iunlink_map_ino(tp, agno, next_agino, imap, dipp,
-                               bpp);
+               error = xfs_iunlink_map_ino(tp, pag->pag_agno, next_agino, imap,
+                               dipp, bpp);
                 if (error)
                         return error;
   
@@@ -2294,7 -2231,7 +2260,7 @@@
                  * Make sure this pointer is valid and isn't an obvious
                  * infinite loop.
                  */
-               if (!xfs_verify_agino(mp, agno, unlinked_agino) ||
+               if (!xfs_verify_agino(mp, pag->pag_agno, unlinked_agino) ||
                     next_agino == unlinked_agino) {
                         XFS_CORRUPTION_ERROR(__func__,
                                         XFS_ERRLEVEL_LOW, mp,
@@@ -2314,6 -2251,7 +2280,7 @@@
   STATIC int
   xfs_iunlink_remove(
         struct xfs_trans        *tp,
+       struct xfs_perag        *pag,
         struct xfs_inode        *ip)
   {
         struct xfs_mount        *mp = tp->t_mountp;
@@@ -2321,7 -2259,6 +2288,6 @@@
         struct xfs_buf          *agibp;
         struct xfs_buf          *last_ibp;
         struct xfs_dinode       *last_dip = NULL;
-       xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
         xfs_agino_t             agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
         xfs_agino_t             next_agino;
         xfs_agino_t             head_agino;
@@@ -2331,7 -2268,7 +2297,7 @@@
         trace_xfs_iunlink_remove(ip);
   
         /* Get the agi buffer first.  It ensures lock ordering on the list. */
-       error = xfs_read_agi(mp, tp, agno, &agibp);
+       error = xfs_read_agi(mp, tp, pag->pag_agno, &agibp);
         if (error)
                 return error;
         agi = agibp->b_addr;
@@@ -2341,7 -2278,7 +2307,7 @@@
          * go on.  Make sure the head pointer isn't garbage.
          */
         head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
-       if (!xfs_verify_agino(mp, agno, head_agino)) {
+       if (!xfs_verify_agino(mp, pag->pag_agno, head_agino)) {
                 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
                                 agi, sizeof(*agi));
                 return -EFSCORRUPTED;
@@@ -2352,7 -2289,7 +2318,7 @@@
          * the old pointer value so that we can update whatever was previous
          * to us in the list to point to whatever was next in the list.
          */
-       error = xfs_iunlink_update_inode(tp, ip, agno, NULLAGINO, &next_agino);
+       error = xfs_iunlink_update_inode(tp, ip, pag, NULLAGINO, &next_agino);
         if (error)
                 return error;
   
@@@ -2364,8 -2301,7 +2330,7 @@@
          * this inode's backref to point from the next inode.
          */
         if (next_agino != NULLAGINO) {
-               error = xfs_iunlink_change_backref(agibp->b_pag, next_agino,
-                               NULLAGINO);
+               error = xfs_iunlink_change_backref(pag, next_agino, NULLAGINO);
                 if (error)
                         return error;
         }
@@@ -2375,14 -2311,13 +2340,13 @@@
                 xfs_agino_t     prev_agino;
   
                 /* We need to search the list for the inode being freed. */
-               error = xfs_iunlink_map_prev(tp, agno, head_agino, agino,
-                               &prev_agino, &imap, &last_dip, &last_ibp,
-                               agibp->b_pag);
+               error = xfs_iunlink_map_prev(tp, pag, head_agino, agino,
+                               &prev_agino, &imap, &last_dip, &last_ibp);
                 if (error)
                         return error;
   
                 /* Point the previous inode on the list to the next inode. */
-               xfs_iunlink_update_dinode(tp, agno, prev_agino, last_ibp,
+               xfs_iunlink_update_dinode(tp, pag, prev_agino, last_ibp,
                                 last_dip, &imap, next_agino);
   
                 /*
@@@ -2398,7 -2333,7 +2362,7 @@@
         }
   
         /* Point the head of the list to the next unlinked inode. */
-       return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index,
+       return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index,
                         next_agino);
   }
   
@@@ -2409,12 -2344,11 +2373,11 @@@
    */
   static void
   xfs_ifree_mark_inode_stale(
-       struct xfs_buf          *bp,
+       struct xfs_perag        *pag,
         struct xfs_inode        *free_ip,
         xfs_ino_t               inum)
   {
-       struct xfs_mount        *mp = bp->b_mount;
-       struct xfs_perag        *pag = bp->b_pag;
+       struct xfs_mount        *mp = pag->pag_mount;
         struct xfs_inode_log_item *iip;
         struct xfs_inode        *ip;
   
@@@ -2504,10 -2438,11 +2467,11 @@@ out_iflags_unlock
    * inodes that are in memory - they all must be marked stale and attached to
    * the cluster buffer.
    */
- STATIC int
+ static int
   xfs_ifree_cluster(
-       struct xfs_inode        *free_ip,
         struct xfs_trans        *tp,
+       struct xfs_perag        *pag,
+       struct xfs_inode        *free_ip,
         struct xfs_icluster     *xic)
   {
         struct xfs_mount        *mp = free_ip->i_mount;
@@@ -2569,7 -2504,7 +2533,7 @@@
                  * already marked XFS_ISTALE.
                  */
                 for (i = 0; i < igeo->inodes_per_cluster; i++)
-                       xfs_ifree_mark_inode_stale(bp, free_ip, inum + i);
+                       xfs_ifree_mark_inode_stale(pag, free_ip, inum + i);
   
                 xfs_trans_stale_inode_buf(tp, bp);
                 xfs_trans_binval(tp, bp);
@@@ -2592,9 -2527,11 +2556,11 @@@ xfs_ifree
         struct xfs_trans        *tp,
         struct xfs_inode        *ip)
   {
-       int                     error;
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_perag        *pag;
         struct xfs_icluster     xic = { 0 };
         struct xfs_inode_log_item *iip = ip->i_itemp;
+       int                     error;
   
         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
         ASSERT(VFS_I(ip)->i_nlink == 0);
@@@ -2602,16 -2539,18 +2568,18 @@@
         ASSERT(ip->i_disk_size == 0 || !S_ISREG(VFS_I(ip)->i_mode));
         ASSERT(ip->i_nblocks == 0);
   
+       pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+ 
         /*
          * Pull the on-disk inode from the AGI unlinked list.
          */
-       error = xfs_iunlink_remove(tp, ip);
+       error = xfs_iunlink_remove(tp, pag, ip);
         if (error)
-               return error;
+               goto out;
   
-       error = xfs_difree(tp, ip->i_ino, &xic);
+       error = xfs_difree(tp, pag, ip->i_ino, &xic);
         if (error)
-               return error;
+               goto out;
   
         /*
          * Free any local-format data sitting around before we reset the
@@@ -2626,7 -2565,7 +2594,7 @@@
   
         VFS_I(ip)->i_mode = 0;          /* mark incore inode as free */
         ip->i_diflags = 0;
-       ip->i_diflags2 = ip->i_mount->m_ino_geo.new_diflags2;
+       ip->i_diflags2 = mp->m_ino_geo.new_diflags2;
         ip->i_forkoff = 0;              /* mark the attr fork not in use */
         ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
         if (xfs_iflags_test(ip, XFS_IPRESERVE_DM_FIELDS))
@@@ -2645,8 -2584,9 +2613,9 @@@
         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
   
         if (xic.deleted)
-               error = xfs_ifree_cluster(ip, tp, &xic);
- 
+               error = xfs_ifree_cluster(tp, pag, ip, &xic);
+ out:
+       xfs_perag_put(pag);
         return error;
   }
   
@@@ -3250,8 -3190,13 +3219,13 @@@ xfs_rename
          * in future.
          */
         if (wip) {
+               struct xfs_perag        *pag;
+ 
                 ASSERT(VFS_I(wip)->i_nlink == 0);
-               error = xfs_iunlink_remove(tp, wip);
+ 
+               pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, wip->i_ino));
+               error = xfs_iunlink_remove(tp, pag, wip);
+               xfs_perag_put(pag);
                 if (error)
                         goto out_trans_cancel;
author	Darrick J. Wong <[email protected]>
	Tue, 8 Jun 2021 16:13:13 +0000 (09:13 -0700)
committer	Darrick J. Wong <[email protected]>
	Tue, 8 Jun 2021 16:13:13 +0000 (09:13 -0700)
		1	2
fs/xfs/libxfs/xfs_ag.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/libxfs/xfs_ag_resv.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/libxfs/xfs_bmap.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/scrub/common.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_buf.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_inode.c	patch \|	diff1 \|	diff2 \|	blob \| history