]> Git Repo - J-linux.git/blob - fs/xfs/libxfs/xfs_bmap.c
Merge tag 'vfs-6.13-rc7.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
[J-linux.git] / fs / xfs / libxfs / xfs_bmap.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4  * All Rights Reserved.
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_sb.h"
14 #include "xfs_mount.h"
15 #include "xfs_defer.h"
16 #include "xfs_dir2.h"
17 #include "xfs_inode.h"
18 #include "xfs_btree.h"
19 #include "xfs_trans.h"
20 #include "xfs_alloc.h"
21 #include "xfs_bmap.h"
22 #include "xfs_bmap_util.h"
23 #include "xfs_bmap_btree.h"
24 #include "xfs_rtbitmap.h"
25 #include "xfs_errortag.h"
26 #include "xfs_error.h"
27 #include "xfs_quota.h"
28 #include "xfs_trans_space.h"
29 #include "xfs_buf_item.h"
30 #include "xfs_trace.h"
31 #include "xfs_attr_leaf.h"
32 #include "xfs_filestream.h"
33 #include "xfs_rmap.h"
34 #include "xfs_ag.h"
35 #include "xfs_ag_resv.h"
36 #include "xfs_refcount.h"
37 #include "xfs_icache.h"
38 #include "xfs_iomap.h"
39 #include "xfs_health.h"
40 #include "xfs_bmap_item.h"
41 #include "xfs_symlink_remote.h"
42 #include "xfs_inode_util.h"
43 #include "xfs_rtgroup.h"
44
45 struct kmem_cache               *xfs_bmap_intent_cache;
46
47 /*
48  * Miscellaneous helper functions
49  */
50
51 /*
52  * Compute and fill in the value of the maximum depth of a bmap btree
53  * in this filesystem.  Done once, during mount.
54  */
55 void
56 xfs_bmap_compute_maxlevels(
57         xfs_mount_t     *mp,            /* file system mount structure */
58         int             whichfork)      /* data or attr fork */
59 {
60         uint64_t        maxblocks;      /* max blocks at this level */
61         xfs_extnum_t    maxleafents;    /* max leaf entries possible */
62         int             level;          /* btree level */
63         int             maxrootrecs;    /* max records in root block */
64         int             minleafrecs;    /* min records in leaf block */
65         int             minnoderecs;    /* min records in node block */
66         int             sz;             /* root block size */
67
68         /*
69          * The maximum number of extents in a fork, hence the maximum number of
70          * leaf entries, is controlled by the size of the on-disk extent count.
71          *
72          * Note that we can no longer assume that if we are in ATTR1 that the
73          * fork offset of all the inodes will be
74          * (xfs_default_attroffset(ip) >> 3) because we could have mounted with
75          * ATTR2 and then mounted back with ATTR1, keeping the i_forkoff's fixed
76          * but probably at various positions. Therefore, for both ATTR1 and
77          * ATTR2 we have to assume the worst case scenario of a minimum size
78          * available.
79          */
80         maxleafents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
81                                 whichfork);
82         if (whichfork == XFS_DATA_FORK)
83                 sz = xfs_bmdr_space_calc(MINDBTPTRS);
84         else
85                 sz = xfs_bmdr_space_calc(MINABTPTRS);
86
87         maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
88         minleafrecs = mp->m_bmap_dmnr[0];
89         minnoderecs = mp->m_bmap_dmnr[1];
90         maxblocks = howmany_64(maxleafents, minleafrecs);
91         for (level = 1; maxblocks > 1; level++) {
92                 if (maxblocks <= maxrootrecs)
93                         maxblocks = 1;
94                 else
95                         maxblocks = howmany_64(maxblocks, minnoderecs);
96         }
97         mp->m_bm_maxlevels[whichfork] = level;
98         ASSERT(mp->m_bm_maxlevels[whichfork] <= xfs_bmbt_maxlevels_ondisk());
99 }
100
101 unsigned int
102 xfs_bmap_compute_attr_offset(
103         struct xfs_mount        *mp)
104 {
105         if (mp->m_sb.sb_inodesize == 256)
106                 return XFS_LITINO(mp) - xfs_bmdr_space_calc(MINABTPTRS);
107         return xfs_bmdr_space_calc(6 * MINABTPTRS);
108 }
109
110 STATIC int                              /* error */
111 xfs_bmbt_lookup_eq(
112         struct xfs_btree_cur    *cur,
113         struct xfs_bmbt_irec    *irec,
114         int                     *stat)  /* success/failure */
115 {
116         cur->bc_rec.b = *irec;
117         return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
118 }
119
120 STATIC int                              /* error */
121 xfs_bmbt_lookup_first(
122         struct xfs_btree_cur    *cur,
123         int                     *stat)  /* success/failure */
124 {
125         cur->bc_rec.b.br_startoff = 0;
126         cur->bc_rec.b.br_startblock = 0;
127         cur->bc_rec.b.br_blockcount = 0;
128         return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
129 }
130
131 /*
132  * Check if the inode needs to be converted to btree format.
133  */
134 static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
135 {
136         struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
137
138         return whichfork != XFS_COW_FORK &&
139                 ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
140                 ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
141 }
142
143 /*
144  * Check if the inode should be converted to extent format.
145  */
146 static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
147 {
148         struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
149
150         return whichfork != XFS_COW_FORK &&
151                 ifp->if_format == XFS_DINODE_FMT_BTREE &&
152                 ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
153 }
154
155 /*
156  * Update the record referred to by cur to the value given by irec
157  * This either works (return 0) or gets an EFSCORRUPTED error.
158  */
159 STATIC int
160 xfs_bmbt_update(
161         struct xfs_btree_cur    *cur,
162         struct xfs_bmbt_irec    *irec)
163 {
164         union xfs_btree_rec     rec;
165
166         xfs_bmbt_disk_set_all(&rec.bmbt, irec);
167         return xfs_btree_update(cur, &rec);
168 }
169
170 /*
171  * Compute the worst-case number of indirect blocks that will be used
172  * for ip's delayed extent of length "len".
173  */
174 STATIC xfs_filblks_t
175 xfs_bmap_worst_indlen(
176         xfs_inode_t     *ip,            /* incore inode pointer */
177         xfs_filblks_t   len)            /* delayed extent length */
178 {
179         int             level;          /* btree level number */
180         int             maxrecs;        /* maximum record count at this level */
181         xfs_mount_t     *mp;            /* mount structure */
182         xfs_filblks_t   rval;           /* return value */
183
184         mp = ip->i_mount;
185         maxrecs = mp->m_bmap_dmxr[0];
186         for (level = 0, rval = 0;
187              level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
188              level++) {
189                 len += maxrecs - 1;
190                 do_div(len, maxrecs);
191                 rval += len;
192                 if (len == 1)
193                         return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
194                                 level - 1;
195                 if (level == 0)
196                         maxrecs = mp->m_bmap_dmxr[1];
197         }
198         return rval;
199 }
200
201 /*
202  * Calculate the default attribute fork offset for newly created inodes.
203  */
204 uint
205 xfs_default_attroffset(
206         struct xfs_inode        *ip)
207 {
208         if (ip->i_df.if_format == XFS_DINODE_FMT_DEV)
209                 return roundup(sizeof(xfs_dev_t), 8);
210         return M_IGEO(ip->i_mount)->attr_fork_offset;
211 }
212
213 /*
214  * Helper routine to reset inode i_forkoff field when switching attribute fork
215  * from local to extent format - we reset it where possible to make space
216  * available for inline data fork extents.
217  */
218 STATIC void
219 xfs_bmap_forkoff_reset(
220         xfs_inode_t     *ip,
221         int             whichfork)
222 {
223         if (whichfork == XFS_ATTR_FORK &&
224             ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
225             ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
226                 uint    dfl_forkoff = xfs_default_attroffset(ip) >> 3;
227
228                 if (dfl_forkoff > ip->i_forkoff)
229                         ip->i_forkoff = dfl_forkoff;
230         }
231 }
232
233 static int
234 xfs_bmap_read_buf(
235         struct xfs_mount        *mp,            /* file system mount point */
236         struct xfs_trans        *tp,            /* transaction pointer */
237         xfs_fsblock_t           fsbno,          /* file system block number */
238         struct xfs_buf          **bpp)          /* buffer for fsbno */
239 {
240         struct xfs_buf          *bp;            /* return value */
241         int                     error;
242
243         if (!xfs_verify_fsbno(mp, fsbno))
244                 return -EFSCORRUPTED;
245         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
246                         XFS_FSB_TO_DADDR(mp, fsbno), mp->m_bsize, 0, &bp,
247                         &xfs_bmbt_buf_ops);
248         if (!error) {
249                 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
250                 *bpp = bp;
251         }
252         return error;
253 }
254
255 #ifdef DEBUG
256 STATIC struct xfs_buf *
257 xfs_bmap_get_bp(
258         struct xfs_btree_cur    *cur,
259         xfs_fsblock_t           bno)
260 {
261         struct xfs_log_item     *lip;
262         int                     i;
263
264         if (!cur)
265                 return NULL;
266
267         for (i = 0; i < cur->bc_maxlevels; i++) {
268                 if (!cur->bc_levels[i].bp)
269                         break;
270                 if (xfs_buf_daddr(cur->bc_levels[i].bp) == bno)
271                         return cur->bc_levels[i].bp;
272         }
273
274         /* Chase down all the log items to see if the bp is there */
275         list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
276                 struct xfs_buf_log_item *bip = (struct xfs_buf_log_item *)lip;
277
278                 if (bip->bli_item.li_type == XFS_LI_BUF &&
279                     xfs_buf_daddr(bip->bli_buf) == bno)
280                         return bip->bli_buf;
281         }
282
283         return NULL;
284 }
285
286 STATIC void
287 xfs_check_block(
288         struct xfs_btree_block  *block,
289         xfs_mount_t             *mp,
290         int                     root,
291         short                   sz)
292 {
293         int                     i, j, dmxr;
294         __be64                  *pp, *thispa;   /* pointer to block address */
295         xfs_bmbt_key_t          *prevp, *keyp;
296
297         ASSERT(be16_to_cpu(block->bb_level) > 0);
298
299         prevp = NULL;
300         for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
301                 dmxr = mp->m_bmap_dmxr[0];
302                 keyp = xfs_bmbt_key_addr(mp, block, i);
303
304                 if (prevp) {
305                         ASSERT(be64_to_cpu(prevp->br_startoff) <
306                                be64_to_cpu(keyp->br_startoff));
307                 }
308                 prevp = keyp;
309
310                 /*
311                  * Compare the block numbers to see if there are dups.
312                  */
313                 if (root)
314                         pp = xfs_bmap_broot_ptr_addr(mp, block, i, sz);
315                 else
316                         pp = xfs_bmbt_ptr_addr(mp, block, i, dmxr);
317
318                 for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
319                         if (root)
320                                 thispa = xfs_bmap_broot_ptr_addr(mp, block, j, sz);
321                         else
322                                 thispa = xfs_bmbt_ptr_addr(mp, block, j, dmxr);
323                         if (*thispa == *pp) {
324                                 xfs_warn(mp, "%s: thispa(%d) == pp(%d) %lld",
325                                         __func__, j, i,
326                                         (unsigned long long)be64_to_cpu(*thispa));
327                                 xfs_err(mp, "%s: ptrs are equal in node\n",
328                                         __func__);
329                                 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
330                         }
331                 }
332         }
333 }
334
335 /*
336  * Check that the extents for the inode ip are in the right order in all
337  * btree leaves. THis becomes prohibitively expensive for large extent count
338  * files, so don't bother with inodes that have more than 10,000 extents in
339  * them. The btree record ordering checks will still be done, so for such large
340  * bmapbt constructs that is going to catch most corruptions.
341  */
342 STATIC void
343 xfs_bmap_check_leaf_extents(
344         struct xfs_btree_cur    *cur,   /* btree cursor or null */
345         xfs_inode_t             *ip,            /* incore inode pointer */
346         int                     whichfork)      /* data or attr fork */
347 {
348         struct xfs_mount        *mp = ip->i_mount;
349         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
350         struct xfs_btree_block  *block; /* current btree block */
351         xfs_fsblock_t           bno;    /* block # of "block" */
352         struct xfs_buf          *bp;    /* buffer for "block" */
353         int                     error;  /* error return value */
354         xfs_extnum_t            i=0, j; /* index into the extents list */
355         int                     level;  /* btree level, for checking */
356         __be64                  *pp;    /* pointer to block address */
357         xfs_bmbt_rec_t          *ep;    /* pointer to current extent */
358         xfs_bmbt_rec_t          last = {0, 0}; /* last extent in prev block */
359         xfs_bmbt_rec_t          *nextp; /* pointer to next extent */
360         int                     bp_release = 0;
361
362         if (ifp->if_format != XFS_DINODE_FMT_BTREE)
363                 return;
364
365         /* skip large extent count inodes */
366         if (ip->i_df.if_nextents > 10000)
367                 return;
368
369         bno = NULLFSBLOCK;
370         block = ifp->if_broot;
371         /*
372          * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
373          */
374         level = be16_to_cpu(block->bb_level);
375         ASSERT(level > 0);
376         xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
377         pp = xfs_bmap_broot_ptr_addr(mp, block, 1, ifp->if_broot_bytes);
378         bno = be64_to_cpu(*pp);
379
380         ASSERT(bno != NULLFSBLOCK);
381         ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
382         ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
383
384         /*
385          * Go down the tree until leaf level is reached, following the first
386          * pointer (leftmost) at each level.
387          */
388         while (level-- > 0) {
389                 /* See if buf is in cur first */
390                 bp_release = 0;
391                 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
392                 if (!bp) {
393                         bp_release = 1;
394                         error = xfs_bmap_read_buf(mp, NULL, bno, &bp);
395                         if (xfs_metadata_is_sick(error))
396                                 xfs_btree_mark_sick(cur);
397                         if (error)
398                                 goto error_norelse;
399                 }
400                 block = XFS_BUF_TO_BLOCK(bp);
401                 if (level == 0)
402                         break;
403
404                 /*
405                  * Check this block for basic sanity (increasing keys and
406                  * no duplicate blocks).
407                  */
408
409                 xfs_check_block(block, mp, 0, 0);
410                 pp = xfs_bmbt_ptr_addr(mp, block, 1, mp->m_bmap_dmxr[1]);
411                 bno = be64_to_cpu(*pp);
412                 if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
413                         xfs_btree_mark_sick(cur);
414                         error = -EFSCORRUPTED;
415                         goto error0;
416                 }
417                 if (bp_release) {
418                         bp_release = 0;
419                         xfs_trans_brelse(NULL, bp);
420                 }
421         }
422
423         /*
424          * Here with bp and block set to the leftmost leaf node in the tree.
425          */
426         i = 0;
427
428         /*
429          * Loop over all leaf nodes checking that all extents are in the right order.
430          */
431         for (;;) {
432                 xfs_fsblock_t   nextbno;
433                 xfs_extnum_t    num_recs;
434
435
436                 num_recs = xfs_btree_get_numrecs(block);
437
438                 /*
439                  * Read-ahead the next leaf block, if any.
440                  */
441
442                 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
443
444                 /*
445                  * Check all the extents to make sure they are OK.
446                  * If we had a previous block, the last entry should
447                  * conform with the first entry in this one.
448                  */
449
450                 ep = xfs_bmbt_rec_addr(mp, block, 1);
451                 if (i) {
452                         ASSERT(xfs_bmbt_disk_get_startoff(&last) +
453                                xfs_bmbt_disk_get_blockcount(&last) <=
454                                xfs_bmbt_disk_get_startoff(ep));
455                 }
456                 for (j = 1; j < num_recs; j++) {
457                         nextp = xfs_bmbt_rec_addr(mp, block, j + 1);
458                         ASSERT(xfs_bmbt_disk_get_startoff(ep) +
459                                xfs_bmbt_disk_get_blockcount(ep) <=
460                                xfs_bmbt_disk_get_startoff(nextp));
461                         ep = nextp;
462                 }
463
464                 last = *ep;
465                 i += num_recs;
466                 if (bp_release) {
467                         bp_release = 0;
468                         xfs_trans_brelse(NULL, bp);
469                 }
470                 bno = nextbno;
471                 /*
472                  * If we've reached the end, stop.
473                  */
474                 if (bno == NULLFSBLOCK)
475                         break;
476
477                 bp_release = 0;
478                 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
479                 if (!bp) {
480                         bp_release = 1;
481                         error = xfs_bmap_read_buf(mp, NULL, bno, &bp);
482                         if (xfs_metadata_is_sick(error))
483                                 xfs_btree_mark_sick(cur);
484                         if (error)
485                                 goto error_norelse;
486                 }
487                 block = XFS_BUF_TO_BLOCK(bp);
488         }
489
490         return;
491
492 error0:
493         xfs_warn(mp, "%s: at error0", __func__);
494         if (bp_release)
495                 xfs_trans_brelse(NULL, bp);
496 error_norelse:
497         xfs_warn(mp, "%s: BAD after btree leaves for %llu extents",
498                 __func__, i);
499         xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
500         xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
501         return;
502 }
503
504 /*
505  * Validate that the bmbt_irecs being returned from bmapi are valid
506  * given the caller's original parameters.  Specifically check the
507  * ranges of the returned irecs to ensure that they only extend beyond
508  * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
509  */
510 STATIC void
511 xfs_bmap_validate_ret(
512         xfs_fileoff_t           bno,
513         xfs_filblks_t           len,
514         uint32_t                flags,
515         xfs_bmbt_irec_t         *mval,
516         int                     nmap,
517         int                     ret_nmap)
518 {
519         int                     i;              /* index to map values */
520
521         ASSERT(ret_nmap <= nmap);
522
523         for (i = 0; i < ret_nmap; i++) {
524                 ASSERT(mval[i].br_blockcount > 0);
525                 if (!(flags & XFS_BMAPI_ENTIRE)) {
526                         ASSERT(mval[i].br_startoff >= bno);
527                         ASSERT(mval[i].br_blockcount <= len);
528                         ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
529                                bno + len);
530                 } else {
531                         ASSERT(mval[i].br_startoff < bno + len);
532                         ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
533                                bno);
534                 }
535                 ASSERT(i == 0 ||
536                        mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
537                        mval[i].br_startoff);
538                 ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
539                        mval[i].br_startblock != HOLESTARTBLOCK);
540                 ASSERT(mval[i].br_state == XFS_EXT_NORM ||
541                        mval[i].br_state == XFS_EXT_UNWRITTEN);
542         }
543 }
544
545 #else
546 #define xfs_bmap_check_leaf_extents(cur, ip, whichfork)         do { } while (0)
547 #define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)    do { } while (0)
548 #endif /* DEBUG */
549
550 /*
551  * Inode fork format manipulation functions
552  */
553
554 /*
555  * Convert the inode format to extent format if it currently is in btree format,
556  * but the extent list is small enough that it fits into the extent format.
557  *
558  * Since the extents are already in-core, all we have to do is give up the space
559  * for the btree root and pitch the leaf block.
560  */
561 STATIC int                              /* error */
562 xfs_bmap_btree_to_extents(
563         struct xfs_trans        *tp,    /* transaction pointer */
564         struct xfs_inode        *ip,    /* incore inode pointer */
565         struct xfs_btree_cur    *cur,   /* btree cursor */
566         int                     *logflagsp, /* inode logging flags */
567         int                     whichfork)  /* data or attr fork */
568 {
569         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
570         struct xfs_mount        *mp = ip->i_mount;
571         struct xfs_btree_block  *rblock = ifp->if_broot;
572         struct xfs_btree_block  *cblock;/* child btree block */
573         xfs_fsblock_t           cbno;   /* child block number */
574         struct xfs_buf          *cbp;   /* child block's buffer */
575         int                     error;  /* error return value */
576         __be64                  *pp;    /* ptr to block address */
577         struct xfs_owner_info   oinfo;
578
579         /* check if we actually need the extent format first: */
580         if (!xfs_bmap_wants_extents(ip, whichfork))
581                 return 0;
582
583         ASSERT(cur);
584         ASSERT(whichfork != XFS_COW_FORK);
585         ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
586         ASSERT(be16_to_cpu(rblock->bb_level) == 1);
587         ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
588         ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, false) == 1);
589
590         pp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, ifp->if_broot_bytes);
591         cbno = be64_to_cpu(*pp);
592 #ifdef DEBUG
593         if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_verify_fsbno(mp, cbno))) {
594                 xfs_btree_mark_sick(cur);
595                 return -EFSCORRUPTED;
596         }
597 #endif
598         error = xfs_bmap_read_buf(mp, tp, cbno, &cbp);
599         if (xfs_metadata_is_sick(error))
600                 xfs_btree_mark_sick(cur);
601         if (error)
602                 return error;
603         cblock = XFS_BUF_TO_BLOCK(cbp);
604         if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
605                 return error;
606
607         xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
608         error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
609                         XFS_AG_RESV_NONE, 0);
610         if (error)
611                 return error;
612
613         ip->i_nblocks--;
614         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
615         xfs_trans_binval(tp, cbp);
616         if (cur->bc_levels[0].bp == cbp)
617                 cur->bc_levels[0].bp = NULL;
618         xfs_iroot_realloc(ip, -1, whichfork);
619         ASSERT(ifp->if_broot == NULL);
620         ifp->if_format = XFS_DINODE_FMT_EXTENTS;
621         *logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
622         return 0;
623 }
624
625 /*
626  * Convert an extents-format file into a btree-format file.
627  * The new file will have a root block (in the inode) and a single child block.
628  */
629 STATIC int                                      /* error */
630 xfs_bmap_extents_to_btree(
631         struct xfs_trans        *tp,            /* transaction pointer */
632         struct xfs_inode        *ip,            /* incore inode pointer */
633         struct xfs_btree_cur    **curp,         /* cursor returned to caller */
634         int                     wasdel,         /* converting a delayed alloc */
635         int                     *logflagsp,     /* inode logging flags */
636         int                     whichfork)      /* data or attr fork */
637 {
638         struct xfs_btree_block  *ablock;        /* allocated (child) bt block */
639         struct xfs_buf          *abp;           /* buffer for ablock */
640         struct xfs_alloc_arg    args;           /* allocation arguments */
641         struct xfs_bmbt_rec     *arp;           /* child record pointer */
642         struct xfs_btree_block  *block;         /* btree root block */
643         struct xfs_btree_cur    *cur;           /* bmap btree cursor */
644         int                     error;          /* error return value */
645         struct xfs_ifork        *ifp;           /* inode fork pointer */
646         struct xfs_bmbt_key     *kp;            /* root block key pointer */
647         struct xfs_mount        *mp;            /* mount structure */
648         xfs_bmbt_ptr_t          *pp;            /* root block address pointer */
649         struct xfs_iext_cursor  icur;
650         struct xfs_bmbt_irec    rec;
651         xfs_extnum_t            cnt = 0;
652
653         mp = ip->i_mount;
654         ASSERT(whichfork != XFS_COW_FORK);
655         ifp = xfs_ifork_ptr(ip, whichfork);
656         ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);
657
658         /*
659          * Make space in the inode incore. This needs to be undone if we fail
660          * to expand the root.
661          */
662         xfs_iroot_realloc(ip, 1, whichfork);
663
664         /*
665          * Fill in the root.
666          */
667         block = ifp->if_broot;
668         xfs_bmbt_init_block(ip, block, NULL, 1, 1);
669         /*
670          * Need a cursor.  Can't allocate until bb_level is filled in.
671          */
672         cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
673         if (wasdel)
674                 cur->bc_flags |= XFS_BTREE_BMBT_WASDEL;
675         /*
676          * Convert to a btree with two levels, one record in root.
677          */
678         ifp->if_format = XFS_DINODE_FMT_BTREE;
679         memset(&args, 0, sizeof(args));
680         args.tp = tp;
681         args.mp = mp;
682         xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
683
684         args.minlen = args.maxlen = args.prod = 1;
685         args.wasdel = wasdel;
686         *logflagsp = 0;
687         error = xfs_alloc_vextent_start_ag(&args,
688                                 XFS_INO_TO_FSB(mp, ip->i_ino));
689         if (error)
690                 goto out_root_realloc;
691
692         /*
693          * Allocation can't fail, the space was reserved.
694          */
695         if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
696                 error = -ENOSPC;
697                 goto out_root_realloc;
698         }
699
700         cur->bc_bmap.allocated++;
701         ip->i_nblocks++;
702         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
703         error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
704                         XFS_FSB_TO_DADDR(mp, args.fsbno),
705                         mp->m_bsize, 0, &abp);
706         if (error)
707                 goto out_unreserve_dquot;
708
709         /*
710          * Fill in the child block.
711          */
712         ablock = XFS_BUF_TO_BLOCK(abp);
713         xfs_bmbt_init_block(ip, ablock, abp, 0, 0);
714
715         for_each_xfs_iext(ifp, &icur, &rec) {
716                 if (isnullstartblock(rec.br_startblock))
717                         continue;
718                 arp = xfs_bmbt_rec_addr(mp, ablock, 1 + cnt);
719                 xfs_bmbt_disk_set_all(arp, &rec);
720                 cnt++;
721         }
722         ASSERT(cnt == ifp->if_nextents);
723         xfs_btree_set_numrecs(ablock, cnt);
724
725         /*
726          * Fill in the root key and pointer.
727          */
728         kp = xfs_bmbt_key_addr(mp, block, 1);
729         arp = xfs_bmbt_rec_addr(mp, ablock, 1);
730         kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
731         pp = xfs_bmbt_ptr_addr(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
732                                                 be16_to_cpu(block->bb_level)));
733         *pp = cpu_to_be64(args.fsbno);
734
735         /*
736          * Do all this logging at the end so that
737          * the root is at the right level.
738          */
739         xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
740         xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
741         ASSERT(*curp == NULL);
742         *curp = cur;
743         *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
744         return 0;
745
746 out_unreserve_dquot:
747         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
748 out_root_realloc:
749         xfs_iroot_realloc(ip, -1, whichfork);
750         ifp->if_format = XFS_DINODE_FMT_EXTENTS;
751         ASSERT(ifp->if_broot == NULL);
752         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
753
754         return error;
755 }
756
757 /*
758  * Convert a local file to an extents file.
759  * This code is out of bounds for data forks of regular files,
760  * since the file data needs to get logged so things will stay consistent.
761  * (The bmap-level manipulations are ok, though).
762  */
763 void
764 xfs_bmap_local_to_extents_empty(
765         struct xfs_trans        *tp,
766         struct xfs_inode        *ip,
767         int                     whichfork)
768 {
769         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
770
771         ASSERT(whichfork != XFS_COW_FORK);
772         ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
773         ASSERT(ifp->if_bytes == 0);
774         ASSERT(ifp->if_nextents == 0);
775
776         xfs_bmap_forkoff_reset(ip, whichfork);
777         ifp->if_data = NULL;
778         ifp->if_height = 0;
779         ifp->if_format = XFS_DINODE_FMT_EXTENTS;
780         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
781 }
782
783
784 int                                     /* error */
785 xfs_bmap_local_to_extents(
786         xfs_trans_t     *tp,            /* transaction pointer */
787         xfs_inode_t     *ip,            /* incore inode pointer */
788         xfs_extlen_t    total,          /* total blocks needed by transaction */
789         int             *logflagsp,     /* inode logging flags */
790         int             whichfork,
791         void            (*init_fn)(struct xfs_trans *tp,
792                                    struct xfs_buf *bp,
793                                    struct xfs_inode *ip,
794                                    struct xfs_ifork *ifp, void *priv),
795         void            *priv)
796 {
797         int             error = 0;
798         int             flags;          /* logging flags returned */
799         struct xfs_ifork *ifp;          /* inode fork pointer */
800         xfs_alloc_arg_t args;           /* allocation arguments */
801         struct xfs_buf  *bp;            /* buffer for extent block */
802         struct xfs_bmbt_irec rec;
803         struct xfs_iext_cursor icur;
804
805         /*
806          * We don't want to deal with the case of keeping inode data inline yet.
807          * So sending the data fork of a regular inode is invalid.
808          */
809         ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
810         ifp = xfs_ifork_ptr(ip, whichfork);
811         ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
812
813         if (!ifp->if_bytes) {
814                 xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
815                 flags = XFS_ILOG_CORE;
816                 goto done;
817         }
818
819         flags = 0;
820         error = 0;
821         memset(&args, 0, sizeof(args));
822         args.tp = tp;
823         args.mp = ip->i_mount;
824         args.total = total;
825         args.minlen = args.maxlen = args.prod = 1;
826         xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
827
828         /*
829          * Allocate a block.  We know we need only one, since the
830          * file currently fits in an inode.
831          */
832         args.total = total;
833         args.minlen = args.maxlen = args.prod = 1;
834         error = xfs_alloc_vextent_start_ag(&args,
835                         XFS_INO_TO_FSB(args.mp, ip->i_ino));
836         if (error)
837                 goto done;
838
839         /* Can't fail, the space was reserved. */
840         ASSERT(args.fsbno != NULLFSBLOCK);
841         ASSERT(args.len == 1);
842         error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
843                         XFS_FSB_TO_DADDR(args.mp, args.fsbno),
844                         args.mp->m_bsize, 0, &bp);
845         if (error)
846                 goto done;
847
848         /*
849          * Initialize the block, copy the data and log the remote buffer.
850          *
851          * The callout is responsible for logging because the remote format
852          * might differ from the local format and thus we don't know how much to
853          * log here. Note that init_fn must also set the buffer log item type
854          * correctly.
855          */
856         init_fn(tp, bp, ip, ifp, priv);
857
858         /* account for the change in fork size */
859         xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
860         xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
861         flags |= XFS_ILOG_CORE;
862
863         ifp->if_data = NULL;
864         ifp->if_height = 0;
865
866         rec.br_startoff = 0;
867         rec.br_startblock = args.fsbno;
868         rec.br_blockcount = 1;
869         rec.br_state = XFS_EXT_NORM;
870         xfs_iext_first(ifp, &icur);
871         xfs_iext_insert(ip, &icur, &rec, 0);
872
873         ifp->if_nextents = 1;
874         ip->i_nblocks = 1;
875         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
876         flags |= xfs_ilog_fext(whichfork);
877
878 done:
879         *logflagsp = flags;
880         return error;
881 }
882
883 /*
884  * Called from xfs_bmap_add_attrfork to handle btree format files.
885  */
886 STATIC int                                      /* error */
887 xfs_bmap_add_attrfork_btree(
888         xfs_trans_t             *tp,            /* transaction pointer */
889         xfs_inode_t             *ip,            /* incore inode pointer */
890         int                     *flags)         /* inode logging flags */
891 {
892         struct xfs_btree_block  *block = ip->i_df.if_broot;
893         struct xfs_btree_cur    *cur;           /* btree cursor */
894         int                     error;          /* error return value */
895         xfs_mount_t             *mp;            /* file system mount struct */
896         int                     stat;           /* newroot status */
897
898         mp = ip->i_mount;
899
900         if (xfs_bmap_bmdr_space(block) <= xfs_inode_data_fork_size(ip))
901                 *flags |= XFS_ILOG_DBROOT;
902         else {
903                 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
904                 error = xfs_bmbt_lookup_first(cur, &stat);
905                 if (error)
906                         goto error0;
907                 /* must be at least one entry */
908                 if (XFS_IS_CORRUPT(mp, stat != 1)) {
909                         xfs_btree_mark_sick(cur);
910                         error = -EFSCORRUPTED;
911                         goto error0;
912                 }
913                 if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
914                         goto error0;
915                 if (stat == 0) {
916                         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
917                         return -ENOSPC;
918                 }
919                 cur->bc_bmap.allocated = 0;
920                 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
921         }
922         return 0;
923 error0:
924         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
925         return error;
926 }
927
928 /*
929  * Called from xfs_bmap_add_attrfork to handle extents format files.
930  */
931 STATIC int                                      /* error */
932 xfs_bmap_add_attrfork_extents(
933         struct xfs_trans        *tp,            /* transaction pointer */
934         struct xfs_inode        *ip,            /* incore inode pointer */
935         int                     *flags)         /* inode logging flags */
936 {
937         struct xfs_btree_cur    *cur;           /* bmap btree cursor */
938         int                     error;          /* error return value */
939
940         if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
941             xfs_inode_data_fork_size(ip))
942                 return 0;
943         cur = NULL;
944         error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
945                                           XFS_DATA_FORK);
946         if (cur) {
947                 cur->bc_bmap.allocated = 0;
948                 xfs_btree_del_cursor(cur, error);
949         }
950         return error;
951 }
952
953 /*
954  * Called from xfs_bmap_add_attrfork to handle local format files. Each
955  * different data fork content type needs a different callout to do the
956  * conversion. Some are basic and only require special block initialisation
957  * callouts for the data formating, others (directories) are so specialised they
958  * handle everything themselves.
959  *
960  * XXX (dgc): investigate whether directory conversion can use the generic
961  * formatting callout. It should be possible - it's just a very complex
962  * formatter.
963  */
964 STATIC int                                      /* error */
965 xfs_bmap_add_attrfork_local(
966         struct xfs_trans        *tp,            /* transaction pointer */
967         struct xfs_inode        *ip,            /* incore inode pointer */
968         int                     *flags)         /* inode logging flags */
969 {
970         struct xfs_da_args      dargs;          /* args for dir/attr code */
971
972         if (ip->i_df.if_bytes <= xfs_inode_data_fork_size(ip))
973                 return 0;
974
975         if (S_ISDIR(VFS_I(ip)->i_mode)) {
976                 memset(&dargs, 0, sizeof(dargs));
977                 dargs.geo = ip->i_mount->m_dir_geo;
978                 dargs.dp = ip;
979                 dargs.total = dargs.geo->fsbcount;
980                 dargs.whichfork = XFS_DATA_FORK;
981                 dargs.trans = tp;
982                 dargs.owner = ip->i_ino;
983                 return xfs_dir2_sf_to_block(&dargs);
984         }
985
986         if (S_ISLNK(VFS_I(ip)->i_mode))
987                 return xfs_bmap_local_to_extents(tp, ip, 1, flags,
988                                 XFS_DATA_FORK, xfs_symlink_local_to_remote,
989                                 NULL);
990
991         /* should only be called for types that support local format data */
992         ASSERT(0);
993         xfs_bmap_mark_sick(ip, XFS_ATTR_FORK);
994         return -EFSCORRUPTED;
995 }
996
997 /*
998  * Set an inode attr fork offset based on the format of the data fork.
999  */
1000 static int
1001 xfs_bmap_set_attrforkoff(
1002         struct xfs_inode        *ip,
1003         int                     size,
1004         int                     *version)
1005 {
1006         int                     default_size = xfs_default_attroffset(ip) >> 3;
1007
1008         switch (ip->i_df.if_format) {
1009         case XFS_DINODE_FMT_DEV:
1010                 ip->i_forkoff = default_size;
1011                 break;
1012         case XFS_DINODE_FMT_LOCAL:
1013         case XFS_DINODE_FMT_EXTENTS:
1014         case XFS_DINODE_FMT_BTREE:
1015                 ip->i_forkoff = xfs_attr_shortform_bytesfit(ip, size);
1016                 if (!ip->i_forkoff)
1017                         ip->i_forkoff = default_size;
1018                 else if (xfs_has_attr2(ip->i_mount) && version)
1019                         *version = 2;
1020                 break;
1021         default:
1022                 ASSERT(0);
1023                 return -EINVAL;
1024         }
1025
1026         return 0;
1027 }
1028
1029 /*
1030  * Convert inode from non-attributed to attributed.  Caller must hold the
1031  * ILOCK_EXCL and the file cannot have an attr fork.
1032  */
1033 int                                             /* error code */
1034 xfs_bmap_add_attrfork(
1035         struct xfs_trans        *tp,
1036         struct xfs_inode        *ip,            /* incore inode pointer */
1037         int                     size,           /* space new attribute needs */
1038         int                     rsvd)           /* xact may use reserved blks */
1039 {
1040         struct xfs_mount        *mp = tp->t_mountp;
1041         int                     version = 1;    /* superblock attr version */
1042         int                     logflags;       /* logging flags */
1043         int                     error;          /* error return value */
1044
1045         xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
1046         if (xfs_is_metadir_inode(ip))
1047                 ASSERT(XFS_IS_DQDETACHED(ip));
1048         else
1049                 ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1050         ASSERT(!xfs_inode_has_attr_fork(ip));
1051
1052         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1053         error = xfs_bmap_set_attrforkoff(ip, size, &version);
1054         if (error)
1055                 return error;
1056
1057         xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
1058         logflags = 0;
1059         switch (ip->i_df.if_format) {
1060         case XFS_DINODE_FMT_LOCAL:
1061                 error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
1062                 break;
1063         case XFS_DINODE_FMT_EXTENTS:
1064                 error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
1065                 break;
1066         case XFS_DINODE_FMT_BTREE:
1067                 error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
1068                 break;
1069         default:
1070                 error = 0;
1071                 break;
1072         }
1073         if (logflags)
1074                 xfs_trans_log_inode(tp, ip, logflags);
1075         if (error)
1076                 return error;
1077         if (!xfs_has_attr(mp) ||
1078            (!xfs_has_attr2(mp) && version == 2)) {
1079                 bool log_sb = false;
1080
1081                 spin_lock(&mp->m_sb_lock);
1082                 if (!xfs_has_attr(mp)) {
1083                         xfs_add_attr(mp);
1084                         log_sb = true;
1085                 }
1086                 if (!xfs_has_attr2(mp) && version == 2) {
1087                         xfs_add_attr2(mp);
1088                         log_sb = true;
1089                 }
1090                 spin_unlock(&mp->m_sb_lock);
1091                 if (log_sb)
1092                         xfs_log_sb(tp);
1093         }
1094
1095         return 0;
1096 }
1097
1098 /*
1099  * Internal and external extent tree search functions.
1100  */
1101
/* State carried across blocks while bmbt records are read into a fork. */
struct xfs_iread_state {
	/* incore extent cursor at which the next record is inserted */
	struct xfs_iext_cursor	icur;
	/* number of records copied into the incore extent map so far */
	xfs_extnum_t		loaded;
};
1106
1107 int
1108 xfs_bmap_complain_bad_rec(
1109         struct xfs_inode                *ip,
1110         int                             whichfork,
1111         xfs_failaddr_t                  fa,
1112         const struct xfs_bmbt_irec      *irec)
1113 {
1114         struct xfs_mount                *mp = ip->i_mount;
1115         const char                      *forkname;
1116
1117         switch (whichfork) {
1118         case XFS_DATA_FORK:     forkname = "data"; break;
1119         case XFS_ATTR_FORK:     forkname = "attr"; break;
1120         case XFS_COW_FORK:      forkname = "CoW"; break;
1121         default:                forkname = "???"; break;
1122         }
1123
1124         xfs_warn(mp,
1125  "Bmap BTree record corruption in inode 0x%llx %s fork detected at %pS!",
1126                                 ip->i_ino, forkname, fa);
1127         xfs_warn(mp,
1128                 "Offset 0x%llx, start block 0x%llx, block count 0x%llx state 0x%x",
1129                 irec->br_startoff, irec->br_startblock, irec->br_blockcount,
1130                 irec->br_state);
1131
1132         return -EFSCORRUPTED;
1133 }
1134
1135 /* Stuff every bmbt record from this block into the incore extent map. */
1136 static int
1137 xfs_iread_bmbt_block(
1138         struct xfs_btree_cur    *cur,
1139         int                     level,
1140         void                    *priv)
1141 {
1142         struct xfs_iread_state  *ir = priv;
1143         struct xfs_mount        *mp = cur->bc_mp;
1144         struct xfs_inode        *ip = cur->bc_ino.ip;
1145         struct xfs_btree_block  *block;
1146         struct xfs_buf          *bp;
1147         struct xfs_bmbt_rec     *frp;
1148         xfs_extnum_t            num_recs;
1149         xfs_extnum_t            j;
1150         int                     whichfork = cur->bc_ino.whichfork;
1151         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
1152
1153         block = xfs_btree_get_block(cur, level, &bp);
1154
1155         /* Abort if we find more records than nextents. */
1156         num_recs = xfs_btree_get_numrecs(block);
1157         if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
1158                 xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
1159                                 (unsigned long long)ip->i_ino);
1160                 xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
1161                                 sizeof(*block), __this_address);
1162                 xfs_bmap_mark_sick(ip, whichfork);
1163                 return -EFSCORRUPTED;
1164         }
1165
1166         /* Copy records into the incore cache. */
1167         frp = xfs_bmbt_rec_addr(mp, block, 1);
1168         for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
1169                 struct xfs_bmbt_irec    new;
1170                 xfs_failaddr_t          fa;
1171
1172                 xfs_bmbt_disk_get_all(frp, &new);
1173                 fa = xfs_bmap_validate_extent(ip, whichfork, &new);
1174                 if (fa) {
1175                         xfs_inode_verifier_error(ip, -EFSCORRUPTED,
1176                                         "xfs_iread_extents(2)", frp,
1177                                         sizeof(*frp), fa);
1178                         xfs_bmap_mark_sick(ip, whichfork);
1179                         return xfs_bmap_complain_bad_rec(ip, whichfork, fa,
1180                                         &new);
1181                 }
1182                 xfs_iext_insert(ip, &ir->icur, &new,
1183                                 xfs_bmap_fork_to_state(whichfork));
1184                 trace_xfs_read_extent(ip, &ir->icur,
1185                                 xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
1186                 xfs_iext_next(ifp, &ir->icur);
1187         }
1188
1189         return 0;
1190 }
1191
1192 /*
1193  * Read in extents from a btree-format inode.
1194  */
1195 int
1196 xfs_iread_extents(
1197         struct xfs_trans        *tp,
1198         struct xfs_inode        *ip,
1199         int                     whichfork)
1200 {
1201         struct xfs_iread_state  ir;
1202         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
1203         struct xfs_mount        *mp = ip->i_mount;
1204         struct xfs_btree_cur    *cur;
1205         int                     error;
1206
1207         if (!xfs_need_iread_extents(ifp))
1208                 return 0;
1209
1210         xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
1211
1212         ir.loaded = 0;
1213         xfs_iext_first(ifp, &ir.icur);
1214         cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
1215         error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block,
1216                         XFS_BTREE_VISIT_RECORDS, &ir);
1217         xfs_btree_del_cursor(cur, error);
1218         if (error)
1219                 goto out;
1220
1221         if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
1222                 xfs_bmap_mark_sick(ip, whichfork);
1223                 error = -EFSCORRUPTED;
1224                 goto out;
1225         }
1226         ASSERT(ir.loaded == xfs_iext_count(ifp));
1227         /*
1228          * Use release semantics so that we can use acquire semantics in
1229          * xfs_need_iread_extents and be guaranteed to see a valid mapping tree
1230          * after that load.
1231          */
1232         smp_store_release(&ifp->if_needextents, 0);
1233         return 0;
1234 out:
1235         if (xfs_metadata_is_sick(error))
1236                 xfs_bmap_mark_sick(ip, whichfork);
1237         xfs_iext_destroy(ifp);
1238         return error;
1239 }
1240
1241 /*
1242  * Returns the relative block number of the first unused block(s) in the given
1243  * fork with at least "len" logically contiguous blocks free.  This is the
1244  * lowest-address hole if the fork has holes, else the first block past the end
1245  * of fork.  Return 0 if the fork is currently local (in-inode).
1246  */
1247 int                                             /* error */
1248 xfs_bmap_first_unused(
1249         struct xfs_trans        *tp,            /* transaction pointer */
1250         struct xfs_inode        *ip,            /* incore inode */
1251         xfs_extlen_t            len,            /* size of hole to find */
1252         xfs_fileoff_t           *first_unused,  /* unused block */
1253         int                     whichfork)      /* data or attr fork */
1254 {
1255         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
1256         struct xfs_bmbt_irec    got;
1257         struct xfs_iext_cursor  icur;
1258         xfs_fileoff_t           lastaddr = 0;
1259         xfs_fileoff_t           lowest, max;
1260         int                     error;
1261
1262         if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
1263                 *first_unused = 0;
1264                 return 0;
1265         }
1266
1267         ASSERT(xfs_ifork_has_extents(ifp));
1268
1269         error = xfs_iread_extents(tp, ip, whichfork);
1270         if (error)
1271                 return error;
1272
1273         lowest = max = *first_unused;
1274         for_each_xfs_iext(ifp, &icur, &got) {
1275                 /*
1276                  * See if the hole before this extent will work.
1277                  */
1278                 if (got.br_startoff >= lowest + len &&
1279                     got.br_startoff - max >= len)
1280                         break;
1281                 lastaddr = got.br_startoff + got.br_blockcount;
1282                 max = XFS_FILEOFF_MAX(lastaddr, lowest);
1283         }
1284
1285         *first_unused = max;
1286         return 0;
1287 }
1288
1289 /*
1290  * Returns the file-relative block number of the last block - 1 before
1291  * last_block (input value) in the file.
1292  * This is not based on i_size, it is based on the extent records.
1293  * Returns 0 for local files, as they do not have extent records.
1294  */
1295 int                                             /* error */
1296 xfs_bmap_last_before(
1297         struct xfs_trans        *tp,            /* transaction pointer */
1298         struct xfs_inode        *ip,            /* incore inode */
1299         xfs_fileoff_t           *last_block,    /* last block */
1300         int                     whichfork)      /* data or attr fork */
1301 {
1302         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
1303         struct xfs_bmbt_irec    got;
1304         struct xfs_iext_cursor  icur;
1305         int                     error;
1306
1307         switch (ifp->if_format) {
1308         case XFS_DINODE_FMT_LOCAL:
1309                 *last_block = 0;
1310                 return 0;
1311         case XFS_DINODE_FMT_BTREE:
1312         case XFS_DINODE_FMT_EXTENTS:
1313                 break;
1314         default:
1315                 ASSERT(0);
1316                 xfs_bmap_mark_sick(ip, whichfork);
1317                 return -EFSCORRUPTED;
1318         }
1319
1320         error = xfs_iread_extents(tp, ip, whichfork);
1321         if (error)
1322                 return error;
1323
1324         if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
1325                 *last_block = 0;
1326         return 0;
1327 }
1328
1329 int
1330 xfs_bmap_last_extent(
1331         struct xfs_trans        *tp,
1332         struct xfs_inode        *ip,
1333         int                     whichfork,
1334         struct xfs_bmbt_irec    *rec,
1335         int                     *is_empty)
1336 {
1337         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
1338         struct xfs_iext_cursor  icur;
1339         int                     error;
1340
1341         error = xfs_iread_extents(tp, ip, whichfork);
1342         if (error)
1343                 return error;
1344
1345         xfs_iext_last(ifp, &icur);
1346         if (!xfs_iext_get_extent(ifp, &icur, rec))
1347                 *is_empty = 1;
1348         else
1349                 *is_empty = 0;
1350         return 0;
1351 }
1352
1353 /*
1354  * Check the last inode extent to determine whether this allocation will result
1355  * in blocks being allocated at the end of the file. When we allocate new data
1356  * blocks at the end of the file which do not start at the previous data block,
1357  * we will try to align the new blocks at stripe unit boundaries.
1358  *
1359  * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1360  * at, or past the EOF.
1361  */
1362 STATIC int
1363 xfs_bmap_isaeof(
1364         struct xfs_bmalloca     *bma,
1365         int                     whichfork)
1366 {
1367         struct xfs_bmbt_irec    rec;
1368         int                     is_empty;
1369         int                     error;
1370
1371         bma->aeof = false;
1372         error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1373                                      &is_empty);
1374         if (error)
1375                 return error;
1376
1377         if (is_empty) {
1378                 bma->aeof = true;
1379                 return 0;
1380         }
1381
1382         /*
1383          * Check if we are allocation or past the last extent, or at least into
1384          * the last delayed allocated extent.
1385          */
1386         bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1387                 (bma->offset >= rec.br_startoff &&
1388                  isnullstartblock(rec.br_startblock));
1389         return 0;
1390 }
1391
1392 /*
1393  * Returns the file-relative block number of the first block past eof in
1394  * the file.  This is not based on i_size, it is based on the extent records.
1395  * Returns 0 for local files, as they do not have extent records.
1396  */
1397 int
1398 xfs_bmap_last_offset(
1399         struct xfs_inode        *ip,
1400         xfs_fileoff_t           *last_block,
1401         int                     whichfork)
1402 {
1403         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
1404         struct xfs_bmbt_irec    rec;
1405         int                     is_empty;
1406         int                     error;
1407
1408         *last_block = 0;
1409
1410         if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
1411                 return 0;
1412
1413         if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp))) {
1414                 xfs_bmap_mark_sick(ip, whichfork);
1415                 return -EFSCORRUPTED;
1416         }
1417
1418         error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1419         if (error || is_empty)
1420                 return error;
1421
1422         *last_block = rec.br_startoff + rec.br_blockcount;
1423         return 0;
1424 }
1425
1426 /*
1427  * Extent tree manipulation functions used during allocation.
1428  */
1429
1430 static inline bool
1431 xfs_bmap_same_rtgroup(
1432         struct xfs_inode        *ip,
1433         int                     whichfork,
1434         struct xfs_bmbt_irec    *left,
1435         struct xfs_bmbt_irec    *right)
1436 {
1437         struct xfs_mount        *mp = ip->i_mount;
1438
1439         if (xfs_ifork_is_realtime(ip, whichfork) && xfs_has_rtgroups(mp)) {
1440                 if (xfs_rtb_to_rgno(mp, left->br_startblock) !=
1441                     xfs_rtb_to_rgno(mp, right->br_startblock))
1442                         return false;
1443         }
1444
1445         return true;
1446 }
1447
1448 /*
1449  * Convert a delayed allocation to a real allocation.
1450  */
1451 STATIC int                              /* error */
1452 xfs_bmap_add_extent_delay_real(
1453         struct xfs_bmalloca     *bma,
1454         int                     whichfork)
1455 {
1456         struct xfs_mount        *mp = bma->ip->i_mount;
1457         struct xfs_ifork        *ifp = xfs_ifork_ptr(bma->ip, whichfork);
1458         struct xfs_bmbt_irec    *new = &bma->got;
1459         int                     error;  /* error return value */
1460         int                     i;      /* temp state */
1461         xfs_fileoff_t           new_endoff;     /* end offset of new entry */
1462         xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
1463                                         /* left is 0, right is 1, prev is 2 */
1464         int                     rval=0; /* return value (logging flags) */
1465         uint32_t                state = xfs_bmap_fork_to_state(whichfork);
1466         xfs_filblks_t           da_new; /* new count del alloc blocks used */
1467         xfs_filblks_t           da_old; /* old count del alloc blocks used */
1468         xfs_filblks_t           temp=0; /* value for da_new calculations */
1469         int                     tmp_rval;       /* partial logging flags */
1470         struct xfs_bmbt_irec    old;
1471
1472         ASSERT(whichfork != XFS_ATTR_FORK);
1473         ASSERT(!isnullstartblock(new->br_startblock));
1474         ASSERT(!bma->cur || (bma->cur->bc_flags & XFS_BTREE_BMBT_WASDEL));
1475
1476         XFS_STATS_INC(mp, xs_add_exlist);
1477
1478 #define LEFT            r[0]
1479 #define RIGHT           r[1]
1480 #define PREV            r[2]
1481
1482         /*
1483          * Set up a bunch of variables to make the tests simpler.
1484          */
1485         xfs_iext_get_extent(ifp, &bma->icur, &PREV);
1486         new_endoff = new->br_startoff + new->br_blockcount;
1487         ASSERT(isnullstartblock(PREV.br_startblock));
1488         ASSERT(PREV.br_startoff <= new->br_startoff);
1489         ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1490
1491         da_old = startblockval(PREV.br_startblock);
1492         da_new = 0;
1493
1494         /*
1495          * Set flags determining what part of the previous delayed allocation
1496          * extent is being replaced by a real allocation.
1497          */
1498         if (PREV.br_startoff == new->br_startoff)
1499                 state |= BMAP_LEFT_FILLING;
1500         if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1501                 state |= BMAP_RIGHT_FILLING;
1502
1503         /*
1504          * Check and set flags if this segment has a left neighbor.
1505          * Don't set contiguous if the combined extent would be too large.
1506          */
1507         if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
1508                 state |= BMAP_LEFT_VALID;
1509                 if (isnullstartblock(LEFT.br_startblock))
1510                         state |= BMAP_LEFT_DELAY;
1511         }
1512
1513         if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1514             LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1515             LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1516             LEFT.br_state == new->br_state &&
1517             LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
1518             xfs_bmap_same_rtgroup(bma->ip, whichfork, &LEFT, new))
1519                 state |= BMAP_LEFT_CONTIG;
1520
1521         /*
1522          * Check and set flags if this segment has a right neighbor.
1523          * Don't set contiguous if the combined extent would be too large.
1524          * Also check for all-three-contiguous being too large.
1525          */
1526         if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
1527                 state |= BMAP_RIGHT_VALID;
1528                 if (isnullstartblock(RIGHT.br_startblock))
1529                         state |= BMAP_RIGHT_DELAY;
1530         }
1531
1532         if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1533             new_endoff == RIGHT.br_startoff &&
1534             new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1535             new->br_state == RIGHT.br_state &&
1536             new->br_blockcount + RIGHT.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
1537             ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1538                        BMAP_RIGHT_FILLING)) !=
1539                       (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1540                        BMAP_RIGHT_FILLING) ||
1541              LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1542                         <= XFS_MAX_BMBT_EXTLEN) &&
1543             xfs_bmap_same_rtgroup(bma->ip, whichfork, new, &RIGHT))
1544                 state |= BMAP_RIGHT_CONTIG;
1545
1546         error = 0;
1547         /*
1548          * Switch out based on the FILLING and CONTIG state bits.
1549          */
1550         switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1551                          BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1552         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1553              BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1554                 /*
1555                  * Filling in all of a previously delayed allocation extent.
1556                  * The left and right neighbors are both contiguous with new.
1557                  */
1558                 LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
1559
1560                 xfs_iext_remove(bma->ip, &bma->icur, state);
1561                 xfs_iext_remove(bma->ip, &bma->icur, state);
1562                 xfs_iext_prev(ifp, &bma->icur);
1563                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1564                 ifp->if_nextents--;
1565
1566                 if (bma->cur == NULL)
1567                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1568                 else {
1569                         rval = XFS_ILOG_CORE;
1570                         error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1571                         if (error)
1572                                 goto done;
1573                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1574                                 xfs_btree_mark_sick(bma->cur);
1575                                 error = -EFSCORRUPTED;
1576                                 goto done;
1577                         }
1578                         error = xfs_btree_delete(bma->cur, &i);
1579                         if (error)
1580                                 goto done;
1581                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1582                                 xfs_btree_mark_sick(bma->cur);
1583                                 error = -EFSCORRUPTED;
1584                                 goto done;
1585                         }
1586                         error = xfs_btree_decrement(bma->cur, 0, &i);
1587                         if (error)
1588                                 goto done;
1589                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1590                                 xfs_btree_mark_sick(bma->cur);
1591                                 error = -EFSCORRUPTED;
1592                                 goto done;
1593                         }
1594                         error = xfs_bmbt_update(bma->cur, &LEFT);
1595                         if (error)
1596                                 goto done;
1597                 }
1598                 ASSERT(da_new <= da_old);
1599                 break;
1600
1601         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1602                 /*
1603                  * Filling in all of a previously delayed allocation extent.
1604                  * The left neighbor is contiguous, the right is not.
1605                  */
1606                 old = LEFT;
1607                 LEFT.br_blockcount += PREV.br_blockcount;
1608
1609                 xfs_iext_remove(bma->ip, &bma->icur, state);
1610                 xfs_iext_prev(ifp, &bma->icur);
1611                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1612
1613                 if (bma->cur == NULL)
1614                         rval = XFS_ILOG_DEXT;
1615                 else {
1616                         rval = 0;
1617                         error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1618                         if (error)
1619                                 goto done;
1620                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1621                                 xfs_btree_mark_sick(bma->cur);
1622                                 error = -EFSCORRUPTED;
1623                                 goto done;
1624                         }
1625                         error = xfs_bmbt_update(bma->cur, &LEFT);
1626                         if (error)
1627                                 goto done;
1628                 }
1629                 ASSERT(da_new <= da_old);
1630                 break;
1631
1632         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1633                 /*
1634                  * Filling in all of a previously delayed allocation extent.
1635                  * The right neighbor is contiguous, the left is not. Take care
1636                  * with delay -> unwritten extent allocation here because the
1637                  * delalloc record we are overwriting is always written.
1638                  */
1639                 PREV.br_startblock = new->br_startblock;
1640                 PREV.br_blockcount += RIGHT.br_blockcount;
1641                 PREV.br_state = new->br_state;
1642
1643                 xfs_iext_next(ifp, &bma->icur);
1644                 xfs_iext_remove(bma->ip, &bma->icur, state);
1645                 xfs_iext_prev(ifp, &bma->icur);
1646                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1647
1648                 if (bma->cur == NULL)
1649                         rval = XFS_ILOG_DEXT;
1650                 else {
1651                         rval = 0;
1652                         error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1653                         if (error)
1654                                 goto done;
1655                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1656                                 xfs_btree_mark_sick(bma->cur);
1657                                 error = -EFSCORRUPTED;
1658                                 goto done;
1659                         }
1660                         error = xfs_bmbt_update(bma->cur, &PREV);
1661                         if (error)
1662                                 goto done;
1663                 }
1664                 ASSERT(da_new <= da_old);
1665                 break;
1666
1667         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1668                 /*
1669                  * Filling in all of a previously delayed allocation extent.
1670                  * Neither the left nor right neighbors are contiguous with
1671                  * the new one.
1672                  */
1673                 PREV.br_startblock = new->br_startblock;
1674                 PREV.br_state = new->br_state;
1675                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1676                 ifp->if_nextents++;
1677
1678                 if (bma->cur == NULL)
1679                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1680                 else {
1681                         rval = XFS_ILOG_CORE;
1682                         error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1683                         if (error)
1684                                 goto done;
1685                         if (XFS_IS_CORRUPT(mp, i != 0)) {
1686                                 xfs_btree_mark_sick(bma->cur);
1687                                 error = -EFSCORRUPTED;
1688                                 goto done;
1689                         }
1690                         error = xfs_btree_insert(bma->cur, &i);
1691                         if (error)
1692                                 goto done;
1693                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1694                                 xfs_btree_mark_sick(bma->cur);
1695                                 error = -EFSCORRUPTED;
1696                                 goto done;
1697                         }
1698                 }
1699                 ASSERT(da_new <= da_old);
1700                 break;
1701
1702         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1703                 /*
1704                  * Filling in the first part of a previous delayed allocation.
1705                  * The left neighbor is contiguous.
1706                  */
1707                 old = LEFT;
1708                 temp = PREV.br_blockcount - new->br_blockcount;
1709                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1710                                 startblockval(PREV.br_startblock));
1711
1712                 LEFT.br_blockcount += new->br_blockcount;
1713
1714                 PREV.br_blockcount = temp;
1715                 PREV.br_startoff += new->br_blockcount;
1716                 PREV.br_startblock = nullstartblock(da_new);
1717
1718                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1719                 xfs_iext_prev(ifp, &bma->icur);
1720                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1721
1722                 if (bma->cur == NULL)
1723                         rval = XFS_ILOG_DEXT;
1724                 else {
1725                         rval = 0;
1726                         error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1727                         if (error)
1728                                 goto done;
1729                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1730                                 xfs_btree_mark_sick(bma->cur);
1731                                 error = -EFSCORRUPTED;
1732                                 goto done;
1733                         }
1734                         error = xfs_bmbt_update(bma->cur, &LEFT);
1735                         if (error)
1736                                 goto done;
1737                 }
1738                 ASSERT(da_new <= da_old);
1739                 break;
1740
1741         case BMAP_LEFT_FILLING:
1742                 /*
1743                  * Filling in the first part of a previous delayed allocation.
1744                  * The left neighbor is not contiguous.
1745                  */
1746                 xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1747                 ifp->if_nextents++;
1748
1749                 if (bma->cur == NULL)
1750                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1751                 else {
1752                         rval = XFS_ILOG_CORE;
1753                         error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1754                         if (error)
1755                                 goto done;
1756                         if (XFS_IS_CORRUPT(mp, i != 0)) {
1757                                 xfs_btree_mark_sick(bma->cur);
1758                                 error = -EFSCORRUPTED;
1759                                 goto done;
1760                         }
1761                         error = xfs_btree_insert(bma->cur, &i);
1762                         if (error)
1763                                 goto done;
1764                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1765                                 xfs_btree_mark_sick(bma->cur);
1766                                 error = -EFSCORRUPTED;
1767                                 goto done;
1768                         }
1769                 }
1770
1771                 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1772                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1773                                         &bma->cur, 1, &tmp_rval, whichfork);
1774                         rval |= tmp_rval;
1775                         if (error)
1776                                 goto done;
1777                 }
1778
1779                 temp = PREV.br_blockcount - new->br_blockcount;
1780                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1781                         startblockval(PREV.br_startblock) -
1782                         (bma->cur ? bma->cur->bc_bmap.allocated : 0));
1783
1784                 PREV.br_startoff = new_endoff;
1785                 PREV.br_blockcount = temp;
1786                 PREV.br_startblock = nullstartblock(da_new);
1787                 xfs_iext_next(ifp, &bma->icur);
1788                 xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1789                 xfs_iext_prev(ifp, &bma->icur);
1790                 break;
1791
1792         case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1793                 /*
1794                  * Filling in the last part of a previous delayed allocation.
1795                  * The right neighbor is contiguous with the new allocation.
1796                  */
1797                 old = RIGHT;
1798                 RIGHT.br_startoff = new->br_startoff;
1799                 RIGHT.br_startblock = new->br_startblock;
1800                 RIGHT.br_blockcount += new->br_blockcount;
1801
1802                 if (bma->cur == NULL)
1803                         rval = XFS_ILOG_DEXT;
1804                 else {
1805                         rval = 0;
1806                         error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1807                         if (error)
1808                                 goto done;
1809                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1810                                 xfs_btree_mark_sick(bma->cur);
1811                                 error = -EFSCORRUPTED;
1812                                 goto done;
1813                         }
1814                         error = xfs_bmbt_update(bma->cur, &RIGHT);
1815                         if (error)
1816                                 goto done;
1817                 }
1818
1819                 temp = PREV.br_blockcount - new->br_blockcount;
1820                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1821                         startblockval(PREV.br_startblock));
1822
1823                 PREV.br_blockcount = temp;
1824                 PREV.br_startblock = nullstartblock(da_new);
1825
1826                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1827                 xfs_iext_next(ifp, &bma->icur);
1828                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
1829                 ASSERT(da_new <= da_old);
1830                 break;
1831
1832         case BMAP_RIGHT_FILLING:
1833                 /*
1834                  * Filling in the last part of a previous delayed allocation.
1835                  * The right neighbor is not contiguous.
1836                  */
1837                 xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1838                 ifp->if_nextents++;
1839
1840                 if (bma->cur == NULL)
1841                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1842                 else {
1843                         rval = XFS_ILOG_CORE;
1844                         error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1845                         if (error)
1846                                 goto done;
1847                         if (XFS_IS_CORRUPT(mp, i != 0)) {
1848                                 xfs_btree_mark_sick(bma->cur);
1849                                 error = -EFSCORRUPTED;
1850                                 goto done;
1851                         }
1852                         error = xfs_btree_insert(bma->cur, &i);
1853                         if (error)
1854                                 goto done;
1855                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1856                                 xfs_btree_mark_sick(bma->cur);
1857                                 error = -EFSCORRUPTED;
1858                                 goto done;
1859                         }
1860                 }
1861
1862                 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1863                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1864                                 &bma->cur, 1, &tmp_rval, whichfork);
1865                         rval |= tmp_rval;
1866                         if (error)
1867                                 goto done;
1868                 }
1869
1870                 temp = PREV.br_blockcount - new->br_blockcount;
1871                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1872                         startblockval(PREV.br_startblock) -
1873                         (bma->cur ? bma->cur->bc_bmap.allocated : 0));
1874
1875                 PREV.br_startblock = nullstartblock(da_new);
1876                 PREV.br_blockcount = temp;
1877                 xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1878                 xfs_iext_next(ifp, &bma->icur);
1879                 ASSERT(da_new <= da_old);
1880                 break;
1881
1882         case 0:
1883                 /*
1884                  * Filling in the middle part of a previous delayed allocation.
1885                  * Contiguity is impossible here.
1886                  * This case is avoided almost all the time.
1887                  *
1888                  * We start with a delayed allocation:
1889                  *
1890                  * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
1891                  *  PREV @ idx
1892                  *
1893                  * and we are allocating:
1894                  *                     +rrrrrrrrrrrrrrrrr+
1895                  *                            new
1896                  *
1897                  * and we set it up for insertion as:
1898                  * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
1899                  *                            new
1900                  *  PREV @ idx          LEFT              RIGHT
1901                  *                      inserted at idx + 1
1902                  */
1903                 old = PREV;
1904
1905                 /* LEFT is the new middle */
1906                 LEFT = *new;
1907
1908                 /* RIGHT is the new right */
1909                 RIGHT.br_state = PREV.br_state;
1910                 RIGHT.br_startoff = new_endoff;
1911                 RIGHT.br_blockcount =
1912                         PREV.br_startoff + PREV.br_blockcount - new_endoff;
1913                 RIGHT.br_startblock =
1914                         nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1915                                         RIGHT.br_blockcount));
1916
1917                 /* truncate PREV */
1918                 PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
1919                 PREV.br_startblock =
1920                         nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1921                                         PREV.br_blockcount));
1922                 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1923
1924                 xfs_iext_next(ifp, &bma->icur);
1925                 xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
1926                 xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
1927                 ifp->if_nextents++;
1928
1929                 if (bma->cur == NULL)
1930                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1931                 else {
1932                         rval = XFS_ILOG_CORE;
1933                         error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1934                         if (error)
1935                                 goto done;
1936                         if (XFS_IS_CORRUPT(mp, i != 0)) {
1937                                 xfs_btree_mark_sick(bma->cur);
1938                                 error = -EFSCORRUPTED;
1939                                 goto done;
1940                         }
1941                         error = xfs_btree_insert(bma->cur, &i);
1942                         if (error)
1943                                 goto done;
1944                         if (XFS_IS_CORRUPT(mp, i != 1)) {
1945                                 xfs_btree_mark_sick(bma->cur);
1946                                 error = -EFSCORRUPTED;
1947                                 goto done;
1948                         }
1949                 }
1950
1951                 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1952                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1953                                         &bma->cur, 1, &tmp_rval, whichfork);
1954                         rval |= tmp_rval;
1955                         if (error)
1956                                 goto done;
1957                 }
1958
1959                 da_new = startblockval(PREV.br_startblock) +
1960                          startblockval(RIGHT.br_startblock);
1961                 break;
1962
1963         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1964         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1965         case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1966         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1967         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1968         case BMAP_LEFT_CONTIG:
1969         case BMAP_RIGHT_CONTIG:
1970                 /*
1971                  * These cases are all impossible.
1972                  */
1973                 ASSERT(0);
1974         }
1975
1976         /* add reverse mapping unless caller opted out */
1977         if (!(bma->flags & XFS_BMAPI_NORMAP))
1978                 xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
1979
1980         /* convert to a btree if necessary */
1981         if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1982                 int     tmp_logflags;   /* partial log flag return val */
1983
1984                 ASSERT(bma->cur == NULL);
1985                 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1986                                 &bma->cur, da_old > 0, &tmp_logflags,
1987                                 whichfork);
1988                 bma->logflags |= tmp_logflags;
1989                 if (error)
1990                         goto done;
1991         }
1992
1993         if (da_new != da_old)
1994                 xfs_mod_delalloc(bma->ip, 0, (int64_t)da_new - da_old);
1995
1996         if (bma->cur) {
1997                 da_new += bma->cur->bc_bmap.allocated;
1998                 bma->cur->bc_bmap.allocated = 0;
1999         }
2000
2001         /* adjust for changes in reserved delayed indirect blocks */
2002         if (da_new < da_old)
2003                 xfs_add_fdblocks(mp, da_old - da_new);
2004         else if (da_new > da_old)
2005                 error = xfs_dec_fdblocks(mp, da_new - da_old, true);
2006
2007         xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
2008 done:
2009         if (whichfork != XFS_COW_FORK)
2010                 bma->logflags |= rval;
2011         return error;
2012 #undef  LEFT
2013 #undef  RIGHT
2014 #undef  PREV
2015 }
2016
2017 /*
2018  * Convert all or part of an extent from unwritten to real, or vice versa.
2019  */
2020 int                                     /* error */
2021 xfs_bmap_add_extent_unwritten_real(
2022         struct xfs_trans        *tp,
2023         xfs_inode_t             *ip,    /* incore inode pointer */
2024         int                     whichfork,
2025         struct xfs_iext_cursor  *icur,
2026         struct xfs_btree_cur    **curp, /* if *curp is null, not a btree */
2027         xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
2028         int                     *logflagsp) /* inode logging flags */
2029 {
2030         struct xfs_btree_cur    *cur;   /* btree cursor */
2031         int                     error;  /* error return value */
2032         int                     i;      /* temp state */
2033         struct xfs_ifork        *ifp;   /* inode fork pointer */
2034         xfs_fileoff_t           new_endoff;     /* end offset of new entry */
2035         xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
2036                                         /* left is 0, right is 1, prev is 2 */
2037         int                     rval=0; /* return value (logging flags) */
2038         uint32_t                state = xfs_bmap_fork_to_state(whichfork);
2039         struct xfs_mount        *mp = ip->i_mount;
2040         struct xfs_bmbt_irec    old;
2041
2042         *logflagsp = 0;
2043
2044         cur = *curp;
2045         ifp = xfs_ifork_ptr(ip, whichfork);
2046
2047         ASSERT(!isnullstartblock(new->br_startblock));
2048
2049         XFS_STATS_INC(mp, xs_add_exlist);
2050
2051 #define LEFT            r[0]
2052 #define RIGHT           r[1]
2053 #define PREV            r[2]
2054
2055         /*
2056          * Set up a bunch of variables to make the tests simpler.
2057          */
2058         error = 0;
2059         xfs_iext_get_extent(ifp, icur, &PREV);
2060         ASSERT(new->br_state != PREV.br_state);
2061         new_endoff = new->br_startoff + new->br_blockcount;
2062         ASSERT(PREV.br_startoff <= new->br_startoff);
2063         ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2064
2065         /*
2066          * Set flags determining what part of the previous oldext allocation
2067          * extent is being replaced by a newext allocation.
2068          */
2069         if (PREV.br_startoff == new->br_startoff)
2070                 state |= BMAP_LEFT_FILLING;
2071         if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2072                 state |= BMAP_RIGHT_FILLING;
2073
2074         /*
2075          * Check and set flags if this segment has a left neighbor.
2076          * Don't set contiguous if the combined extent would be too large.
2077          */
2078         if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
2079                 state |= BMAP_LEFT_VALID;
2080                 if (isnullstartblock(LEFT.br_startblock))
2081                         state |= BMAP_LEFT_DELAY;
2082         }
2083
2084         if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2085             LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2086             LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2087             LEFT.br_state == new->br_state &&
2088             LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2089             xfs_bmap_same_rtgroup(ip, whichfork, &LEFT, new))
2090                 state |= BMAP_LEFT_CONTIG;
2091
2092         /*
2093          * Check and set flags if this segment has a right neighbor.
2094          * Don't set contiguous if the combined extent would be too large.
2095          * Also check for all-three-contiguous being too large.
2096          */
2097         if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
2098                 state |= BMAP_RIGHT_VALID;
2099                 if (isnullstartblock(RIGHT.br_startblock))
2100                         state |= BMAP_RIGHT_DELAY;
2101         }
2102
2103         if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2104             new_endoff == RIGHT.br_startoff &&
2105             new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2106             new->br_state == RIGHT.br_state &&
2107             new->br_blockcount + RIGHT.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2108             ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2109                        BMAP_RIGHT_FILLING)) !=
2110                       (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2111                        BMAP_RIGHT_FILLING) ||
2112              LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2113                         <= XFS_MAX_BMBT_EXTLEN) &&
2114             xfs_bmap_same_rtgroup(ip, whichfork, new, &RIGHT))
2115                 state |= BMAP_RIGHT_CONTIG;
2116
2117         /*
2118          * Switch out based on the FILLING and CONTIG state bits.
2119          */
2120         switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2121                          BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2122         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2123              BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2124                 /*
2125                  * Setting all of a previous oldext extent to newext.
2126                  * The left and right neighbors are both contiguous with new.
2127                  */
2128                 LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
2129
2130                 xfs_iext_remove(ip, icur, state);
2131                 xfs_iext_remove(ip, icur, state);
2132                 xfs_iext_prev(ifp, icur);
2133                 xfs_iext_update_extent(ip, state, icur, &LEFT);
2134                 ifp->if_nextents -= 2;
2135                 if (cur == NULL)
2136                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2137                 else {
2138                         rval = XFS_ILOG_CORE;
2139                         error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2140                         if (error)
2141                                 goto done;
2142                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2143                                 xfs_btree_mark_sick(cur);
2144                                 error = -EFSCORRUPTED;
2145                                 goto done;
2146                         }
2147                         if ((error = xfs_btree_delete(cur, &i)))
2148                                 goto done;
2149                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2150                                 xfs_btree_mark_sick(cur);
2151                                 error = -EFSCORRUPTED;
2152                                 goto done;
2153                         }
2154                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2155                                 goto done;
2156                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2157                                 xfs_btree_mark_sick(cur);
2158                                 error = -EFSCORRUPTED;
2159                                 goto done;
2160                         }
2161                         if ((error = xfs_btree_delete(cur, &i)))
2162                                 goto done;
2163                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2164                                 xfs_btree_mark_sick(cur);
2165                                 error = -EFSCORRUPTED;
2166                                 goto done;
2167                         }
2168                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2169                                 goto done;
2170                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2171                                 xfs_btree_mark_sick(cur);
2172                                 error = -EFSCORRUPTED;
2173                                 goto done;
2174                         }
2175                         error = xfs_bmbt_update(cur, &LEFT);
2176                         if (error)
2177                                 goto done;
2178                 }
2179                 break;
2180
2181         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2182                 /*
2183                  * Setting all of a previous oldext extent to newext.
2184                  * The left neighbor is contiguous, the right is not.
2185                  */
2186                 LEFT.br_blockcount += PREV.br_blockcount;
2187
2188                 xfs_iext_remove(ip, icur, state);
2189                 xfs_iext_prev(ifp, icur);
2190                 xfs_iext_update_extent(ip, state, icur, &LEFT);
2191                 ifp->if_nextents--;
2192                 if (cur == NULL)
2193                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2194                 else {
2195                         rval = XFS_ILOG_CORE;
2196                         error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
2197                         if (error)
2198                                 goto done;
2199                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2200                                 xfs_btree_mark_sick(cur);
2201                                 error = -EFSCORRUPTED;
2202                                 goto done;
2203                         }
2204                         if ((error = xfs_btree_delete(cur, &i)))
2205                                 goto done;
2206                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2207                                 xfs_btree_mark_sick(cur);
2208                                 error = -EFSCORRUPTED;
2209                                 goto done;
2210                         }
2211                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2212                                 goto done;
2213                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2214                                 xfs_btree_mark_sick(cur);
2215                                 error = -EFSCORRUPTED;
2216                                 goto done;
2217                         }
2218                         error = xfs_bmbt_update(cur, &LEFT);
2219                         if (error)
2220                                 goto done;
2221                 }
2222                 break;
2223
2224         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2225                 /*
2226                  * Setting all of a previous oldext extent to newext.
2227                  * The right neighbor is contiguous, the left is not.
2228                  */
2229                 PREV.br_blockcount += RIGHT.br_blockcount;
2230                 PREV.br_state = new->br_state;
2231
2232                 xfs_iext_next(ifp, icur);
2233                 xfs_iext_remove(ip, icur, state);
2234                 xfs_iext_prev(ifp, icur);
2235                 xfs_iext_update_extent(ip, state, icur, &PREV);
2236                 ifp->if_nextents--;
2237
2238                 if (cur == NULL)
2239                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2240                 else {
2241                         rval = XFS_ILOG_CORE;
2242                         error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2243                         if (error)
2244                                 goto done;
2245                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2246                                 xfs_btree_mark_sick(cur);
2247                                 error = -EFSCORRUPTED;
2248                                 goto done;
2249                         }
2250                         if ((error = xfs_btree_delete(cur, &i)))
2251                                 goto done;
2252                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2253                                 xfs_btree_mark_sick(cur);
2254                                 error = -EFSCORRUPTED;
2255                                 goto done;
2256                         }
2257                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2258                                 goto done;
2259                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2260                                 xfs_btree_mark_sick(cur);
2261                                 error = -EFSCORRUPTED;
2262                                 goto done;
2263                         }
2264                         error = xfs_bmbt_update(cur, &PREV);
2265                         if (error)
2266                                 goto done;
2267                 }
2268                 break;
2269
2270         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2271                 /*
2272                  * Setting all of a previous oldext extent to newext.
2273                  * Neither the left nor right neighbors are contiguous with
2274                  * the new one.
2275                  */
2276                 PREV.br_state = new->br_state;
2277                 xfs_iext_update_extent(ip, state, icur, &PREV);
2278
2279                 if (cur == NULL)
2280                         rval = XFS_ILOG_DEXT;
2281                 else {
2282                         rval = 0;
2283                         error = xfs_bmbt_lookup_eq(cur, new, &i);
2284                         if (error)
2285                                 goto done;
2286                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2287                                 xfs_btree_mark_sick(cur);
2288                                 error = -EFSCORRUPTED;
2289                                 goto done;
2290                         }
2291                         error = xfs_bmbt_update(cur, &PREV);
2292                         if (error)
2293                                 goto done;
2294                 }
2295                 break;
2296
2297         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2298                 /*
2299                  * Setting the first part of a previous oldext extent to newext.
2300                  * The left neighbor is contiguous.
2301                  */
2302                 LEFT.br_blockcount += new->br_blockcount;
2303
2304                 old = PREV;
2305                 PREV.br_startoff += new->br_blockcount;
2306                 PREV.br_startblock += new->br_blockcount;
2307                 PREV.br_blockcount -= new->br_blockcount;
2308
2309                 xfs_iext_update_extent(ip, state, icur, &PREV);
2310                 xfs_iext_prev(ifp, icur);
2311                 xfs_iext_update_extent(ip, state, icur, &LEFT);
2312
2313                 if (cur == NULL)
2314                         rval = XFS_ILOG_DEXT;
2315                 else {
2316                         rval = 0;
2317                         error = xfs_bmbt_lookup_eq(cur, &old, &i);
2318                         if (error)
2319                                 goto done;
2320                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2321                                 xfs_btree_mark_sick(cur);
2322                                 error = -EFSCORRUPTED;
2323                                 goto done;
2324                         }
2325                         error = xfs_bmbt_update(cur, &PREV);
2326                         if (error)
2327                                 goto done;
2328                         error = xfs_btree_decrement(cur, 0, &i);
2329                         if (error)
2330                                 goto done;
2331                         error = xfs_bmbt_update(cur, &LEFT);
2332                         if (error)
2333                                 goto done;
2334                 }
2335                 break;
2336
2337         case BMAP_LEFT_FILLING:
2338                 /*
2339                  * Setting the first part of a previous oldext extent to newext.
2340                  * The left neighbor is not contiguous.
2341                  */
2342                 old = PREV;
2343                 PREV.br_startoff += new->br_blockcount;
2344                 PREV.br_startblock += new->br_blockcount;
2345                 PREV.br_blockcount -= new->br_blockcount;
2346
2347                 xfs_iext_update_extent(ip, state, icur, &PREV);
2348                 xfs_iext_insert(ip, icur, new, state);
2349                 ifp->if_nextents++;
2350
2351                 if (cur == NULL)
2352                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2353                 else {
2354                         rval = XFS_ILOG_CORE;
2355                         error = xfs_bmbt_lookup_eq(cur, &old, &i);
2356                         if (error)
2357                                 goto done;
2358                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2359                                 xfs_btree_mark_sick(cur);
2360                                 error = -EFSCORRUPTED;
2361                                 goto done;
2362                         }
2363                         error = xfs_bmbt_update(cur, &PREV);
2364                         if (error)
2365                                 goto done;
2366                         cur->bc_rec.b = *new;
2367                         if ((error = xfs_btree_insert(cur, &i)))
2368                                 goto done;
2369                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2370                                 xfs_btree_mark_sick(cur);
2371                                 error = -EFSCORRUPTED;
2372                                 goto done;
2373                         }
2374                 }
2375                 break;
2376
2377         case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2378                 /*
2379                  * Setting the last part of a previous oldext extent to newext.
2380                  * The right neighbor is contiguous with the new allocation.
2381                  */
2382                 old = PREV;
2383                 PREV.br_blockcount -= new->br_blockcount;
2384
2385                 RIGHT.br_startoff = new->br_startoff;
2386                 RIGHT.br_startblock = new->br_startblock;
2387                 RIGHT.br_blockcount += new->br_blockcount;
2388
2389                 xfs_iext_update_extent(ip, state, icur, &PREV);
2390                 xfs_iext_next(ifp, icur);
2391                 xfs_iext_update_extent(ip, state, icur, &RIGHT);
2392
2393                 if (cur == NULL)
2394                         rval = XFS_ILOG_DEXT;
2395                 else {
2396                         rval = 0;
2397                         error = xfs_bmbt_lookup_eq(cur, &old, &i);
2398                         if (error)
2399                                 goto done;
2400                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2401                                 xfs_btree_mark_sick(cur);
2402                                 error = -EFSCORRUPTED;
2403                                 goto done;
2404                         }
2405                         error = xfs_bmbt_update(cur, &PREV);
2406                         if (error)
2407                                 goto done;
2408                         error = xfs_btree_increment(cur, 0, &i);
2409                         if (error)
2410                                 goto done;
2411                         error = xfs_bmbt_update(cur, &RIGHT);
2412                         if (error)
2413                                 goto done;
2414                 }
2415                 break;
2416
2417         case BMAP_RIGHT_FILLING:
2418                 /*
2419                  * Setting the last part of a previous oldext extent to newext.
2420                  * The right neighbor is not contiguous.
2421                  */
2422                 old = PREV;
2423                 PREV.br_blockcount -= new->br_blockcount;
2424
2425                 xfs_iext_update_extent(ip, state, icur, &PREV);
2426                 xfs_iext_next(ifp, icur);
2427                 xfs_iext_insert(ip, icur, new, state);
2428                 ifp->if_nextents++;
2429
2430                 if (cur == NULL)
2431                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2432                 else {
2433                         rval = XFS_ILOG_CORE;
2434                         error = xfs_bmbt_lookup_eq(cur, &old, &i);
2435                         if (error)
2436                                 goto done;
2437                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2438                                 xfs_btree_mark_sick(cur);
2439                                 error = -EFSCORRUPTED;
2440                                 goto done;
2441                         }
2442                         error = xfs_bmbt_update(cur, &PREV);
2443                         if (error)
2444                                 goto done;
2445                         error = xfs_bmbt_lookup_eq(cur, new, &i);
2446                         if (error)
2447                                 goto done;
2448                         if (XFS_IS_CORRUPT(mp, i != 0)) {
2449                                 xfs_btree_mark_sick(cur);
2450                                 error = -EFSCORRUPTED;
2451                                 goto done;
2452                         }
2453                         if ((error = xfs_btree_insert(cur, &i)))
2454                                 goto done;
2455                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2456                                 xfs_btree_mark_sick(cur);
2457                                 error = -EFSCORRUPTED;
2458                                 goto done;
2459                         }
2460                 }
2461                 break;
2462
2463         case 0:
2464                 /*
2465                  * Setting the middle part of a previous oldext extent to
2466                  * newext.  Contiguity is impossible here.
2467                  * One extent becomes three extents.
2468                  */
2469                 old = PREV;
2470                 PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
2471
2472                 r[0] = *new;
2473                 r[1].br_startoff = new_endoff;
2474                 r[1].br_blockcount =
2475                         old.br_startoff + old.br_blockcount - new_endoff;
2476                 r[1].br_startblock = new->br_startblock + new->br_blockcount;
2477                 r[1].br_state = PREV.br_state;
2478
2479                 xfs_iext_update_extent(ip, state, icur, &PREV);
2480                 xfs_iext_next(ifp, icur);
2481                 xfs_iext_insert(ip, icur, &r[1], state);
2482                 xfs_iext_insert(ip, icur, &r[0], state);
2483                 ifp->if_nextents += 2;
2484
2485                 if (cur == NULL)
2486                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2487                 else {
2488                         rval = XFS_ILOG_CORE;
2489                         error = xfs_bmbt_lookup_eq(cur, &old, &i);
2490                         if (error)
2491                                 goto done;
2492                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2493                                 xfs_btree_mark_sick(cur);
2494                                 error = -EFSCORRUPTED;
2495                                 goto done;
2496                         }
2497                         /* new right extent - oldext */
2498                         error = xfs_bmbt_update(cur, &r[1]);
2499                         if (error)
2500                                 goto done;
2501                         /* new left extent - oldext */
2502                         cur->bc_rec.b = PREV;
2503                         if ((error = xfs_btree_insert(cur, &i)))
2504                                 goto done;
2505                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2506                                 xfs_btree_mark_sick(cur);
2507                                 error = -EFSCORRUPTED;
2508                                 goto done;
2509                         }
2510                         /*
2511                          * Reset the cursor to the position of the new extent
2512                          * we are about to insert as we can't trust it after
2513                          * the previous insert.
2514                          */
2515                         error = xfs_bmbt_lookup_eq(cur, new, &i);
2516                         if (error)
2517                                 goto done;
2518                         if (XFS_IS_CORRUPT(mp, i != 0)) {
2519                                 xfs_btree_mark_sick(cur);
2520                                 error = -EFSCORRUPTED;
2521                                 goto done;
2522                         }
2523                         /* new middle extent - newext */
2524                         if ((error = xfs_btree_insert(cur, &i)))
2525                                 goto done;
2526                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2527                                 xfs_btree_mark_sick(cur);
2528                                 error = -EFSCORRUPTED;
2529                                 goto done;
2530                         }
2531                 }
2532                 break;
2533
2534         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2535         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2536         case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2537         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2538         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2539         case BMAP_LEFT_CONTIG:
2540         case BMAP_RIGHT_CONTIG:
2541                 /*
2542                  * These cases are all impossible.
2543                  */
2544                 ASSERT(0);
2545         }
2546
2547         /* update reverse mappings */
2548         xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
2549
2550         /* convert to a btree if necessary */
2551         if (xfs_bmap_needs_btree(ip, whichfork)) {
2552                 int     tmp_logflags;   /* partial log flag return val */
2553
2554                 ASSERT(cur == NULL);
2555                 error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
2556                                 &tmp_logflags, whichfork);
2557                 *logflagsp |= tmp_logflags;
2558                 if (error)
2559                         goto done;
2560         }
2561
2562         /* clear out the allocated field, done with it now in any case. */
2563         if (cur) {
2564                 cur->bc_bmap.allocated = 0;
2565                 *curp = cur;
2566         }
2567
2568         xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
2569 done:
2570         *logflagsp |= rval;
2571         return error;
2572 #undef  LEFT
2573 #undef  RIGHT
2574 #undef  PREV
2575 }
2576
2577 /*
2578  * Convert a hole to a delayed allocation.
2579  */
2580 STATIC void
2581 xfs_bmap_add_extent_hole_delay(
2582         xfs_inode_t             *ip,    /* incore inode pointer */
2583         int                     whichfork,
2584         struct xfs_iext_cursor  *icur,
2585         xfs_bmbt_irec_t         *new)   /* new data to add to file extents */
2586 {
2587         struct xfs_ifork        *ifp;   /* inode fork pointer */
2588         xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
2589         xfs_filblks_t           newlen=0;       /* new indirect size */
2590         xfs_filblks_t           oldlen=0;       /* old indirect size */
2591         xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
2592         uint32_t                state = xfs_bmap_fork_to_state(whichfork);
2593         xfs_filblks_t           temp;    /* temp for indirect calculations */
2594
2595         ifp = xfs_ifork_ptr(ip, whichfork);
2596         ASSERT(isnullstartblock(new->br_startblock));
2597
2598         /*
2599          * Check and set flags if this segment has a left neighbor
2600          */
2601         if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2602                 state |= BMAP_LEFT_VALID;
2603                 if (isnullstartblock(left.br_startblock))
2604                         state |= BMAP_LEFT_DELAY;
2605         }
2606
2607         /*
2608          * Check and set flags if the current (right) segment exists.
2609          * If it doesn't exist, we're converting the hole at end-of-file.
2610          */
2611         if (xfs_iext_get_extent(ifp, icur, &right)) {
2612                 state |= BMAP_RIGHT_VALID;
2613                 if (isnullstartblock(right.br_startblock))
2614                         state |= BMAP_RIGHT_DELAY;
2615         }
2616
2617         /*
2618          * Set contiguity flags on the left and right neighbors.
2619          * Don't let extents get too large, even if the pieces are contiguous.
2620          */
2621         if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
2622             left.br_startoff + left.br_blockcount == new->br_startoff &&
2623             left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
2624                 state |= BMAP_LEFT_CONTIG;
2625
2626         if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
2627             new->br_startoff + new->br_blockcount == right.br_startoff &&
2628             new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2629             (!(state & BMAP_LEFT_CONTIG) ||
2630              (left.br_blockcount + new->br_blockcount +
2631               right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)))
2632                 state |= BMAP_RIGHT_CONTIG;
2633
2634         /*
2635          * Switch out based on the contiguity flags.
2636          */
2637         switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2638         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2639                 /*
2640                  * New allocation is contiguous with delayed allocations
2641                  * on the left and on the right.
2642                  * Merge all three into a single extent record.
2643                  */
2644                 temp = left.br_blockcount + new->br_blockcount +
2645                         right.br_blockcount;
2646
2647                 oldlen = startblockval(left.br_startblock) +
2648                         startblockval(new->br_startblock) +
2649                         startblockval(right.br_startblock);
2650                 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2651                                          oldlen);
2652                 left.br_startblock = nullstartblock(newlen);
2653                 left.br_blockcount = temp;
2654
2655                 xfs_iext_remove(ip, icur, state);
2656                 xfs_iext_prev(ifp, icur);
2657                 xfs_iext_update_extent(ip, state, icur, &left);
2658                 break;
2659
2660         case BMAP_LEFT_CONTIG:
2661                 /*
2662                  * New allocation is contiguous with a delayed allocation
2663                  * on the left.
2664                  * Merge the new allocation with the left neighbor.
2665                  */
2666                 temp = left.br_blockcount + new->br_blockcount;
2667
2668                 oldlen = startblockval(left.br_startblock) +
2669                         startblockval(new->br_startblock);
2670                 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2671                                          oldlen);
2672                 left.br_blockcount = temp;
2673                 left.br_startblock = nullstartblock(newlen);
2674
2675                 xfs_iext_prev(ifp, icur);
2676                 xfs_iext_update_extent(ip, state, icur, &left);
2677                 break;
2678
2679         case BMAP_RIGHT_CONTIG:
2680                 /*
2681                  * New allocation is contiguous with a delayed allocation
2682                  * on the right.
2683                  * Merge the new allocation with the right neighbor.
2684                  */
2685                 temp = new->br_blockcount + right.br_blockcount;
2686                 oldlen = startblockval(new->br_startblock) +
2687                         startblockval(right.br_startblock);
2688                 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2689                                          oldlen);
2690                 right.br_startoff = new->br_startoff;
2691                 right.br_startblock = nullstartblock(newlen);
2692                 right.br_blockcount = temp;
2693                 xfs_iext_update_extent(ip, state, icur, &right);
2694                 break;
2695
2696         case 0:
2697                 /*
2698                  * New allocation is not contiguous with another
2699                  * delayed allocation.
2700                  * Insert a new entry.
2701                  */
2702                 oldlen = newlen = 0;
2703                 xfs_iext_insert(ip, icur, new, state);
2704                 break;
2705         }
2706         if (oldlen != newlen) {
2707                 ASSERT(oldlen > newlen);
2708                 xfs_add_fdblocks(ip->i_mount, oldlen - newlen);
2709
2710                 /*
2711                  * Nothing to do for disk quota accounting here.
2712                  */
2713                 xfs_mod_delalloc(ip, 0, (int64_t)newlen - oldlen);
2714         }
2715 }
2716
2717 /*
2718  * Convert a hole to a real allocation.
2719  */
2720 STATIC int                              /* error */
2721 xfs_bmap_add_extent_hole_real(
2722         struct xfs_trans        *tp,
2723         struct xfs_inode        *ip,
2724         int                     whichfork,
2725         struct xfs_iext_cursor  *icur,
2726         struct xfs_btree_cur    **curp,
2727         struct xfs_bmbt_irec    *new,
2728         int                     *logflagsp,
2729         uint32_t                flags)
2730 {
2731         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
2732         struct xfs_mount        *mp = ip->i_mount;
2733         struct xfs_btree_cur    *cur = *curp;
2734         int                     error;  /* error return value */
2735         int                     i;      /* temp state */
2736         xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
2737         xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
2738         int                     rval=0; /* return value (logging flags) */
2739         uint32_t                state = xfs_bmap_fork_to_state(whichfork);
2740         struct xfs_bmbt_irec    old;
2741
2742         ASSERT(!isnullstartblock(new->br_startblock));
2743         ASSERT(!cur || !(cur->bc_flags & XFS_BTREE_BMBT_WASDEL));
2744
2745         XFS_STATS_INC(mp, xs_add_exlist);
2746
2747         /*
2748          * Check and set flags if this segment has a left neighbor.
2749          */
2750         if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2751                 state |= BMAP_LEFT_VALID;
2752                 if (isnullstartblock(left.br_startblock))
2753                         state |= BMAP_LEFT_DELAY;
2754         }
2755
2756         /*
2757          * Check and set flags if this segment has a current value.
2758          * Not true if we're inserting into the "hole" at eof.
2759          */
2760         if (xfs_iext_get_extent(ifp, icur, &right)) {
2761                 state |= BMAP_RIGHT_VALID;
2762                 if (isnullstartblock(right.br_startblock))
2763                         state |= BMAP_RIGHT_DELAY;
2764         }
2765
2766         /*
2767          * We're inserting a real allocation between "left" and "right".
2768          * Set the contiguity flags.  Don't let extents get too large.
2769          */
2770         if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2771             left.br_startoff + left.br_blockcount == new->br_startoff &&
2772             left.br_startblock + left.br_blockcount == new->br_startblock &&
2773             left.br_state == new->br_state &&
2774             left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2775             xfs_bmap_same_rtgroup(ip, whichfork, &left, new))
2776                 state |= BMAP_LEFT_CONTIG;
2777
2778         if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2779             new->br_startoff + new->br_blockcount == right.br_startoff &&
2780             new->br_startblock + new->br_blockcount == right.br_startblock &&
2781             new->br_state == right.br_state &&
2782             new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2783             (!(state & BMAP_LEFT_CONTIG) ||
2784              left.br_blockcount + new->br_blockcount +
2785              right.br_blockcount <= XFS_MAX_BMBT_EXTLEN) &&
2786             xfs_bmap_same_rtgroup(ip, whichfork, new, &right))
2787                 state |= BMAP_RIGHT_CONTIG;
2788
2789         error = 0;
2790         /*
2791          * Select which case we're in here, and implement it.
2792          */
2793         switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2794         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2795                 /*
2796                  * New allocation is contiguous with real allocations on the
2797                  * left and on the right.
2798                  * Merge all three into a single extent record.
2799                  */
2800                 left.br_blockcount += new->br_blockcount + right.br_blockcount;
2801
2802                 xfs_iext_remove(ip, icur, state);
2803                 xfs_iext_prev(ifp, icur);
2804                 xfs_iext_update_extent(ip, state, icur, &left);
2805                 ifp->if_nextents--;
2806
2807                 if (cur == NULL) {
2808                         rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2809                 } else {
2810                         rval = XFS_ILOG_CORE;
2811                         error = xfs_bmbt_lookup_eq(cur, &right, &i);
2812                         if (error)
2813                                 goto done;
2814                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2815                                 xfs_btree_mark_sick(cur);
2816                                 error = -EFSCORRUPTED;
2817                                 goto done;
2818                         }
2819                         error = xfs_btree_delete(cur, &i);
2820                         if (error)
2821                                 goto done;
2822                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2823                                 xfs_btree_mark_sick(cur);
2824                                 error = -EFSCORRUPTED;
2825                                 goto done;
2826                         }
2827                         error = xfs_btree_decrement(cur, 0, &i);
2828                         if (error)
2829                                 goto done;
2830                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2831                                 xfs_btree_mark_sick(cur);
2832                                 error = -EFSCORRUPTED;
2833                                 goto done;
2834                         }
2835                         error = xfs_bmbt_update(cur, &left);
2836                         if (error)
2837                                 goto done;
2838                 }
2839                 break;
2840
2841         case BMAP_LEFT_CONTIG:
2842                 /*
2843                  * New allocation is contiguous with a real allocation
2844                  * on the left.
2845                  * Merge the new allocation with the left neighbor.
2846                  */
2847                 old = left;
2848                 left.br_blockcount += new->br_blockcount;
2849
2850                 xfs_iext_prev(ifp, icur);
2851                 xfs_iext_update_extent(ip, state, icur, &left);
2852
2853                 if (cur == NULL) {
2854                         rval = xfs_ilog_fext(whichfork);
2855                 } else {
2856                         rval = 0;
2857                         error = xfs_bmbt_lookup_eq(cur, &old, &i);
2858                         if (error)
2859                                 goto done;
2860                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2861                                 xfs_btree_mark_sick(cur);
2862                                 error = -EFSCORRUPTED;
2863                                 goto done;
2864                         }
2865                         error = xfs_bmbt_update(cur, &left);
2866                         if (error)
2867                                 goto done;
2868                 }
2869                 break;
2870
2871         case BMAP_RIGHT_CONTIG:
2872                 /*
2873                  * New allocation is contiguous with a real allocation
2874                  * on the right.
2875                  * Merge the new allocation with the right neighbor.
2876                  */
2877                 old = right;
2878
2879                 right.br_startoff = new->br_startoff;
2880                 right.br_startblock = new->br_startblock;
2881                 right.br_blockcount += new->br_blockcount;
2882                 xfs_iext_update_extent(ip, state, icur, &right);
2883
2884                 if (cur == NULL) {
2885                         rval = xfs_ilog_fext(whichfork);
2886                 } else {
2887                         rval = 0;
2888                         error = xfs_bmbt_lookup_eq(cur, &old, &i);
2889                         if (error)
2890                                 goto done;
2891                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2892                                 xfs_btree_mark_sick(cur);
2893                                 error = -EFSCORRUPTED;
2894                                 goto done;
2895                         }
2896                         error = xfs_bmbt_update(cur, &right);
2897                         if (error)
2898                                 goto done;
2899                 }
2900                 break;
2901
2902         case 0:
2903                 /*
2904                  * New allocation is not contiguous with another
2905                  * real allocation.
2906                  * Insert a new entry.
2907                  */
2908                 xfs_iext_insert(ip, icur, new, state);
2909                 ifp->if_nextents++;
2910
2911                 if (cur == NULL) {
2912                         rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2913                 } else {
2914                         rval = XFS_ILOG_CORE;
2915                         error = xfs_bmbt_lookup_eq(cur, new, &i);
2916                         if (error)
2917                                 goto done;
2918                         if (XFS_IS_CORRUPT(mp, i != 0)) {
2919                                 xfs_btree_mark_sick(cur);
2920                                 error = -EFSCORRUPTED;
2921                                 goto done;
2922                         }
2923                         error = xfs_btree_insert(cur, &i);
2924                         if (error)
2925                                 goto done;
2926                         if (XFS_IS_CORRUPT(mp, i != 1)) {
2927                                 xfs_btree_mark_sick(cur);
2928                                 error = -EFSCORRUPTED;
2929                                 goto done;
2930                         }
2931                 }
2932                 break;
2933         }
2934
2935         /* add reverse mapping unless caller opted out */
2936         if (!(flags & XFS_BMAPI_NORMAP))
2937                 xfs_rmap_map_extent(tp, ip, whichfork, new);
2938
2939         /* convert to a btree if necessary */
2940         if (xfs_bmap_needs_btree(ip, whichfork)) {
2941                 int     tmp_logflags;   /* partial log flag return val */
2942
2943                 ASSERT(cur == NULL);
2944                 error = xfs_bmap_extents_to_btree(tp, ip, curp, 0,
2945                                 &tmp_logflags, whichfork);
2946                 *logflagsp |= tmp_logflags;
2947                 cur = *curp;
2948                 if (error)
2949                         goto done;
2950         }
2951
2952         /* clear out the allocated field, done with it now in any case. */
2953         if (cur)
2954                 cur->bc_bmap.allocated = 0;
2955
2956         xfs_bmap_check_leaf_extents(cur, ip, whichfork);
2957 done:
2958         *logflagsp |= rval;
2959         return error;
2960 }
2961
2962 /*
2963  * Functions used in the extent read, allocate and remove paths
2964  */
2965
/*
 * Adjust the size of the new extent based on i_extsize and rt extsize.
 *
 * The request described by *offp and *lenp is widened so that its start and
 * length are multiples of @extsz, and is then shifted or trimmed so that it
 * stays clear of the neighbouring extents described by @prevp and @gotp.
 *
 * Returns 0 with *offp and *lenp updated on success.  If @convert is set, or
 * if a non-delalloc, non-EOF request already lies entirely inside @gotp, 0 is
 * returned and *offp and *lenp are left untouched.  For a realtime request,
 * -EINVAL is returned when the length cannot be reduced to a multiple of the
 * realtime extent size while still covering the original range; *offp and
 * *lenp are not written in that case.
 */
int
xfs_bmap_extsize_align(
        xfs_mount_t     *mp,
        xfs_bmbt_irec_t *gotp,          /* next extent pointer */
        xfs_bmbt_irec_t *prevp,         /* previous extent pointer */
        xfs_extlen_t    extsz,          /* align to this extent size */
        int             rt,             /* is this a realtime inode? */
        int             eof,            /* is extent at end-of-file? */
        int             delay,          /* creating delalloc extent? */
        int             convert,        /* overwriting unwritten extent? */
        xfs_fileoff_t   *offp,          /* in/out: aligned offset */
        xfs_extlen_t    *lenp)          /* in/out: aligned length */
{
        xfs_fileoff_t   orig_off;       /* original offset */
        xfs_extlen_t    orig_alen;      /* original length */
        xfs_fileoff_t   orig_end;       /* original off+len */
        xfs_fileoff_t   nexto;          /* next file offset */
        xfs_fileoff_t   prevo;          /* previous file offset */
        xfs_fileoff_t   align_off;      /* temp for offset */
        xfs_extlen_t    align_alen;     /* temp for length */
        xfs_extlen_t    temp;           /* temp for calculations */

        /* Conversions are left exactly as the caller requested them. */
        if (convert)
                return 0;

        orig_off = align_off = *offp;
        orig_alen = align_alen = *lenp;
        orig_end = orig_off + orig_alen;

        /*
         * If this request overlaps an existing extent, then don't
         * attempt to perform any additional alignment.
         */
        if (!delay && !eof &&
            (orig_off >= gotp->br_startoff) &&
            (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
                return 0;
        }

        /*
         * If the file offset is unaligned vs. the extent size
         * we need to align it.  This will be possible unless
         * the file was previously written with a kernel that didn't
         * perform this alignment, or if a truncate shot us in the
         * foot.
         */
        /* temp receives the remainder of orig_off / extsz. */
        div_u64_rem(orig_off, extsz, &temp);
        if (temp) {
                align_alen += temp;
                align_off -= temp;
        }

        /* Same adjustment for the end of the requested area. */
        temp = (align_alen % extsz);
        if (temp)
                align_alen += extsz - temp;

        /*
         * For large extent hint sizes, the aligned extent might be larger than
         * XFS_MAX_BMBT_EXTLEN. In that case, reduce the size by an extsz so
         * that it pulls the length back under XFS_MAX_BMBT_EXTLEN. The outer
         * allocation loops handle short allocation just fine, so it is safe to
         * do this. We only want to do it when we are forced to, though, because
         * it means more allocation operations are required.
         */
        while (align_alen > XFS_MAX_BMBT_EXTLEN)
                align_alen -= extsz;
        ASSERT(align_alen <= XFS_MAX_BMBT_EXTLEN);

        /*
         * If the previous block overlaps with this proposed allocation
         * then move the start forward without adjusting the length.
         */
        if (prevp->br_startoff != NULLFILEOFF) {
                /* A hole record bounds us at its start, anything else at its end. */
                if (prevp->br_startblock == HOLESTARTBLOCK)
                        prevo = prevp->br_startoff;
                else
                        prevo = prevp->br_startoff + prevp->br_blockcount;
        } else
                prevo = 0;
        if (align_off != orig_off && align_off < prevo)
                align_off = prevo;
        /*
         * If the next block overlaps with this proposed allocation
         * then move the start back without adjusting the length,
         * but not before offset 0.
         * This may of course make the start overlap previous block,
         * and if we hit the offset 0 limit then the next block
         * can still overlap too.
         */
        if (!eof && gotp->br_startoff != NULLFILEOFF) {
                if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
                    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
                        nexto = gotp->br_startoff + gotp->br_blockcount;
                else
                        nexto = gotp->br_startoff;
        } else
                nexto = NULLFILEOFF;
        if (!eof &&
            align_off + align_alen != orig_end &&
            align_off + align_alen > nexto)
                align_off = nexto > align_alen ? nexto - align_alen : 0;
        /*
         * If we're now overlapping the next or previous extent that
         * means we can't fit an extsz piece in this hole.  Just move
         * the start forward to the first valid spot and set
         * the length so we hit the end.
         */
        if (align_off != orig_off && align_off < prevo)
                align_off = prevo;
        if (align_off + align_alen != orig_end &&
            align_off + align_alen > nexto &&
            nexto != NULLFILEOFF) {
                ASSERT(nexto > prevo);
                align_alen = nexto - align_off;
        }

        /*
         * If realtime, and the result isn't a multiple of the realtime
         * extent size we need to remove blocks until it is.
         */
        if (rt && (temp = xfs_extlen_to_rtxmod(mp, align_alen))) {
                /*
                 * We're not covering the original request, or
                 * we won't be able to once we fix the length.
                 */
                if (orig_off < align_off ||
                    orig_end > align_off + align_alen ||
                    align_alen - temp < orig_alen)
                        return -EINVAL;
                /*
                 * Try to fix it by moving the start up.
                 */
                if (align_off + temp <= orig_off) {
                        align_alen -= temp;
                        align_off += temp;
                }
                /*
                 * Try to fix it by moving the end in.
                 */
                else if (align_off + align_alen - temp >= orig_end)
                        align_alen -= temp;
                /*
                 * Set the start to the minimum then trim the length.
                 */
                else {
                        align_alen -= orig_off - align_off;
                        align_off = orig_off;
                        align_alen -= xfs_extlen_to_rtxmod(mp, align_alen);
                }
                /*
                 * Result doesn't cover the request, fail it.
                 */
                if (orig_off < align_off || orig_end > align_off + align_alen)
                        return -EINVAL;
        } else {
                ASSERT(orig_off >= align_off);
                /* see XFS_MAX_BMBT_EXTLEN handling above */
                ASSERT(orig_end <= align_off + align_alen ||
                       align_alen + extsz > XFS_MAX_BMBT_EXTLEN);
        }

        /* Debug-only check that the result does not overlap either neighbour. */
#ifdef DEBUG
        if (!eof && gotp->br_startoff != NULLFILEOFF)
                ASSERT(align_off + align_alen <= gotp->br_startoff);
        if (prevp->br_startoff != NULLFILEOFF)
                ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
#endif

        *lenp = align_alen;
        *offp = align_off;
        return 0;
}
3142
3143 static inline bool
3144 xfs_bmap_adjacent_valid(
3145         struct xfs_bmalloca     *ap,
3146         xfs_fsblock_t           x,
3147         xfs_fsblock_t           y)
3148 {
3149         struct xfs_mount        *mp = ap->ip->i_mount;
3150
3151         if (XFS_IS_REALTIME_INODE(ap->ip) &&
3152             (ap->datatype & XFS_ALLOC_USERDATA)) {
3153                 if (!xfs_has_rtgroups(mp))
3154                         return x < mp->m_sb.sb_rblocks;
3155
3156                 return xfs_rtb_to_rgno(mp, x) == xfs_rtb_to_rgno(mp, y) &&
3157                         xfs_rtb_to_rgno(mp, x) < mp->m_sb.sb_rgcount &&
3158                         xfs_rtb_to_rtx(mp, x) < mp->m_sb.sb_rgextents;
3159
3160         }
3161
3162         return XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) &&
3163                 XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount &&
3164                 XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks;
3165 }
3166
/*
 * A file-offset gap to a neighbouring extent is mirrored in the requested
 * block number only while it is at most this many times the allocation
 * length (see the "Heuristic!" checks in xfs_bmap_adjacent()).
 */
#define XFS_ALLOC_GAP_UNITS     4

/*
 * Choose a target start block for the allocation described by @ap, based on
 * the extents before (ap->prev) and after (ap->got) the requested offset.
 *
 * At EOF the block following the previous real extent is used, advanced by
 * the file-offset gap when that still yields a valid block.  Otherwise a
 * candidate is derived from each real neighbour and the one with the smaller
 * distance measure is taken.
 */
/* returns true if ap->blkno was modified */
bool
xfs_bmap_adjacent(
        struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
{
        xfs_fsblock_t           adjust;         /* adjustment to block numbers */

        /*
         * If allocating at eof, and there's a previous real block,
         * try to use its last block as our starting point.
         */
        if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
            !isnullstartblock(ap->prev.br_startblock) &&
            xfs_bmap_adjacent_valid(ap,
                        ap->prev.br_startblock + ap->prev.br_blockcount,
                        ap->prev.br_startblock)) {
                ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
                /*
                 * Adjust for the gap between prevp and us.
                 */
                adjust = ap->offset -
                        (ap->prev.br_startoff + ap->prev.br_blockcount);
                if (adjust && xfs_bmap_adjacent_valid(ap, ap->blkno + adjust,
                                ap->prev.br_startblock))
                        ap->blkno += adjust;
                return true;
        }
        /*
         * If not at eof, then compare the two neighbor blocks.
         * Figure out whether either one gives us a good starting point,
         * and pick the better one.
         */
        if (!ap->eof) {
                xfs_fsblock_t   gotbno;         /* right side block number */
                xfs_fsblock_t   gotdiff=0;      /* right side difference */
                xfs_fsblock_t   prevbno;        /* left side block number */
                xfs_fsblock_t   prevdiff=0;     /* left side difference */

                /*
                 * If there's a previous (left) block, select a requested
                 * start block based on it.
                 */
                if (ap->prev.br_startoff != NULLFILEOFF &&
                    !isnullstartblock(ap->prev.br_startblock) &&
                    (prevbno = ap->prev.br_startblock +
                               ap->prev.br_blockcount) &&
                    xfs_bmap_adjacent_valid(ap, prevbno,
                                ap->prev.br_startblock)) {
                        /*
                         * Calculate gap to end of previous block.
                         */
                        adjust = prevdiff = ap->offset -
                                (ap->prev.br_startoff +
                                 ap->prev.br_blockcount);
                        /*
                         * Figure the startblock based on the previous block's
                         * end and the gap size.
                         * Heuristic!
                         * If the gap is large relative to the piece we're
                         * allocating, or using it gives us an invalid block
                         * number, then just use the end of the previous block.
                         */
                        if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
                            xfs_bmap_adjacent_valid(ap, prevbno + prevdiff,
                                        ap->prev.br_startblock))
                                prevbno += adjust;
                        else
                                /* Gap not mirrored: double the distance measure. */
                                prevdiff += adjust;
                }
                /*
                 * No previous block or can't follow it, just default.
                 */
                else
                        prevbno = NULLFSBLOCK;
                /*
                 * If there's a following (right) block, select a requested
                 * start block based on it.
                 */
                if (!isnullstartblock(ap->got.br_startblock)) {
                        /*
                         * Calculate gap to start of next block.
                         */
                        adjust = gotdiff = ap->got.br_startoff - ap->offset;
                        /*
                         * Figure the startblock based on the next block's
                         * start and the gap size.
                         */
                        gotbno = ap->got.br_startblock;
                        /*
                         * Heuristic!
                         * If the gap is large relative to the piece we're
                         * allocating, or using it gives us an invalid block
                         * number, then just use the start of the next block
                         * offset by our length.
                         */
                        if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
                            xfs_bmap_adjacent_valid(ap, gotbno - gotdiff,
                                        gotbno))
                                gotbno -= adjust;
                        else if (xfs_bmap_adjacent_valid(ap, gotbno - ap->length,
                                        gotbno)) {
                                gotbno -= ap->length;
                                gotdiff += adjust - ap->length;
                        } else
                                /* Neither offset is valid: keep gotbno, double the measure. */
                                gotdiff += adjust;
                }
                /*
                 * No next block, just default.
                 */
                else
                        gotbno = NULLFSBLOCK;
                /*
                 * If both valid, pick the better one, else the only good
                 * one, else ap->blkno is already set (to 0 or the inode block).
                 */
                if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) {
                        ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
                        return true;
                }
                if (prevbno != NULLFSBLOCK) {
                        ap->blkno = prevbno;
                        return true;
                }
                if (gotbno != NULLFSBLOCK) {
                        ap->blkno = gotbno;
                        return true;
                }
        }

        return false;
}
3300
3301 int
3302 xfs_bmap_longest_free_extent(
3303         struct xfs_perag        *pag,
3304         struct xfs_trans        *tp,
3305         xfs_extlen_t            *blen)
3306 {
3307         xfs_extlen_t            longest;
3308         int                     error = 0;
3309
3310         if (!xfs_perag_initialised_agf(pag)) {
3311                 error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_TRYLOCK,
3312                                 NULL);
3313                 if (error)
3314                         return error;
3315         }
3316
3317         longest = xfs_alloc_longest_free_extent(pag,
3318                                 xfs_alloc_min_freelist(pag_mount(pag), pag),
3319                                 xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3320         if (*blen < longest)
3321                 *blen = longest;
3322
3323         return 0;
3324 }
3325
3326 static xfs_extlen_t
3327 xfs_bmap_select_minlen(
3328         struct xfs_bmalloca     *ap,
3329         struct xfs_alloc_arg    *args,
3330         xfs_extlen_t            blen)
3331 {
3332
3333         /*
3334          * Since we used XFS_ALLOC_FLAG_TRYLOCK in _longest_free_extent(), it is
3335          * possible that there is enough contiguous free space for this request.
3336          */
3337         if (blen < ap->minlen)
3338                 return ap->minlen;
3339
3340         /*
3341          * If the best seen length is less than the request length,
3342          * use the best as the minimum, otherwise we've got the maxlen we
3343          * were asked for.
3344          */
3345         if (blen < args->maxlen)
3346                 return blen;
3347         return args->maxlen;
3348 }
3349
3350 static int
3351 xfs_bmap_btalloc_select_lengths(
3352         struct xfs_bmalloca     *ap,
3353         struct xfs_alloc_arg    *args,
3354         xfs_extlen_t            *blen)
3355 {
3356         struct xfs_mount        *mp = args->mp;
3357         struct xfs_perag        *pag;
3358         xfs_agnumber_t          agno, startag;
3359         int                     error = 0;
3360
3361         if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3362                 args->total = ap->minlen;
3363                 args->minlen = ap->minlen;
3364                 return 0;
3365         }
3366
3367         args->total = ap->total;
3368         startag = XFS_FSB_TO_AGNO(mp, ap->blkno);
3369         if (startag == NULLAGNUMBER)
3370                 startag = 0;
3371
3372         *blen = 0;
3373         for_each_perag_wrap(mp, startag, agno, pag) {
3374                 error = xfs_bmap_longest_free_extent(pag, args->tp, blen);
3375                 if (error && error != -EAGAIN)
3376                         break;
3377                 error = 0;
3378                 if (*blen >= args->maxlen)
3379                         break;
3380         }
3381         if (pag)
3382                 xfs_perag_rele(pag);
3383
3384         args->minlen = xfs_bmap_select_minlen(ap, args, *blen);
3385         return error;
3386 }
3387
3388 /* Update all inode and quota accounting for the allocation we just did. */
3389 void
3390 xfs_bmap_alloc_account(
3391         struct xfs_bmalloca     *ap)
3392 {
3393         bool                    isrt = XFS_IS_REALTIME_INODE(ap->ip) &&
3394                                         !(ap->flags & XFS_BMAPI_ATTRFORK);
3395         uint                    fld;
3396
3397         if (ap->flags & XFS_BMAPI_COWFORK) {
3398                 /*
3399                  * COW fork blocks are in-core only and thus are treated as
3400                  * in-core quota reservation (like delalloc blocks) even when
3401                  * converted to real blocks. The quota reservation is not
3402                  * accounted to disk until blocks are remapped to the data
3403                  * fork. So if these blocks were previously delalloc, we
3404                  * already have quota reservation and there's nothing to do
3405                  * yet.
3406                  */
3407                 if (ap->wasdel) {
3408                         xfs_mod_delalloc(ap->ip, -(int64_t)ap->length, 0);
3409                         return;
3410                 }
3411
3412                 /*
3413                  * Otherwise, we've allocated blocks in a hole. The transaction
3414                  * has acquired in-core quota reservation for this extent.
3415                  * Rather than account these as real blocks, however, we reduce
3416                  * the transaction quota reservation based on the allocation.
3417                  * This essentially transfers the transaction quota reservation
3418                  * to that of a delalloc extent.
3419                  */
3420                 ap->ip->i_delayed_blks += ap->length;
3421                 xfs_trans_mod_dquot_byino(ap->tp, ap->ip, isrt ?
3422                                 XFS_TRANS_DQ_RES_RTBLKS : XFS_TRANS_DQ_RES_BLKS,
3423                                 -(long)ap->length);
3424                 return;
3425         }
3426
3427         /* data/attr fork only */
3428         ap->ip->i_nblocks += ap->length;
3429         xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3430         if (ap->wasdel) {
3431                 ap->ip->i_delayed_blks -= ap->length;
3432                 xfs_mod_delalloc(ap->ip, -(int64_t)ap->length, 0);
3433                 fld = isrt ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_DELBCOUNT;
3434         } else {
3435                 fld = isrt ? XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
3436         }
3437
3438         xfs_trans_mod_dquot_byino(ap->tp, ap->ip, fld, ap->length);
3439 }
3440
3441 static int
3442 xfs_bmap_compute_alignments(
3443         struct xfs_bmalloca     *ap,
3444         struct xfs_alloc_arg    *args)
3445 {
3446         struct xfs_mount        *mp = args->mp;
3447         xfs_extlen_t            align = 0; /* minimum allocation alignment */
3448         int                     stripe_align = 0;
3449
3450         /* stripe alignment for allocation is determined by mount parameters */
3451         if (mp->m_swidth && xfs_has_swalloc(mp))
3452                 stripe_align = mp->m_swidth;
3453         else if (mp->m_dalign)
3454                 stripe_align = mp->m_dalign;
3455
3456         if (ap->flags & XFS_BMAPI_COWFORK)
3457                 align = xfs_get_cowextsz_hint(ap->ip);
3458         else if (ap->datatype & XFS_ALLOC_USERDATA)
3459                 align = xfs_get_extsz_hint(ap->ip);
3460         if (align) {
3461                 if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
3462                                         ap->eof, 0, ap->conv, &ap->offset,
3463                                         &ap->length))
3464                         ASSERT(0);
3465                 ASSERT(ap->length);
3466         }
3467
3468         /* apply extent size hints if obtained earlier */
3469         if (align) {
3470                 args->prod = align;
3471                 div_u64_rem(ap->offset, args->prod, &args->mod);
3472                 if (args->mod)
3473                         args->mod = args->prod - args->mod;
3474         } else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
3475                 args->prod = 1;
3476                 args->mod = 0;
3477         } else {
3478                 args->prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
3479                 div_u64_rem(ap->offset, args->prod, &args->mod);
3480                 if (args->mod)
3481                         args->mod = args->prod - args->mod;
3482         }
3483
3484         return stripe_align;
3485 }
3486
3487 static void
3488 xfs_bmap_process_allocated_extent(
3489         struct xfs_bmalloca     *ap,
3490         struct xfs_alloc_arg    *args,
3491         xfs_fileoff_t           orig_offset,
3492         xfs_extlen_t            orig_length)
3493 {
3494         ap->blkno = args->fsbno;
3495         ap->length = args->len;
3496         /*
3497          * If the extent size hint is active, we tried to round the
3498          * caller's allocation request offset down to extsz and the
3499          * length up to another extsz boundary.  If we found a free
3500          * extent we mapped it in starting at this new offset.  If the
3501          * newly mapped space isn't long enough to cover any of the
3502          * range of offsets that was originally requested, move the
3503          * mapping up so that we can fill as much of the caller's
3504          * original request as possible.  Free space is apparently
3505          * very fragmented so we're unlikely to be able to satisfy the
3506          * hints anyway.
3507          */
3508         if (ap->length <= orig_length)
3509                 ap->offset = orig_offset;
3510         else if (ap->offset + ap->length < orig_offset + orig_length)
3511                 ap->offset = orig_offset + orig_length - ap->length;
3512         xfs_bmap_alloc_account(ap);
3513 }
3514
3515 static int
3516 xfs_bmap_exact_minlen_extent_alloc(
3517         struct xfs_bmalloca     *ap,
3518         struct xfs_alloc_arg    *args)
3519 {
3520         if (ap->minlen != 1) {
3521                 args->fsbno = NULLFSBLOCK;
3522                 return 0;
3523         }
3524
3525         args->alloc_minlen_only = 1;
3526         args->minlen = args->maxlen = ap->minlen;
3527         args->total = ap->total;
3528
3529         /*
3530          * Unlike the longest extent available in an AG, we don't track
3531          * the length of an AG's shortest extent.
3532          * XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT is a debug only knob and
3533          * hence we can afford to start traversing from the 0th AG since
3534          * we need not be concerned about a drop in performance in
3535          * "debug only" code paths.
3536          */
3537         ap->blkno = XFS_AGB_TO_FSB(ap->ip->i_mount, 0, 0);
3538
3539         /*
3540          * Call xfs_bmap_btalloc_low_space here as it first does a "normal" AG
3541          * iteration and then drops args->total to args->minlen, which might be
3542          * required to find an allocation for the transaction reservation when
3543          * the file system is very full.
3544          */
3545         return xfs_bmap_btalloc_low_space(ap, args);
3546 }
3547
3548 /*
3549  * If we are not low on available data blocks and we are allocating at
3550  * EOF, optimise allocation for contiguous file extension and/or stripe
3551  * alignment of the new extent.
3552  *
3553  * NOTE: ap->aeof is only set if the allocation length is >= the
3554  * stripe unit and the allocation offset is at the end of file.
3555  */
3556 static int
3557 xfs_bmap_btalloc_at_eof(
3558         struct xfs_bmalloca     *ap,
3559         struct xfs_alloc_arg    *args,
3560         xfs_extlen_t            blen,
3561         int                     stripe_align,
3562         bool                    ag_only)
3563 {
3564         struct xfs_mount        *mp = args->mp;
3565         struct xfs_perag        *caller_pag = args->pag;
3566         int                     error;
3567
3568         /*
3569          * If there are already extents in the file, try an exact EOF block
3570          * allocation to extend the file as a contiguous extent. If that fails,
3571          * or it's the first allocation in a file, just try for a stripe aligned
3572          * allocation.
3573          */
3574         if (ap->offset) {
3575                 xfs_extlen_t    nextminlen = 0;
3576
3577                 /*
3578                  * Compute the minlen+alignment for the next case.  Set slop so
3579                  * that the value of minlen+alignment+slop doesn't go up between
3580                  * the calls.
3581                  */
3582                 args->alignment = 1;
3583                 if (blen > stripe_align && blen <= args->maxlen)
3584                         nextminlen = blen - stripe_align;
3585                 else
3586                         nextminlen = args->minlen;
3587                 if (nextminlen + stripe_align > args->minlen + 1)
3588                         args->minalignslop = nextminlen + stripe_align -
3589                                         args->minlen - 1;
3590                 else
3591                         args->minalignslop = 0;
3592
3593                 if (!caller_pag)
3594                         args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno));
3595                 error = xfs_alloc_vextent_exact_bno(args, ap->blkno);
3596                 if (!caller_pag) {
3597                         xfs_perag_put(args->pag);
3598                         args->pag = NULL;
3599                 }
3600                 if (error)
3601                         return error;
3602
3603                 if (args->fsbno != NULLFSBLOCK)
3604                         return 0;
3605                 /*
3606                  * Exact allocation failed. Reset to try an aligned allocation
3607                  * according to the original allocation specification.
3608                  */
3609                 args->alignment = stripe_align;
3610                 args->minlen = nextminlen;
3611                 args->minalignslop = 0;
3612         } else {
3613                 /*
3614                  * Adjust minlen to try and preserve alignment if we
3615                  * can't guarantee an aligned maxlen extent.
3616                  */
3617                 args->alignment = stripe_align;
3618                 if (blen > args->alignment &&
3619                     blen <= args->maxlen + args->alignment)
3620                         args->minlen = blen - args->alignment;
3621                 args->minalignslop = 0;
3622         }
3623
3624         if (ag_only) {
3625                 error = xfs_alloc_vextent_near_bno(args, ap->blkno);
3626         } else {
3627                 args->pag = NULL;
3628                 error = xfs_alloc_vextent_start_ag(args, ap->blkno);
3629                 ASSERT(args->pag == NULL);
3630                 args->pag = caller_pag;
3631         }
3632         if (error)
3633                 return error;
3634
3635         if (args->fsbno != NULLFSBLOCK)
3636                 return 0;
3637
3638         /*
3639          * Allocation failed, so return the allocation args to their
3640          * original non-aligned state so the caller can proceed on allocation
3641          * failure as if this function was never called.
3642          */
3643         args->alignment = 1;
3644         return 0;
3645 }
3646
3647 /*
3648  * We have failed multiple allocation attempts so now are in a low space
3649  * allocation situation. Try a locality first full filesystem minimum length
3650  * allocation whilst still maintaining necessary total block reservation
3651  * requirements.
3652  *
3653  * If that fails, we are now critically low on space, so perform a last resort
3654  * allocation attempt: no reserve, no locality, blocking, minimum length, full
3655  * filesystem free space scan. We also indicate to future allocations in this
3656  * transaction that we are critically low on space so they don't waste time on
3657  * allocation modes that are unlikely to succeed.
3658  */
3659 int
3660 xfs_bmap_btalloc_low_space(
3661         struct xfs_bmalloca     *ap,
3662         struct xfs_alloc_arg    *args)
3663 {
3664         int                     error;
3665
3666         if (args->minlen > ap->minlen) {
3667                 args->minlen = ap->minlen;
3668                 error = xfs_alloc_vextent_start_ag(args, ap->blkno);
3669                 if (error || args->fsbno != NULLFSBLOCK)
3670                         return error;
3671         }
3672
3673         /* Last ditch attempt before failure is declared. */
3674         args->total = ap->minlen;
3675         error = xfs_alloc_vextent_first_ag(args, 0);
3676         if (error)
3677                 return error;
3678         ap->tp->t_flags |= XFS_TRANS_LOWMODE;
3679         return 0;
3680 }
3681
/*
 * Allocation strategy used by xfs_bmap_btalloc() for user data on a
 * filestreams inode.
 *
 * xfs_filestream_select_ag() chooses the AG; on success it leaves a perag
 * reference in args->pag (asserted below) and a length in blen.  That
 * reference is dropped with xfs_perag_rele() before this function returns or
 * falls back to xfs_bmap_btalloc_low_space().
 *
 * Returns 0 or the error from a callee.  Whether blocks were found is
 * reported through args->fsbno (NULLFSBLOCK if not).
 */
3682 static int
3683 xfs_bmap_btalloc_filestreams(
3684         struct xfs_bmalloca     *ap,
3685         struct xfs_alloc_arg    *args,
3686         int                     stripe_align)
3687 {
3688         xfs_extlen_t            blen = 0;
3689         int                     error = 0;
3690
3691
3692         error = xfs_filestream_select_ag(ap, args, &blen);
3693         if (error)
3694                 return error;
3695         ASSERT(args->pag);
3696
3697         /*
3698          * If we are in low space mode, then optimal allocation will fail so
3699          * prepare for minimal allocation and jump to the low space algorithm
3700          * immediately.
3701          */
3702         if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3703                 args->minlen = ap->minlen;
3704                 ASSERT(args->fsbno == NULLFSBLOCK);
3705                 goto out_low_space;
3706         }
3707
3708         args->minlen = xfs_bmap_select_minlen(ap, args, blen);
             /*
              * NOTE(review): the trailing "true" is presumably the ag_only
              * argument of xfs_bmap_btalloc_at_eof() - confirm against its
              * prototype.
              */
3709         if (ap->aeof)
3710                 error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
3711                                 true);
3712
             /* No EOF allocation (or it found nothing): try near ap->blkno. */
3713         if (!error && args->fsbno == NULLFSBLOCK)
3714                 error = xfs_alloc_vextent_near_bno(args, ap->blkno);
3715
3716 out_low_space:
3717         /*
3718          * We are now done with the perag reference for the filestreams
3719          * association provided by xfs_filestream_select_ag(). Release it now as
3720          * we've either succeeded, had a fatal error or we are out of space and
3721          * need to do a full filesystem scan for free space which will take its
3722          * own references.
3723          */
3724         xfs_perag_rele(args->pag);
3725         args->pag = NULL;
3726         if (error || args->fsbno != NULLFSBLOCK)
3727                 return error;
3728
3729         return xfs_bmap_btalloc_low_space(ap, args);
3730 }
3731
/*
 * Allocation strategy used by xfs_bmap_btalloc() when neither the exact-minlen
 * error injection nor the filestreams path applies.
 *
 * The target block is seeded from the inode's own location and then refined by
 * xfs_bmap_adjacent().  Attempts are made in this order, stopping at the first
 * error or the first one that sets args->fsbno:
 *   1. xfs_bmap_btalloc_at_eof(), only for EOF allocations (ap->aeof) in a
 *      transaction that is not already in XFS_TRANS_LOWMODE;
 *   2. xfs_alloc_vextent_start_ag() at ap->blkno;
 *   3. xfs_bmap_btalloc_low_space().
 *
 * Returns 0 or the error from a callee.  Whether blocks were found is
 * reported through args->fsbno (NULLFSBLOCK if not).
 */
3732 static int
3733 xfs_bmap_btalloc_best_length(
3734         struct xfs_bmalloca     *ap,
3735         struct xfs_alloc_arg    *args,
3736         int                     stripe_align)
3737 {
3738         xfs_extlen_t            blen = 0;
3739         int                     error;
3740
3741         ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino);
3742         xfs_bmap_adjacent(ap);
3743
3744         /*
3745          * Search for an allocation group with a single extent large enough for
3746          * the request.  If one isn't found, then adjust the minimum allocation
3747          * size to the largest space found.
3748          */
3749         error = xfs_bmap_btalloc_select_lengths(ap, args, &blen);
3750         if (error)
3751                 return error;
3752
3753         /*
3754          * Don't attempt optimal EOF allocation if previous allocations barely
3755          * succeeded due to being near ENOSPC. It is highly unlikely we'll get
3756          * optimal or even aligned allocations in this case, so don't waste time
3757          * trying.
3758          */
3759         if (ap->aeof && !(ap->tp->t_flags & XFS_TRANS_LOWMODE)) {
3760                 error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
3761                                 false);
3762                 if (error || args->fsbno != NULLFSBLOCK)
3763                         return error;
3764         }
3765
3766         error = xfs_alloc_vextent_start_ag(args, ap->blkno);
3767         if (error || args->fsbno != NULLFSBLOCK)
3768                 return error;
3769
3770         return xfs_bmap_btalloc_low_space(ap, args);
3771 }
3772
/*
 * Build an xfs_alloc_arg for the request described by @ap and run one of
 * three allocation strategies:
 *   - xfs_bmap_exact_minlen_extent_alloc() when the
 *     XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT error tag fires;
 *   - xfs_bmap_btalloc_filestreams() for user data on a filestreams inode;
 *   - xfs_bmap_btalloc_best_length() otherwise.
 *
 * If blocks were found, xfs_bmap_process_allocated_extent() is called with the
 * offset and length the caller originally asked for.  If none were found,
 * ap->blkno is set to NULLFSBLOCK and ap->length to 0, and 0 is still
 * returned, so callers must check ap->blkno to detect an empty-handed
 * allocation.
 *
 * Returns 0 or the error from the selected strategy.
 */
3773 static int
3774 xfs_bmap_btalloc(
3775         struct xfs_bmalloca     *ap)
3776 {
3777         struct xfs_mount        *mp = ap->ip->i_mount;
3778         struct xfs_alloc_arg    args = {
3779                 .tp             = ap->tp,
3780                 .mp             = mp,
3781                 .fsbno          = NULLFSBLOCK,
3782                 .oinfo          = XFS_RMAP_OINFO_SKIP_UPDATE,
3783                 .minleft        = ap->minleft,
3784                 .wasdel         = ap->wasdel,
3785                 .resv           = XFS_AG_RESV_NONE,
3786                 .datatype       = ap->datatype,
3787                 .alignment      = 1,
3788                 .minalignslop   = 0,
3789         };
3790         xfs_fileoff_t           orig_offset;
3791         xfs_extlen_t            orig_length;
3792         int                     error;
3793         int                     stripe_align;
3794
3795         ASSERT(ap->length);
             /*
              * NOTE(review): saved up front, presumably because
              * xfs_bmap_compute_alignments() may change ap->offset and
              * ap->length - confirm.
              */
3796         orig_offset = ap->offset;
3797         orig_length = ap->length;
3798
3799         stripe_align = xfs_bmap_compute_alignments(ap, &args);
3800
3801         /* Trim the allocation back to the maximum an AG can fit. */
3802         args.maxlen = min(ap->length, mp->m_ag_max_usable);
3803
3804         if (unlikely(XFS_TEST_ERROR(false, mp,
3805                         XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
3806                 error = xfs_bmap_exact_minlen_extent_alloc(ap, &args);
3807         else if ((ap->datatype & XFS_ALLOC_USERDATA) &&
3808                         xfs_inode_is_filestream(ap->ip))
3809                 error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align);
3810         else
3811                 error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align);
3812         if (error)
3813                 return error;
3814
3815         if (args.fsbno != NULLFSBLOCK) {
3816                 xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
3817                         orig_length);
3818         } else {
                     /* Nothing allocated: report that through ap, not the return code. */
3819                 ap->blkno = NULLFSBLOCK;
3820                 ap->length = 0;
3821         }
3822         return 0;
3823 }
3824
3825 /* Trim extent to fit a logical block range. */
3826 void
3827 xfs_trim_extent(
3828         struct xfs_bmbt_irec    *irec,
3829         xfs_fileoff_t           bno,
3830         xfs_filblks_t           len)
3831 {
3832         xfs_fileoff_t           distance;
3833         xfs_fileoff_t           end = bno + len;
3834
3835         if (irec->br_startoff + irec->br_blockcount <= bno ||
3836             irec->br_startoff >= end) {
3837                 irec->br_blockcount = 0;
3838                 return;
3839         }
3840
3841         if (irec->br_startoff < bno) {
3842                 distance = bno - irec->br_startoff;
3843                 if (isnullstartblock(irec->br_startblock))
3844                         irec->br_startblock = DELAYSTARTBLOCK;
3845                 if (irec->br_startblock != DELAYSTARTBLOCK &&
3846                     irec->br_startblock != HOLESTARTBLOCK)
3847                         irec->br_startblock += distance;
3848                 irec->br_startoff += distance;
3849                 irec->br_blockcount -= distance;
3850         }
3851
3852         if (end < irec->br_startoff + irec->br_blockcount) {
3853                 distance = irec->br_startoff + irec->br_blockcount - end;
3854                 irec->br_blockcount -= distance;
3855         }
3856 }
3857
3858 /*
3859  * Trim the returned map to the required bounds
3860  */
3861 STATIC void
3862 xfs_bmapi_trim_map(
3863         struct xfs_bmbt_irec    *mval,
3864         struct xfs_bmbt_irec    *got,
3865         xfs_fileoff_t           *bno,
3866         xfs_filblks_t           len,
3867         xfs_fileoff_t           obno,
3868         xfs_fileoff_t           end,
3869         int                     n,
3870         uint32_t                flags)
3871 {
3872         if ((flags & XFS_BMAPI_ENTIRE) ||
3873             got->br_startoff + got->br_blockcount <= obno) {
3874                 *mval = *got;
3875                 if (isnullstartblock(got->br_startblock))
3876                         mval->br_startblock = DELAYSTARTBLOCK;
3877                 return;
3878         }
3879
3880         if (obno > *bno)
3881                 *bno = obno;
3882         ASSERT((*bno >= obno) || (n == 0));
3883         ASSERT(*bno < end);
3884         mval->br_startoff = *bno;
3885         if (isnullstartblock(got->br_startblock))
3886                 mval->br_startblock = DELAYSTARTBLOCK;
3887         else
3888                 mval->br_startblock = got->br_startblock +
3889                                         (*bno - got->br_startoff);
3890         /*
3891          * Return the minimum of what we got and what we asked for for
3892          * the length.  We can use the len variable here because it is
3893          * modified below and we could have been there before coming
3894          * here if the first part of the allocation didn't overlap what
3895          * was asked for.
3896          */
3897         mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3898                         got->br_blockcount - (*bno - got->br_startoff));
3899         mval->br_state = got->br_state;
3900         ASSERT(mval->br_blockcount <= len);
3901         return;
3902 }
3903
3904 /*
3905  * Update and validate the extent map to return
      *
      * On entry *map points at the mapping that was just filled in.  *bno is
      * advanced to the end of that mapping and *len is recomputed as the
      * distance from there to end.
      *
      * The new mapping is then either folded into the previous array entry
      * (map[-1]) or kept as an entry of its own:
      *   - same start offset as the previous entry: the previous entry takes
      *     over the new block count and state;
      *   - both are real extents, physically contiguous and in the same state:
      *     the previous entry is extended;
      *   - both are delalloc (DELAYSTARTBLOCK) and logically contiguous: the
      *     previous entry is extended and takes the new state;
      *   - otherwise the map pointer and *n are advanced, unless this is the
      *     first mapping (*n == 0) and it ends at or before obno.
      *
      * Whenever the pointer is not advanced, the slot *map points at will be
      * overwritten by the next mapping.
3906  */
3907 STATIC void
3908 xfs_bmapi_update_map(
3909         struct xfs_bmbt_irec    **map,
3910         xfs_fileoff_t           *bno,
3911         xfs_filblks_t           *len,
3912         xfs_fileoff_t           obno,
3913         xfs_fileoff_t           end,
3914         int                     *n,
3915         uint32_t                flags)
3916 {
3917         xfs_bmbt_irec_t *mval = *map;
3918
3919         ASSERT((flags & XFS_BMAPI_ENTIRE) ||
3920                ((mval->br_startoff + mval->br_blockcount) <= end));
3921         ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
3922                (mval->br_startoff < obno));
3923
3924         *bno = mval->br_startoff + mval->br_blockcount;
3925         *len = end - *bno;
3926         if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
3927                 /* update previous map with new information */
3928                 ASSERT(mval->br_startblock == mval[-1].br_startblock);
3929                 ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
3930                 ASSERT(mval->br_state == mval[-1].br_state);
3931                 mval[-1].br_blockcount = mval->br_blockcount;
3932                 mval[-1].br_state = mval->br_state;
3933         } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
3934                    mval[-1].br_startblock != DELAYSTARTBLOCK &&
3935                    mval[-1].br_startblock != HOLESTARTBLOCK &&
3936                    mval->br_startblock == mval[-1].br_startblock +
3937                                           mval[-1].br_blockcount &&
3938                    mval[-1].br_state == mval->br_state) {
                     /* physically contiguous real extents: extend the previous map */
3939                 ASSERT(mval->br_startoff ==
3940                        mval[-1].br_startoff + mval[-1].br_blockcount);
3941                 mval[-1].br_blockcount += mval->br_blockcount;
3942         } else if (*n > 0 &&
3943                    mval->br_startblock == DELAYSTARTBLOCK &&
3944                    mval[-1].br_startblock == DELAYSTARTBLOCK &&
3945                    mval->br_startoff ==
3946                    mval[-1].br_startoff + mval[-1].br_blockcount) {
                     /* logically contiguous delalloc extents: extend the previous map */
3947                 mval[-1].br_blockcount += mval->br_blockcount;
3948                 mval[-1].br_state = mval->br_state;
3949         } else if (!((*n == 0) &&
3950                      ((mval->br_startoff + mval->br_blockcount) <=
3951                       obno))) {
                     /* keep this mapping as a new entry */
3952                 mval++;
3953                 (*n)++;
3954         }
3955         *map = mval;
3956 }
3957
3958 /*
3959  * Map file blocks to filesystem blocks without allocation.
      *
      * Looks up the mappings covering file blocks [bno, bno + len) in the fork
      * selected by flags and stores them in the mval array.  On entry *nmap is
      * the capacity of mval (at least 1); on successful return it is the number
      * of entries filled in.  Ranges with no extent, including everything past
      * the last extent, are returned as HOLESTARTBLOCK mappings in state
      * XFS_EXT_NORM.
      *
      * Only XFS_BMAPI_ATTRFORK and XFS_BMAPI_ENTIRE are accepted in flags, and
      * the inode lock state is checked with xfs_assert_ilocked().
      *
      * Returns 0 on success, -EFSCORRUPTED if the fork is missing or not in a
      * format that has extents (the fork is marked sick), -EIO if the
      * filesystem is shut down, or the error from xfs_iread_extents().
3960  */
3961 int
3962 xfs_bmapi_read(
3963         struct xfs_inode        *ip,
3964         xfs_fileoff_t           bno,
3965         xfs_filblks_t           len,
3966         struct xfs_bmbt_irec    *mval,
3967         int                     *nmap,
3968         uint32_t                flags)
3969 {
3970         struct xfs_mount        *mp = ip->i_mount;
3971         int                     whichfork = xfs_bmapi_whichfork(flags);
3972         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
3973         struct xfs_bmbt_irec    got;
3974         xfs_fileoff_t           obno;
3975         xfs_fileoff_t           end;
3976         struct xfs_iext_cursor  icur;
3977         int                     error;
3978         bool                    eof = false;
3979         int                     n = 0;
3980
3981         ASSERT(*nmap >= 1);
3982         ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
3983         xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
3984
3985         if (WARN_ON_ONCE(!ifp)) {
3986                 xfs_bmap_mark_sick(ip, whichfork);
3987                 return -EFSCORRUPTED;
3988         }
3989
3990         if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
3991             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
3992                 xfs_bmap_mark_sick(ip, whichfork);
3993                 return -EFSCORRUPTED;
3994         }
3995
3996         if (xfs_is_shutdown(mp))
3997                 return -EIO;
3998
3999         XFS_STATS_INC(mp, xs_blk_mapr);
4000
4001         error = xfs_iread_extents(NULL, ip, whichfork);
4002         if (error)
4003                 return error;
4004
4005         if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
4006                 eof = true;
4007         end = bno + len;
             /* obno remembers the start of the request; bno advances below. */
4008         obno = bno;
4009
4010         while (bno < end && n < *nmap) {
4011                 /* Reading past eof, act as though there's a hole up to end. */
4012                 if (eof)
4013                         got.br_startoff = end;
4014                 if (got.br_startoff > bno) {
4015                         /* Reading in a hole.  */
4016                         mval->br_startoff = bno;
4017                         mval->br_startblock = HOLESTARTBLOCK;
4018                         mval->br_blockcount =
4019                                 XFS_FILBLKS_MIN(len, got.br_startoff - bno);
4020                         mval->br_state = XFS_EXT_NORM;
4021                         bno += mval->br_blockcount;
4022                         len -= mval->br_blockcount;
4023                         mval++;
4024                         n++;
                             /* got is not advanced: it may start where the hole ends. */
4025                         continue;
4026                 }
4027
4028                 /* set up the extent map to return. */
4029                 xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
4030                 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4031
4032                 /* If we're done, stop now. */
4033                 if (bno >= end || n >= *nmap)
4034                         break;
4035
4036                 /* Else go on to the next record. */
4037                 if (!xfs_iext_next_extent(ifp, &icur, &got))
4038                         eof = true;
4039         }
4040         *nmap = n;
4041         return 0;
4042 }
4043
4044 /*
4045  * Add a delayed allocation extent to an inode. Blocks are reserved from the
4046  * global pool and the extent inserted into the inode in-core extent tree.
4047  *
4048  * On entry, got refers to the first extent beyond the offset of the extent to
4049  * allocate or eof is specified if no such extent exists. On return, got refers
4050  * to the extent record that was inserted to the inode fork.
4051  *
4052  * Note that the allocated extent may have been merged with contiguous extents
4053  * during insertion into the inode fork. Thus, got does not reflect the current
4054  * state of the inode fork on return. If necessary, the caller can use icur to
4055  * look up the updated record in the inode fork.
      *
      * Returns 0 on success or the error from the failed reservation.  If a
      * reservation fails with -ENOSPC or -EDQUOT while preallocation or COW
      * extent size hint alignment is in play, the reservation is retried
      * without them before the error is returned.
4056  */
4057 int
4058 xfs_bmapi_reserve_delalloc(
4059         struct xfs_inode        *ip,
4060         int                     whichfork,
4061         xfs_fileoff_t           off,
4062         xfs_filblks_t           len,
4063         xfs_filblks_t           prealloc,
4064         struct xfs_bmbt_irec    *got,
4065         struct xfs_iext_cursor  *icur,
4066         int                     eof)
4067 {
4068         struct xfs_mount        *mp = ip->i_mount;
4069         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
4070         xfs_extlen_t            alen;
4071         xfs_extlen_t            indlen;
4072         uint64_t                fdblocks;
4073         int                     error;
4074         xfs_fileoff_t           aoff;
4075         bool                    use_cowextszhint =
4076                                         whichfork == XFS_COW_FORK && !prealloc;
4077
4078 retry:
4079         /*
4080          * Cap the alloc length. Keep track of prealloc so we know whether to
4081          * tag the inode before we return.
4082          */
4083         aoff = off;
4084         alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN);
4085         if (!eof)
4086                 alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
4087         if (prealloc && alen >= len)
4088                 prealloc = alen - len;
4089
4090         /*
4091          * If we're targeting the COW fork but aren't creating a speculative
4092          * posteof preallocation, try to expand the reservation to align with
4093          * the COW extent size hint if there's sufficient free space.
4094          *
4095          * Unlike the data fork, the CoW cancellation functions will free all
4096          * the reservations at inactivation, so we don't require that every
4097          * delalloc reservation have a dirty pagecache.
4098          */
4099         if (use_cowextszhint) {
4100                 struct xfs_bmbt_irec    prev;
4101                 xfs_extlen_t            extsz = xfs_get_cowextsz_hint(ip);
4102
4103                 if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
4104                         prev.br_startoff = NULLFILEOFF;
4105
4106                 error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
4107                                                1, 0, &aoff, &alen);
4108                 ASSERT(!error);
4109         }
4110
4111         /*
4112          * Make a transaction-less quota reservation for delayed allocation
4113          * blocks.  This number gets adjusted later.  On failure we take the
4114          * common error path, which may retry without preallocation.
4115          */
4116         error = xfs_quota_reserve_blkres(ip, alen);
4117         if (error)
4118                 goto out;
4119
4120         /*
4121          * Split changing sb for alen and indlen since they could be coming
4122          * from different places.
4123          */
4124         indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
4125         ASSERT(indlen > 0);
4126
             /*
              * The indirect blocks are always taken from fdblocks.  The data
              * blocks are taken from the free realtime extent counter for
              * realtime inodes and from fdblocks otherwise.
              */
4127         fdblocks = indlen;
4128         if (XFS_IS_REALTIME_INODE(ip)) {
4129                 error = xfs_dec_frextents(mp, xfs_blen_to_rtbxlen(mp, alen));
4130                 if (error)
4131                         goto out_unreserve_quota;
4132         } else {
4133                 fdblocks += alen;
4134         }
4135
4136         error = xfs_dec_fdblocks(mp, fdblocks, false);
4137         if (error)
4138                 goto out_unreserve_frextents;
4139
4140         ip->i_delayed_blks += alen;
4141         xfs_mod_delalloc(ip, alen, indlen);
4142
             /* Describe the new delalloc extent; indlen is encoded in the start block. */
4143         got->br_startoff = aoff;
4144         got->br_startblock = nullstartblock(indlen);
4145         got->br_blockcount = alen;
4146         got->br_state = XFS_EXT_NORM;
4147
4148         xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
4149
4150         /*
4151          * Tag the inode if blocks were preallocated. Note that COW fork
4152          * preallocation can occur at the start or end of the extent, even when
4153          * prealloc == 0, so we must also check the aligned offset and length.
4154          */
4155         if (whichfork == XFS_DATA_FORK && prealloc)
4156                 xfs_inode_set_eofblocks_tag(ip);
4157         if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
4158                 xfs_inode_set_cowblocks_tag(ip);
4159
4160         return 0;
4161
             /* Error unwind: undo the reservations in reverse order of acquisition. */
4162 out_unreserve_frextents:
4163         if (XFS_IS_REALTIME_INODE(ip))
4164                 xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, alen));
4165 out_unreserve_quota:
4166         if (XFS_IS_QUOTA_ON(mp))
4167                 xfs_quota_unreserve_blkres(ip, alen);
4168 out:
4169         if (error == -ENOSPC || error == -EDQUOT) {
4170                 trace_xfs_delalloc_enospc(ip, off, len);
4171
4172                 if (prealloc || use_cowextszhint) {
4173                         /* retry without any preallocation */
4174                         use_cowextszhint = false;
4175                         prealloc = 0;
4176                         goto retry;
4177                 }
4178         }
4179         return error;
4180 }
4181
/*
 * Allocate blocks for the range described by @bma (bma->offset, bma->length)
 * and insert the resulting extent into the fork selected by bma->flags.
 *
 * The minimum length is the full length for XFS_BMAPI_CONTIG requests and one
 * block otherwise.  User data on a realtime inode is allocated with
 * xfs_bmap_rtalloc(), everything else with xfs_bmap_btalloc().  The new extent
 * is inserted as XFS_EXT_UNWRITTEN for XFS_BMAPI_PREALLOC requests and as
 * XFS_EXT_NORM otherwise; on return bma->got holds the record at bma->icur,
 * which may be larger than the request if the new extent was merged with a
 * neighbour.
 *
 * Returns 0 on success, -ENOSPC if the allocator found no blocks,
 * -EFSCORRUPTED if it returned an invalid start block (the fork is marked
 * sick), or the error from a callee.
 */
4182 static int
4183 xfs_bmapi_allocate(
4184         struct xfs_bmalloca     *bma)
4185 {
4186         struct xfs_mount        *mp = bma->ip->i_mount;
4187         int                     whichfork = xfs_bmapi_whichfork(bma->flags);
4188         struct xfs_ifork        *ifp = xfs_ifork_ptr(bma->ip, whichfork);
4189         int                     error;
4190
4191         ASSERT(bma->length > 0);
4192         ASSERT(bma->length <= XFS_MAX_BMBT_EXTLEN);
4193
4194         if (bma->flags & XFS_BMAPI_CONTIG)
4195                 bma->minlen = bma->length;
4196         else
4197                 bma->minlen = 1;
4198
4199         if (!(bma->flags & XFS_BMAPI_METADATA)) {
4200                 /*
4201                  * For the data and COW fork, the first data in the file is
4202                  * treated differently to all other allocations. For the
4203                  * attribute fork, we only need to ensure the allocated range
4204                  * is not on the busy list.
4205                  */
4206                 bma->datatype = XFS_ALLOC_NOBUSY;
4207                 if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
4208                         bma->datatype |= XFS_ALLOC_USERDATA;
4209                         if (bma->offset == 0)
4210                                 bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
4211
                             /*
                              * Only consulted when a stripe unit is set and
                              * the request is at least that long.
                              * NOTE(review): xfs_bmap_isaeof() presumably
                              * records its answer in bma->aeof - confirm.
                              */
4212                         if (mp->m_dalign && bma->length >= mp->m_dalign) {
4213                                 error = xfs_bmap_isaeof(bma, whichfork);
4214                                 if (error)
4215                                         return error;
4216                         }
4217                 }
4218         }
4219
4220         if ((bma->datatype & XFS_ALLOC_USERDATA) &&
4221             XFS_IS_REALTIME_INODE(bma->ip))
4222                 error = xfs_bmap_rtalloc(bma);
4223         else
4224                 error = xfs_bmap_btalloc(bma);
4225         if (error)
4226                 return error;
             /* The allocator reports "no space" through bma->blkno, not its return code. */
4227         if (bma->blkno == NULLFSBLOCK)
4228                 return -ENOSPC;
4229
4230         if (WARN_ON_ONCE(!xfs_valid_startblock(bma->ip, bma->blkno))) {
4231                 xfs_bmap_mark_sick(bma->ip, whichfork);
4232                 return -EFSCORRUPTED;
4233         }
4234
4235         if (bma->flags & XFS_BMAPI_ZERO) {
4236                 error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
4237                 if (error)
4238                         return error;
4239         }
4240
4241         if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur)
4242                 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4243         /*
4244          * Bump the number of extents we've allocated
4245          * in this call.
4246          */
4247         bma->nallocs++;
4248
4249         if (bma->cur && bma->wasdel)
4250                 bma->cur->bc_flags |= XFS_BTREE_BMBT_WASDEL;
4251
4252         bma->got.br_startoff = bma->offset;
4253         bma->got.br_startblock = bma->blkno;
4254         bma->got.br_blockcount = bma->length;
4255         bma->got.br_state = XFS_EXT_NORM;
4256
4257         if (bma->flags & XFS_BMAPI_PREALLOC)
4258                 bma->got.br_state = XFS_EXT_UNWRITTEN;
4259
4260         if (bma->wasdel)
4261                 error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4262         else
4263                 error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
4264                                 whichfork, &bma->icur, &bma->cur, &bma->got,
4265                                 &bma->logflags, bma->flags);
4266         if (error)
4267                 return error;
4268
4269         /*
4270          * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4271          * or xfs_bmap_add_extent_hole_real might have merged it into one of
4272          * the neighbouring ones.
4273          */
4274         xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4275
4276         ASSERT(bma->got.br_startoff <= bma->offset);
4277         ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4278                bma->offset + bma->length);
4279         ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4280                bma->got.br_state == XFS_EXT_UNWRITTEN);
4281         return 0;
4282 }
4283
/*
 * Flip the state of the mapping in @mval between XFS_EXT_UNWRITTEN and
 * XFS_EXT_NORM if the request flags call for it.
 *
 * Nothing is done when the mapping is unwritten and the caller asked for
 * preallocation, or when it is written and the caller did not pass both
 * XFS_BMAPI_PREALLOC and XFS_BMAPI_CONVERT.  In every other case the state in
 * @mval is toggled, the range is zeroed first if XFS_BMAPI_ZERO is set, and
 * the change is applied with xfs_bmap_add_extent_unwritten_real().
 *
 * Returns 0 if no conversion was needed or the whole of @len was converted,
 * -EAGAIN if the converted mapping is shorter than @len so that the caller
 * has to issue another request, or the error from a callee.
 */
4284 STATIC int
4285 xfs_bmapi_convert_unwritten(
4286         struct xfs_bmalloca     *bma,
4287         struct xfs_bmbt_irec    *mval,
4288         xfs_filblks_t           len,
4289         uint32_t                flags)
4290 {
4291         int                     whichfork = xfs_bmapi_whichfork(flags);
4292         struct xfs_ifork        *ifp = xfs_ifork_ptr(bma->ip, whichfork);
4293         int                     tmp_logflags = 0;
4294         int                     error;
4295
4296         /* already unwritten and preallocation requested: nothing to convert */
4297         if (mval->br_state == XFS_EXT_UNWRITTEN &&
4298             (flags & XFS_BMAPI_PREALLOC))
4299                 return 0;
4300
4301         /* written extents are only converted if both PREALLOC and CONVERT are set */
4302         if (mval->br_state == XFS_EXT_NORM &&
4303             (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4304                         (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4305                 return 0;
4306
4307         /*
4308          * Modify (by adding) the state flag, if writing.
4309          */
4310         ASSERT(mval->br_blockcount <= len);
4311         if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur) {
4312                 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4313                                         bma->ip, whichfork);
4314         }
             /* Toggle the state: unwritten becomes written and vice versa. */
4315         mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4316                                 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4317
4318         /*
4319          * Before insertion into the bmbt, zero the range being converted
4320          * if required.
4321          */
4322         if (flags & XFS_BMAPI_ZERO) {
4323                 error = xfs_zero_extent(bma->ip, mval->br_startblock,
4324                                         mval->br_blockcount);
4325                 if (error)
4326                         return error;
4327         }
4328
4329         error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
4330                         &bma->icur, &bma->cur, mval, &tmp_logflags);
4331         /*
4332          * Log the inode core unconditionally in the unwritten extent conversion
4333          * path because the conversion might not have done so (e.g., if the
4334          * extent count hasn't changed). We need to make sure the inode is dirty
4335          * in the transaction for the sake of fsync(), even if nothing has
4336          * changed, because fsync() will not force the log for this transaction
4337          * unless it sees the inode pinned.
4338          *
4339          * Note: If we're only converting cow fork extents, there aren't
4340          * any on-disk updates to make, so we don't need to log anything.
4341          */
             /* The log flags are merged before the error check, so also on failure. */
4342         if (whichfork != XFS_COW_FORK)
4343                 bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4344         if (error)
4345                 return error;
4346
4347         /*
4348          * Update our extent pointer, given that
4349          * xfs_bmap_add_extent_unwritten_real might have merged it into one
4350          * of the neighbouring ones.
4351          */
4352         xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4353
4354         /*
4355          * We may have combined previously unwritten space with written space,
4356          * so generate another request.
4357          */
4358         if (mval->br_blockcount < len)
4359                 return -EAGAIN;
4360         return 0;
4361 }
4362
4363 xfs_extlen_t
4364 xfs_bmapi_minleft(
4365         struct xfs_trans        *tp,
4366         struct xfs_inode        *ip,
4367         int                     fork)
4368 {
4369         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, fork);
4370
4371         if (tp && tp->t_highest_agno != NULLAGNUMBER)
4372                 return 0;
4373         if (ifp->if_format != XFS_DINODE_FMT_BTREE)
4374                 return 1;
4375         return be16_to_cpu(ifp->if_broot->bb_level) + 1;
4376 }
4377
4378 /*
4379  * Log whatever the flags say, even if error.  Otherwise we might miss detecting
4380  * a case where the data is changed, there's an error, and it's not logged so we
4381  * don't shutdown when we should.  Don't bother logging extents/btree changes if
4382  * we converted to the other format.
4383  */
4384 static void
4385 xfs_bmapi_finish(
4386         struct xfs_bmalloca     *bma,
4387         int                     whichfork,
4388         int                     error)
4389 {
4390         struct xfs_ifork        *ifp = xfs_ifork_ptr(bma->ip, whichfork);
4391
4392         if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
4393             ifp->if_format != XFS_DINODE_FMT_EXTENTS)
4394                 bma->logflags &= ~xfs_ilog_fext(whichfork);
4395         else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
4396                  ifp->if_format != XFS_DINODE_FMT_BTREE)
4397                 bma->logflags &= ~xfs_ilog_fbroot(whichfork);
4398
4399         if (bma->logflags)
4400                 xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
4401         if (bma->cur)
4402                 xfs_btree_del_cursor(bma->cur, error);
4403 }
4404
4405 /*
4406  * Map file blocks to filesystem blocks, and allocate blocks or convert the
4407  * extent state if necessary.  Details behaviour is controlled by the flags
4408  * parameter.  Only allocates blocks from a single allocation group, to avoid
4409  * locking problems.
4410  *
4411  * Returns 0 on success and places the extent mappings in mval.  nmaps is used
4412  * as an input/output parameter where the caller specifies the maximum number
4413  * of mappings that may be returned and xfs_bmapi_write passes back the number
4414  * of mappings (including existing mappings) it found.
4415  *
4416  * Returns a negative error code on failure, including -ENOSPC when it could not
4417  * allocate any blocks and -ENOSR when it did allocate blocks to convert a
4418  * delalloc range, but those blocks were before the passed in range.
4419  */
4420 int
4421 xfs_bmapi_write(
4422         struct xfs_trans        *tp,            /* transaction pointer */
4423         struct xfs_inode        *ip,            /* incore inode */
4424         xfs_fileoff_t           bno,            /* starting file offs. mapped */
4425         xfs_filblks_t           len,            /* length to map in file */
4426         uint32_t                flags,          /* XFS_BMAPI_... */
4427         xfs_extlen_t            total,          /* total blocks needed */
4428         struct xfs_bmbt_irec    *mval,          /* output: map values */
4429         int                     *nmap)          /* i/o: mval size/count */
4430 {
4431         struct xfs_bmalloca     bma = {
4432                 .tp             = tp,
4433                 .ip             = ip,
4434                 .total          = total,
4435         };
4436         struct xfs_mount        *mp = ip->i_mount;
4437         int                     whichfork = xfs_bmapi_whichfork(flags);
4438         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
4439         xfs_fileoff_t           end;            /* end of mapped file region */
4440         bool                    eof = false;    /* after the end of extents */
4441         int                     error;          /* error return */
4442         int                     n;              /* current extent index */
4443         xfs_fileoff_t           obno;           /* old block number (offset) */
4444
4445 #ifdef DEBUG
4446         xfs_fileoff_t           orig_bno;       /* original block number value */
4447         int                     orig_flags;     /* original flags arg value */
4448         xfs_filblks_t           orig_len;       /* original value of len arg */
4449         struct xfs_bmbt_irec    *orig_mval;     /* original value of mval */
4450         int                     orig_nmap;      /* original value of *nmap */
4451
4452         orig_bno = bno;
4453         orig_len = len;
4454         orig_flags = flags;
4455         orig_mval = mval;
4456         orig_nmap = *nmap;
4457 #endif
4458
4459         ASSERT(*nmap >= 1);
4460         ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4461         ASSERT(tp != NULL);
4462         ASSERT(len > 0);
4463         ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
4464         xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
4465         ASSERT(!(flags & XFS_BMAPI_REMAP));
4466
4467         /* zeroing is for currently only for data extents, not metadata */
4468         ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4469                         (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4470         /*
4471          * we can allocate unwritten extents or pre-zero allocated blocks,
4472          * but it makes no sense to do both at once. This would result in
4473          * zeroing the unwritten extent twice, but it still being an
4474          * unwritten extent....
4475          */
4476         ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4477                         (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4478
4479         if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4480             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4481                 xfs_bmap_mark_sick(ip, whichfork);
4482                 return -EFSCORRUPTED;
4483         }
4484
4485         if (xfs_is_shutdown(mp))
4486                 return -EIO;
4487
4488         XFS_STATS_INC(mp, xs_blk_mapw);
4489
4490         error = xfs_iread_extents(tp, ip, whichfork);
4491         if (error)
4492                 goto error0;
4493
4494         if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
4495                 eof = true;
4496         if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4497                 bma.prev.br_startoff = NULLFILEOFF;
4498         bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4499
4500         n = 0;
4501         end = bno + len;
4502         obno = bno;
4503         while (bno < end && n < *nmap) {
4504                 bool                    need_alloc = false, wasdelay = false;
4505
4506                 /* in hole or beyond EOF? */
4507                 if (eof || bma.got.br_startoff > bno) {
4508                         /*
4509                          * CoW fork conversions should /never/ hit EOF or
4510                          * holes.  There should always be something for us
4511                          * to work on.
4512                          */
4513                         ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
4514                                  (flags & XFS_BMAPI_COWFORK)));
4515
4516                         need_alloc = true;
4517                 } else if (isnullstartblock(bma.got.br_startblock)) {
4518                         wasdelay = true;
4519                 }
4520
4521                 /*
4522                  * First, deal with the hole before the allocated space
4523                  * that we found, if any.
4524                  */
4525                 if (need_alloc || wasdelay) {
4526                         bma.eof = eof;
4527                         bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4528                         bma.wasdel = wasdelay;
4529                         bma.offset = bno;
4530                         bma.flags = flags;
4531
4532                         /*
4533                          * There's a 32/64 bit type mismatch between the
4534                          * allocation length request (which can be 64 bits in
4535                          * length) and the bma length request, which is
4536                          * xfs_extlen_t and therefore 32 bits. Hence we have to
4537                          * be careful and do the min() using the larger type to
4538                          * avoid overflows.
4539                          */
4540                         bma.length = XFS_FILBLKS_MIN(len, XFS_MAX_BMBT_EXTLEN);
4541
4542                         if (wasdelay) {
4543                                 bma.length = XFS_FILBLKS_MIN(bma.length,
4544                                         bma.got.br_blockcount -
4545                                         (bno - bma.got.br_startoff));
4546                         } else {
4547                                 if (!eof)
4548                                         bma.length = XFS_FILBLKS_MIN(bma.length,
4549                                                 bma.got.br_startoff - bno);
4550                         }
4551
4552                         ASSERT(bma.length > 0);
4553                         error = xfs_bmapi_allocate(&bma);
4554                         if (error) {
4555                                 /*
4556                                  * If we already allocated space in a previous
4557                                  * iteration return what we go so far when
4558                                  * running out of space.
4559                                  */
4560                                 if (error == -ENOSPC && bma.nallocs)
4561                                         break;
4562                                 goto error0;
4563                         }
4564
4565                         /*
4566                          * If this is a CoW allocation, record the data in
4567                          * the refcount btree for orphan recovery.
4568                          */
4569                         if (whichfork == XFS_COW_FORK)
4570                                 xfs_refcount_alloc_cow_extent(tp, bma.blkno,
4571                                                 bma.length);
4572                 }
4573
4574                 /* Deal with the allocated space we found.  */
4575                 xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4576                                                         end, n, flags);
4577
4578                 /* Execute unwritten extent conversion if necessary */
4579                 error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
4580                 if (error == -EAGAIN)
4581                         continue;
4582                 if (error)
4583                         goto error0;
4584
4585                 /* update the extent map to return */
4586                 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4587
4588                 /*
4589                  * If we're done, stop now.  Stop when we've allocated
4590                  * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
4591                  * the transaction may get too big.
4592                  */
4593                 if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4594                         break;
4595
4596                 /* Else go on to the next record. */
4597                 bma.prev = bma.got;
4598                 if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
4599                         eof = true;
4600         }
4601
4602         error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4603                         whichfork);
4604         if (error)
4605                 goto error0;
4606
4607         ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE ||
4608                ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
4609         xfs_bmapi_finish(&bma, whichfork, 0);
4610         xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4611                 orig_nmap, n);
4612
4613         /*
4614          * When converting delayed allocations, xfs_bmapi_allocate ignores
4615          * the passed in bno and always converts from the start of the found
4616          * delalloc extent.
4617          *
4618          * To avoid a successful return with *nmap set to 0, return the magic
4619          * -ENOSR error code for this particular case so that the caller can
4620          * handle it.
4621          */
4622         if (!n) {
4623                 ASSERT(bma.nallocs >= *nmap);
4624                 return -ENOSR;
4625         }
4626         *nmap = n;
4627         return 0;
4628 error0:
4629         xfs_bmapi_finish(&bma, whichfork, error);
4630         return error;
4631 }
4632
4633 /*
4634  * Convert an existing delalloc extent to real blocks based on file offset. This
4635  * attempts to allocate the entire delalloc extent and may require multiple
4636  * invocations to allocate the target offset if a large enough physical extent
4637  * is not available.
4638  */
4639 static int
4640 xfs_bmapi_convert_one_delalloc(
4641         struct xfs_inode        *ip,
4642         int                     whichfork,
4643         xfs_off_t               offset,
4644         struct iomap            *iomap,
4645         unsigned int            *seq)
4646 {
4647         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
4648         struct xfs_mount        *mp = ip->i_mount;
4649         xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
4650         struct xfs_bmalloca     bma = { NULL };
4651         uint16_t                flags = 0;
4652         struct xfs_trans        *tp;
4653         int                     error;
4654
4655         if (whichfork == XFS_COW_FORK)
4656                 flags |= IOMAP_F_SHARED;
4657
4658         /*
4659          * Space for the extent and indirect blocks was reserved when the
4660          * delalloc extent was created so there's no need to do so here.
4661          */
4662         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
4663                                 XFS_TRANS_RESERVE, &tp);
4664         if (error)
4665                 return error;
4666
4667         xfs_ilock(ip, XFS_ILOCK_EXCL);
4668         xfs_trans_ijoin(tp, ip, 0);
4669
4670         error = xfs_iext_count_extend(tp, ip, whichfork,
4671                         XFS_IEXT_ADD_NOSPLIT_CNT);
4672         if (error)
4673                 goto out_trans_cancel;
4674
4675         if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
4676             bma.got.br_startoff > offset_fsb) {
4677                 /*
4678                  * No extent found in the range we are trying to convert.  This
4679                  * should only happen for the COW fork, where another thread
4680                  * might have moved the extent to the data fork in the meantime.
4681                  */
4682                 WARN_ON_ONCE(whichfork != XFS_COW_FORK);
4683                 error = -EAGAIN;
4684                 goto out_trans_cancel;
4685         }
4686
4687         /*
4688          * If we find a real extent here we raced with another thread converting
4689          * the extent.  Just return the real extent at this offset.
4690          */
4691         if (!isnullstartblock(bma.got.br_startblock)) {
4692                 xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
4693                                 xfs_iomap_inode_sequence(ip, flags));
4694                 if (seq)
4695                         *seq = READ_ONCE(ifp->if_seq);
4696                 goto out_trans_cancel;
4697         }
4698
4699         bma.tp = tp;
4700         bma.ip = ip;
4701         bma.wasdel = true;
4702         bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4703
4704         /*
4705          * Always allocate convert from the start of the delalloc extent even if
4706          * that is outside the passed in range to create large contiguous
4707          * extents on disk.
4708          */
4709         bma.offset = bma.got.br_startoff;
4710         bma.length = bma.got.br_blockcount;
4711
4712         /*
4713          * When we're converting the delalloc reservations backing dirty pages
4714          * in the page cache, we must be careful about how we create the new
4715          * extents:
4716          *
4717          * New CoW fork extents are created unwritten, turned into real extents
4718          * when we're about to write the data to disk, and mapped into the data
4719          * fork after the write finishes.  End of story.
4720          *
4721          * New data fork extents must be mapped in as unwritten and converted
4722          * to real extents after the write succeeds to avoid exposing stale
4723          * disk contents if we crash.
4724          */
4725         bma.flags = XFS_BMAPI_PREALLOC;
4726         if (whichfork == XFS_COW_FORK)
4727                 bma.flags |= XFS_BMAPI_COWFORK;
4728
4729         if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4730                 bma.prev.br_startoff = NULLFILEOFF;
4731
4732         error = xfs_bmapi_allocate(&bma);
4733         if (error)
4734                 goto out_finish;
4735
4736         XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
4737         XFS_STATS_INC(mp, xs_xstrat_quick);
4738
4739         ASSERT(!isnullstartblock(bma.got.br_startblock));
4740         xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
4741                                 xfs_iomap_inode_sequence(ip, flags));
4742         if (seq)
4743                 *seq = READ_ONCE(ifp->if_seq);
4744
4745         if (whichfork == XFS_COW_FORK)
4746                 xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
4747
4748         error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4749                         whichfork);
4750         if (error)
4751                 goto out_finish;
4752
4753         xfs_bmapi_finish(&bma, whichfork, 0);
4754         error = xfs_trans_commit(tp);
4755         xfs_iunlock(ip, XFS_ILOCK_EXCL);
4756         return error;
4757
4758 out_finish:
4759         xfs_bmapi_finish(&bma, whichfork, error);
4760 out_trans_cancel:
4761         xfs_trans_cancel(tp);
4762         xfs_iunlock(ip, XFS_ILOCK_EXCL);
4763         return error;
4764 }
4765
4766 /*
4767  * Pass in a dellalloc extent and convert it to real extents, return the real
4768  * extent that maps offset_fsb in iomap.
4769  */
4770 int
4771 xfs_bmapi_convert_delalloc(
4772         struct xfs_inode        *ip,
4773         int                     whichfork,
4774         loff_t                  offset,
4775         struct iomap            *iomap,
4776         unsigned int            *seq)
4777 {
4778         int                     error;
4779
4780         /*
4781          * Attempt to allocate whatever delalloc extent currently backs offset
4782          * and put the result into iomap.  Allocate in a loop because it may
4783          * take several attempts to allocate real blocks for a contiguous
4784          * delalloc extent if free space is sufficiently fragmented.
4785          */
4786         do {
4787                 error = xfs_bmapi_convert_one_delalloc(ip, whichfork, offset,
4788                                         iomap, seq);
4789                 if (error)
4790                         return error;
4791         } while (iomap->offset + iomap->length <= offset);
4792
4793         return 0;
4794 }
4795
4796 int
4797 xfs_bmapi_remap(
4798         struct xfs_trans        *tp,
4799         struct xfs_inode        *ip,
4800         xfs_fileoff_t           bno,
4801         xfs_filblks_t           len,
4802         xfs_fsblock_t           startblock,
4803         uint32_t                flags)
4804 {
4805         struct xfs_mount        *mp = ip->i_mount;
4806         struct xfs_ifork        *ifp;
4807         struct xfs_btree_cur    *cur = NULL;
4808         struct xfs_bmbt_irec    got;
4809         struct xfs_iext_cursor  icur;
4810         int                     whichfork = xfs_bmapi_whichfork(flags);
4811         int                     logflags = 0, error;
4812
4813         ifp = xfs_ifork_ptr(ip, whichfork);
4814         ASSERT(len > 0);
4815         ASSERT(len <= (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN);
4816         xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
4817         ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
4818                            XFS_BMAPI_NORMAP)));
4819         ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
4820                         (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
4821
4822         if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4823             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4824                 xfs_bmap_mark_sick(ip, whichfork);
4825                 return -EFSCORRUPTED;
4826         }
4827
4828         if (xfs_is_shutdown(mp))
4829                 return -EIO;
4830
4831         error = xfs_iread_extents(tp, ip, whichfork);
4832         if (error)
4833                 return error;
4834
4835         if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
4836                 /* make sure we only reflink into a hole. */
4837                 ASSERT(got.br_startoff > bno);
4838                 ASSERT(got.br_startoff - bno >= len);
4839         }
4840
4841         ip->i_nblocks += len;
4842         ip->i_delayed_blks -= len; /* see xfs_bmap_defer_add */
4843         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4844
4845         if (ifp->if_format == XFS_DINODE_FMT_BTREE)
4846                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
4847
4848         got.br_startoff = bno;
4849         got.br_startblock = startblock;
4850         got.br_blockcount = len;
4851         if (flags & XFS_BMAPI_PREALLOC)
4852                 got.br_state = XFS_EXT_UNWRITTEN;
4853         else
4854                 got.br_state = XFS_EXT_NORM;
4855
4856         error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
4857                         &cur, &got, &logflags, flags);
4858         if (error)
4859                 goto error0;
4860
4861         error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
4862
4863 error0:
4864         if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS)
4865                 logflags &= ~XFS_ILOG_DEXT;
4866         else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
4867                 logflags &= ~XFS_ILOG_DBROOT;
4868
4869         if (logflags)
4870                 xfs_trans_log_inode(tp, ip, logflags);
4871         if (cur)
4872                 xfs_btree_del_cursor(cur, error);
4873         return error;
4874 }
4875
4876 /*
4877  * When a delalloc extent is split (e.g., due to a hole punch), the original
4878  * indlen reservation must be shared across the two new extents that are left
4879  * behind.
4880  *
4881  * Given the original reservation and the worst case indlen for the two new
4882  * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4883  * reservation fairly across the two new extents. If necessary, steal available
4884  * blocks from a deleted extent to make up a reservation deficiency (e.g., if
4885  * ores == 1). The number of stolen blocks is returned. The availability and
4886  * subsequent accounting of stolen blocks is the responsibility of the caller.
4887  */
4888 static void
4889 xfs_bmap_split_indlen(
4890         xfs_filblks_t                   ores,           /* original res. */
4891         xfs_filblks_t                   *indlen1,       /* ext1 worst indlen */
4892         xfs_filblks_t                   *indlen2)       /* ext2 worst indlen */
4893 {
4894         xfs_filblks_t                   len1 = *indlen1;
4895         xfs_filblks_t                   len2 = *indlen2;
4896         xfs_filblks_t                   nres = len1 + len2; /* new total res. */
4897         xfs_filblks_t                   resfactor;
4898
4899         /*
4900          * We can't meet the total required reservation for the two extents.
4901          * Calculate the percent of the overall shortage between both extents
4902          * and apply this percentage to each of the requested indlen values.
4903          * This distributes the shortage fairly and reduces the chances that one
4904          * of the two extents is left with nothing when extents are repeatedly
4905          * split.
4906          */
4907         resfactor = (ores * 100);
4908         do_div(resfactor, nres);
4909         len1 *= resfactor;
4910         do_div(len1, 100);
4911         len2 *= resfactor;
4912         do_div(len2, 100);
4913         ASSERT(len1 + len2 <= ores);
4914         ASSERT(len1 < *indlen1 && len2 < *indlen2);
4915
4916         /*
4917          * Hand out the remainder to each extent. If one of the two reservations
4918          * is zero, we want to make sure that one gets a block first. The loop
4919          * below starts with len1, so hand len2 a block right off the bat if it
4920          * is zero.
4921          */
4922         ores -= (len1 + len2);
4923         ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
4924         if (ores && !len2 && *indlen2) {
4925                 len2++;
4926                 ores--;
4927         }
4928         while (ores) {
4929                 if (len1 < *indlen1) {
4930                         len1++;
4931                         ores--;
4932                 }
4933                 if (!ores)
4934                         break;
4935                 if (len2 < *indlen2) {
4936                         len2++;
4937                         ores--;
4938                 }
4939         }
4940
4941         *indlen1 = len1;
4942         *indlen2 = len2;
4943 }
4944
4945 void
4946 xfs_bmap_del_extent_delay(
4947         struct xfs_inode        *ip,
4948         int                     whichfork,
4949         struct xfs_iext_cursor  *icur,
4950         struct xfs_bmbt_irec    *got,
4951         struct xfs_bmbt_irec    *del)
4952 {
4953         struct xfs_mount        *mp = ip->i_mount;
4954         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
4955         struct xfs_bmbt_irec    new;
4956         int64_t                 da_old, da_new, da_diff = 0;
4957         xfs_fileoff_t           del_endoff, got_endoff;
4958         xfs_filblks_t           got_indlen, new_indlen, stolen = 0;
4959         uint32_t                state = xfs_bmap_fork_to_state(whichfork);
4960         uint64_t                fdblocks;
4961         bool                    isrt;
4962
4963         XFS_STATS_INC(mp, xs_del_exlist);
4964
4965         isrt = xfs_ifork_is_realtime(ip, whichfork);
4966         del_endoff = del->br_startoff + del->br_blockcount;
4967         got_endoff = got->br_startoff + got->br_blockcount;
4968         da_old = startblockval(got->br_startblock);
4969         da_new = 0;
4970
4971         ASSERT(del->br_blockcount > 0);
4972         ASSERT(got->br_startoff <= del->br_startoff);
4973         ASSERT(got_endoff >= del_endoff);
4974
4975         /*
4976          * Update the inode delalloc counter now and wait to update the
4977          * sb counters as we might have to borrow some blocks for the
4978          * indirect block accounting.
4979          */
4980         xfs_quota_unreserve_blkres(ip, del->br_blockcount);
4981         ip->i_delayed_blks -= del->br_blockcount;
4982
4983         if (got->br_startoff == del->br_startoff)
4984                 state |= BMAP_LEFT_FILLING;
4985         if (got_endoff == del_endoff)
4986                 state |= BMAP_RIGHT_FILLING;
4987
4988         switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4989         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4990                 /*
4991                  * Matches the whole extent.  Delete the entry.
4992                  */
4993                 xfs_iext_remove(ip, icur, state);
4994                 xfs_iext_prev(ifp, icur);
4995                 break;
4996         case BMAP_LEFT_FILLING:
4997                 /*
4998                  * Deleting the first part of the extent.
4999                  */
5000                 got->br_startoff = del_endoff;
5001                 got->br_blockcount -= del->br_blockcount;
5002                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
5003                                 got->br_blockcount), da_old);
5004                 got->br_startblock = nullstartblock((int)da_new);
5005                 xfs_iext_update_extent(ip, state, icur, got);
5006                 break;
5007         case BMAP_RIGHT_FILLING:
5008                 /*
5009                  * Deleting the last part of the extent.
5010                  */
5011                 got->br_blockcount = got->br_blockcount - del->br_blockcount;
5012                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
5013                                 got->br_blockcount), da_old);
5014                 got->br_startblock = nullstartblock((int)da_new);
5015                 xfs_iext_update_extent(ip, state, icur, got);
5016                 break;
5017         case 0:
5018                 /*
5019                  * Deleting the middle of the extent.
5020                  *
5021                  * Distribute the original indlen reservation across the two new
5022                  * extents.  Steal blocks from the deleted extent if necessary.
5023                  * Stealing blocks simply fudges the fdblocks accounting below.
5024                  * Warn if either of the new indlen reservations is zero as this
5025                  * can lead to delalloc problems.
5026                  */
5027                 got->br_blockcount = del->br_startoff - got->br_startoff;
5028                 got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
5029
5030                 new.br_blockcount = got_endoff - del_endoff;
5031                 new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
5032
5033                 WARN_ON_ONCE(!got_indlen || !new_indlen);
5034                 /*
5035                  * Steal as many blocks as we can to try and satisfy the worst
5036                  * case indlen for both new extents.
5037                  *
5038                  * However, we can't just steal reservations from the data
5039                  * blocks if this is an RT inodes as the data and metadata
5040                  * blocks come from different pools.  We'll have to live with
5041                  * under-filled indirect reservation in this case.
5042                  */
5043                 da_new = got_indlen + new_indlen;
5044                 if (da_new > da_old && !isrt) {
5045                         stolen = XFS_FILBLKS_MIN(da_new - da_old,
5046                                                  del->br_blockcount);
5047                         da_old += stolen;
5048                 }
5049                 if (da_new > da_old)
5050                         xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen);
5051                 da_new = got_indlen + new_indlen;
5052
5053                 got->br_startblock = nullstartblock((int)got_indlen);
5054
5055                 new.br_startoff = del_endoff;
5056                 new.br_state = got->br_state;
5057                 new.br_startblock = nullstartblock((int)new_indlen);
5058
5059                 xfs_iext_update_extent(ip, state, icur, got);
5060                 xfs_iext_next(ifp, icur);
5061                 xfs_iext_insert(ip, icur, &new, state);
5062
5063                 del->br_blockcount -= stolen;
5064                 break;
5065         }
5066
5067         ASSERT(da_old >= da_new);
5068         da_diff = da_old - da_new;
5069         fdblocks = da_diff;
5070
5071         if (isrt)
5072                 xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, del->br_blockcount));
5073         else
5074                 fdblocks += del->br_blockcount;
5075
5076         xfs_add_fdblocks(mp, fdblocks);
5077         xfs_mod_delalloc(ip, -(int64_t)del->br_blockcount, -da_diff);
5078 }
5079
5080 void
5081 xfs_bmap_del_extent_cow(
5082         struct xfs_inode        *ip,
5083         struct xfs_iext_cursor  *icur,
5084         struct xfs_bmbt_irec    *got,
5085         struct xfs_bmbt_irec    *del)
5086 {
5087         struct xfs_mount        *mp = ip->i_mount;
5088         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
5089         struct xfs_bmbt_irec    new;
5090         xfs_fileoff_t           del_endoff, got_endoff;
5091         uint32_t                state = BMAP_COWFORK;
5092
5093         XFS_STATS_INC(mp, xs_del_exlist);
5094
5095         del_endoff = del->br_startoff + del->br_blockcount;
5096         got_endoff = got->br_startoff + got->br_blockcount;
5097
5098         ASSERT(del->br_blockcount > 0);
5099         ASSERT(got->br_startoff <= del->br_startoff);
5100         ASSERT(got_endoff >= del_endoff);
5101         ASSERT(!isnullstartblock(got->br_startblock));
5102
5103         if (got->br_startoff == del->br_startoff)
5104                 state |= BMAP_LEFT_FILLING;
5105         if (got_endoff == del_endoff)
5106                 state |= BMAP_RIGHT_FILLING;
5107
5108         switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5109         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5110                 /*
5111                  * Matches the whole extent.  Delete the entry.
5112                  */
5113                 xfs_iext_remove(ip, icur, state);
5114                 xfs_iext_prev(ifp, icur);
5115                 break;
5116         case BMAP_LEFT_FILLING:
5117                 /*
5118                  * Deleting the first part of the extent.
5119                  */
5120                 got->br_startoff = del_endoff;
5121                 got->br_blockcount -= del->br_blockcount;
5122                 got->br_startblock = del->br_startblock + del->br_blockcount;
5123                 xfs_iext_update_extent(ip, state, icur, got);
5124                 break;
5125         case BMAP_RIGHT_FILLING:
5126                 /*
5127                  * Deleting the last part of the extent.
5128                  */
5129                 got->br_blockcount -= del->br_blockcount;
5130                 xfs_iext_update_extent(ip, state, icur, got);
5131                 break;
5132         case 0:
5133                 /*
5134                  * Deleting the middle of the extent.
5135                  */
5136                 got->br_blockcount = del->br_startoff - got->br_startoff;
5137
5138                 new.br_startoff = del_endoff;
5139                 new.br_blockcount = got_endoff - del_endoff;
5140                 new.br_state = got->br_state;
5141                 new.br_startblock = del->br_startblock + del->br_blockcount;
5142
5143                 xfs_iext_update_extent(ip, state, icur, got);
5144                 xfs_iext_next(ifp, icur);
5145                 xfs_iext_insert(ip, icur, &new, state);
5146                 break;
5147         }
5148         ip->i_delayed_blks -= del->br_blockcount;
5149 }
5150
5151 static int
5152 xfs_bmap_free_rtblocks(
5153         struct xfs_trans        *tp,
5154         struct xfs_bmbt_irec    *del)
5155 {
5156         struct xfs_rtgroup      *rtg;
5157         int                     error;
5158
5159         rtg = xfs_rtgroup_grab(tp->t_mountp, 0);
5160         if (!rtg)
5161                 return -EIO;
5162
5163         /*
5164          * Ensure the bitmap and summary inodes are locked and joined to the
5165          * transaction before modifying them.
5166          */
5167         if (!(tp->t_flags & XFS_TRANS_RTBITMAP_LOCKED)) {
5168                 tp->t_flags |= XFS_TRANS_RTBITMAP_LOCKED;
5169                 xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP);
5170                 xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_BITMAP);
5171         }
5172
5173         error = xfs_rtfree_blocks(tp, rtg, del->br_startblock,
5174                         del->br_blockcount);
5175         xfs_rtgroup_rele(rtg);
5176         return error;
5177 }
5178
/*
 * Called by xfs_bmapi to update file extent records and the btree
 * after removing space.
 *
 * Removes the range @del from the extent that @icur currently points at in
 * fork @whichfork.  That extent must fully contain @del and must have a real
 * (non-null) start block; both conditions are asserted.  The incore extent
 * list is always updated; the bmap btree is updated as well when @cur is
 * non-NULL.  Afterwards the reverse mapping is removed, the blocks are
 * queued for freeing (unless XFS_BMAPI_REMAP is set in @bflags), and the
 * inode block count and quota are adjusted.
 *
 * *logflagsp receives the inode logging flags the caller has to log.
 *
 * Returns 0 on success, or a negative errno:
 *   -ENOSPC        a middle-of-extent delete was refused, either up front
 *                  (no block reservation and the extent list is full) or
 *                  because the btree insert ran out of space.  In the
 *                  latter case the btree and incore records are restored
 *                  and *logflagsp is cleared.
 *   -EFSCORRUPTED  a btree lookup/delete/insert did not report i == 1.
 *   other          passed through from the btree or extent-freeing helpers.
 */
STATIC int                              /* error */
xfs_bmap_del_extent_real(
        xfs_inode_t             *ip,    /* incore inode pointer */
        xfs_trans_t             *tp,    /* current transaction pointer */
        struct xfs_iext_cursor  *icur,
        struct xfs_btree_cur    *cur,   /* if null, not a btree */
        xfs_bmbt_irec_t         *del,   /* data to remove from extents */
        int                     *logflagsp, /* inode logging flags */
        int                     whichfork, /* data or attr fork */
        uint32_t                bflags) /* bmapi flags */
{
        xfs_fsblock_t           del_endblock=0; /* first block past del */
        xfs_fileoff_t           del_endoff;     /* first offset past del */
        int                     error = 0;      /* error return value */
        struct xfs_bmbt_irec    got;    /* current extent entry */
        xfs_fileoff_t           got_endoff;     /* first offset past got */
        int                     i;      /* temp state */
        struct xfs_ifork        *ifp;   /* inode fork pointer */
        xfs_mount_t             *mp;    /* mount structure */
        xfs_filblks_t           nblks;  /* quota/sb block count */
        xfs_bmbt_irec_t         new;    /* new record to be inserted */
        /* REFERENCED */
        uint                    qfield; /* quota field to update */
        uint32_t                state = xfs_bmap_fork_to_state(whichfork);
        struct xfs_bmbt_irec    old;    /* copy of got for the ENOSPC undo */

        /* Nothing needs logging if we bail out before touching anything. */
        *logflagsp = 0;

        mp = ip->i_mount;
        XFS_STATS_INC(mp, xs_del_exlist);

        ifp = xfs_ifork_ptr(ip, whichfork);
        ASSERT(del->br_blockcount > 0);
        xfs_iext_get_extent(ifp, icur, &got);
        ASSERT(got.br_startoff <= del->br_startoff);
        del_endoff = del->br_startoff + del->br_blockcount;
        got_endoff = got.br_startoff + got.br_blockcount;
        ASSERT(got_endoff >= del_endoff);
        ASSERT(!isnullstartblock(got.br_startblock));
        qfield = 0;

        /*
         * If it's the case where the directory code is running with no block
         * reservation, and the deleted block is in the middle of its extent,
         * and the resulting insert of an extent would cause transformation to
         * btree format, then reject it.  The calling code will then swap blocks
         * around instead.  We have to do this now, rather than waiting for the
         * conversion to btree format, since the transaction will be dirty then.
         */
        if (tp->t_blk_res == 0 &&
            ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
            ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
            del->br_startoff > got.br_startoff && del_endoff < got_endoff)
                return -ENOSPC;

        *logflagsp = XFS_ILOG_CORE;
        /* Pick the quota counter that matches where the blocks live. */
        if (xfs_ifork_is_realtime(ip, whichfork))
                qfield = XFS_TRANS_DQ_RTBCOUNT;
        else
                qfield = XFS_TRANS_DQ_BCOUNT;
        nblks = del->br_blockcount;

        del_endblock = del->br_startblock + del->br_blockcount;
        if (cur) {
                /*
                 * Look up the btree record equal to the incore extent; a
                 * result other than i == 1 is treated as corruption.
                 */
                error = xfs_bmbt_lookup_eq(cur, &got, &i);
                if (error)
                        return error;
                if (XFS_IS_CORRUPT(mp, i != 1)) {
                        xfs_btree_mark_sick(cur);
                        return -EFSCORRUPTED;
                }
        }

        /* Record which edges of the extent the deleted range touches. */
        if (got.br_startoff == del->br_startoff)
                state |= BMAP_LEFT_FILLING;
        if (got_endoff == del_endoff)
                state |= BMAP_RIGHT_FILLING;

        switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
                /*
                 * Matches the whole extent.  Delete the entry.
                 */
                xfs_iext_remove(ip, icur, state);
                xfs_iext_prev(ifp, icur);
                ifp->if_nextents--;

                *logflagsp |= XFS_ILOG_CORE;
                if (!cur) {
                        *logflagsp |= xfs_ilog_fext(whichfork);
                        break;
                }
                if ((error = xfs_btree_delete(cur, &i)))
                        return error;
                if (XFS_IS_CORRUPT(mp, i != 1)) {
                        xfs_btree_mark_sick(cur);
                        return -EFSCORRUPTED;
                }
                break;
        case BMAP_LEFT_FILLING:
                /*
                 * Deleting the first part of the extent.
                 */
                got.br_startoff = del_endoff;
                got.br_startblock = del_endblock;
                got.br_blockcount -= del->br_blockcount;
                xfs_iext_update_extent(ip, state, icur, &got);
                if (!cur) {
                        *logflagsp |= xfs_ilog_fext(whichfork);
                        break;
                }
                error = xfs_bmbt_update(cur, &got);
                if (error)
                        return error;
                break;
        case BMAP_RIGHT_FILLING:
                /*
                 * Deleting the last part of the extent.
                 */
                got.br_blockcount -= del->br_blockcount;
                xfs_iext_update_extent(ip, state, icur, &got);
                if (!cur) {
                        *logflagsp |= xfs_ilog_fext(whichfork);
                        break;
                }
                error = xfs_bmbt_update(cur, &got);
                if (error)
                        return error;
                break;
        case 0:
                /*
                 * Deleting the middle of the extent.
                 */

                /* Keep the original record so the ENOSPC path can undo. */
                old = got;

                /* got becomes the piece in front of the deleted range. */
                got.br_blockcount = del->br_startoff - got.br_startoff;
                xfs_iext_update_extent(ip, state, icur, &got);

                /* new describes the piece behind the deleted range. */
                new.br_startoff = del_endoff;
                new.br_blockcount = got_endoff - del_endoff;
                new.br_state = got.br_state;
                new.br_startblock = del_endblock;

                *logflagsp |= XFS_ILOG_CORE;
                if (cur) {
                        error = xfs_bmbt_update(cur, &got);
                        if (error)
                                return error;
                        error = xfs_btree_increment(cur, 0, &i);
                        if (error)
                                return error;
                        cur->bc_rec.b = new;
                        error = xfs_btree_insert(cur, &i);
                        if (error && error != -ENOSPC)
                                return error;
                        /*
                         * If get no-space back from btree insert, it tried a
                         * split, and we have a zero block reservation.  Fix up
                         * our state and return the error.
                         */
                        if (error == -ENOSPC) {
                                /*
                                 * Reset the cursor, don't trust it after any
                                 * insert operation.
                                 */
                                error = xfs_bmbt_lookup_eq(cur, &got, &i);
                                if (error)
                                        return error;
                                if (XFS_IS_CORRUPT(mp, i != 1)) {
                                        xfs_btree_mark_sick(cur);
                                        return -EFSCORRUPTED;
                                }
                                /*
                                 * Update the btree record back
                                 * to the original value.
                                 */
                                error = xfs_bmbt_update(cur, &old);
                                if (error)
                                        return error;
                                /*
                                 * Reset the extent record back
                                 * to the original value.
                                 */
                                xfs_iext_update_extent(ip, state, icur, &old);
                                *logflagsp = 0;
                                return -ENOSPC;
                        }
                        if (XFS_IS_CORRUPT(mp, i != 1)) {
                                xfs_btree_mark_sick(cur);
                                return -EFSCORRUPTED;
                        }
                } else
                        *logflagsp |= xfs_ilog_fext(whichfork);

                /*
                 * The incore insert is done last, after the btree insert can
                 * no longer fail with -ENOSPC.
                 */
                ifp->if_nextents++;
                xfs_iext_next(ifp, icur);
                xfs_iext_insert(ip, icur, &new, state);
                break;
        }

        /* remove reverse mapping */
        xfs_rmap_unmap_extent(tp, ip, whichfork, del);

        /*
         * If we need to, add to list of extents to delete.
         * Nothing is freed when the caller asked for XFS_BMAPI_REMAP.
         */
        if (!(bflags & XFS_BMAPI_REMAP)) {
                bool    isrt = xfs_ifork_is_realtime(ip, whichfork);

                if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
                        xfs_refcount_decrease_extent(tp, del);
                } else if (isrt && !xfs_has_rtgroups(mp)) {
                        error = xfs_bmap_free_rtblocks(tp, del);
                } else {
                        unsigned int    efi_flags = 0;

                        if ((bflags & XFS_BMAPI_NODISCARD) ||
                            del->br_state == XFS_EXT_UNWRITTEN)
                                efi_flags |= XFS_FREE_EXTENT_SKIP_DISCARD;

                        /*
                         * Historically, we did not use EFIs to free realtime
                         * extents.  However, when reverse mapping is enabled,
                         * we must maintain the same order of operations as the
                         * data device, which is: Remove the file mapping,
                         * remove the reverse mapping, and then free the
                         * blocks.  Reflink for realtime volumes requires the
                         * same sort of ordering.  Both features rely on
                         * rtgroups, so let's gate rt EFI usage on rtgroups.
                         */
                        if (isrt)
                                efi_flags |= XFS_FREE_EXTENT_REALTIME;

                        error = xfs_free_extent_later(tp, del->br_startblock,
                                        del->br_blockcount, NULL,
                                        XFS_AG_RESV_NONE, efi_flags);
                }
                if (error)
                        return error;
        }

        /*
         * Adjust inode # blocks in the file.
         */
        if (nblks)
                ip->i_nblocks -= nblks;
        /*
         * Adjust quota data.  Skipped for XFS_BMAPI_REMAP.
         */
        if (qfield && !(bflags & XFS_BMAPI_REMAP))
                xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);

        return 0;
}
5438
/*
 * Unmap (remove) blocks from a file.
 *
 * The range [start, start + *rlen) of the data or attr fork selected by
 * flags (the CoW fork is not allowed) is walked from its end towards its
 * start.  If nexts is nonzero then the number of extents to remove is
 * limited to that value.
 *
 * When the walk completes, *rlen is set to the number of blocks at the
 * front of the range that have not been processed yet, or to zero when
 * nothing is left to do.  If an error is hit before or inside the walk,
 * *rlen is left untouched.
 *
 * Returns 0 on success or a negative errno.
 */
static int
__xfs_bunmapi(
        struct xfs_trans        *tp,            /* transaction pointer */
        struct xfs_inode        *ip,            /* incore inode */
        xfs_fileoff_t           start,          /* first file offset deleted */
        xfs_filblks_t           *rlen,          /* i/o: amount remaining */
        uint32_t                flags,          /* misc flags */
        xfs_extnum_t            nexts)          /* number of extents max */
{
        struct xfs_btree_cur    *cur;           /* bmap btree cursor */
        struct xfs_bmbt_irec    del;            /* extent being deleted */
        int                     error;          /* error return value */
        xfs_extnum_t            extno;          /* extent number in list */
        struct xfs_bmbt_irec    got;            /* current extent record */
        struct xfs_ifork        *ifp;           /* inode fork pointer */
        int                     isrt;           /* freeing in rt area */
        int                     logflags;       /* transaction logging flags */
        xfs_extlen_t            mod;            /* rt extent offset */
        struct xfs_mount        *mp = ip->i_mount;
        int                     tmp_logflags;   /* partial logging flags */
        int                     wasdel;         /* was a delayed alloc extent */
        int                     whichfork;      /* data or attribute fork */
        xfs_filblks_t           len = *rlen;    /* length to unmap in file */
        xfs_fileoff_t           end;            /* last offset still to unmap */
        struct xfs_iext_cursor  icur;
        bool                    done = false;   /* ran out of extents */

        trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);

        whichfork = xfs_bmapi_whichfork(flags);
        ASSERT(whichfork != XFS_COW_FORK);
        ifp = xfs_ifork_ptr(ip, whichfork);
        if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp))) {
                xfs_bmap_mark_sick(ip, whichfork);
                return -EFSCORRUPTED;
        }
        if (xfs_is_shutdown(mp))
                return -EIO;

        xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
        ASSERT(len > 0);
        ASSERT(nexts >= 0);

        error = xfs_iread_extents(tp, ip, whichfork);
        if (error)
                return error;

        /* No extents at all: nothing to unmap. */
        if (xfs_iext_count(ifp) == 0) {
                *rlen = 0;
                return 0;
        }
        XFS_STATS_INC(mp, xs_blk_unmap);
        isrt = xfs_ifork_is_realtime(ip, whichfork);
        end = start + len;

        /*
         * NOTE(review): end is passed by pointer, so the lookup may adjust
         * it; confirm against xfs_iext_lookup_extent_before().
         */
        if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
                *rlen = 0;
                return 0;
        }
        /*
         * From here on end is treated as the last block to unmap
         * (inclusive); see the "end + 1" arithmetic in the loop.
         */
        end--;

        logflags = 0;
        if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
                ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
        } else
                cur = NULL;

        extno = 0;
        /*
         * Walk backwards until the range is exhausted or the extent limit
         * is reached.  end becomes (xfs_fileoff_t)-1 when "startoff - 1" is
         * computed for an extent starting at offset 0.
         */
        while (end != (xfs_fileoff_t)-1 && end >= start &&
               (nexts == 0 || extno < nexts)) {
                /*
                 * Is the found extent after a hole in which end lives?
                 * Just back up to the previous extent, if so.
                 */
                if (got.br_startoff > end &&
                    !xfs_iext_prev_extent(ifp, &icur, &got)) {
                        done = true;
                        break;
                }
                /*
                 * Is the last block of this extent before the range
                 * we're supposed to delete?  If so, we're done.
                 */
                end = XFS_FILEOFF_MIN(end,
                        got.br_startoff + got.br_blockcount - 1);
                if (end < start)
                        break;
                /*
                 * Then deal with the (possibly delayed) allocated space
                 * we found.
                 */
                del = got;
                wasdel = isnullstartblock(del.br_startblock);

                /* Clip del to the part of got that lies in [start, end]. */
                if (got.br_startoff < start) {
                        del.br_startoff = start;
                        del.br_blockcount -= start - got.br_startoff;
                        if (!wasdel)
                                del.br_startblock += start - got.br_startoff;
                }
                if (del.br_startoff + del.br_blockcount > end + 1)
                        del.br_blockcount = end + 1 - del.br_startoff;

                /*
                 * The realtime extent alignment handling below is skipped
                 * for non-realtime forks and for XFS_BMAPI_REMAP.
                 */
                if (!isrt || (flags & XFS_BMAPI_REMAP))
                        goto delete;

                mod = xfs_rtb_to_rtxoff(mp,
                                del.br_startblock + del.br_blockcount);
                if (mod) {
                        /*
                         * Realtime extent not lined up at the end.
                         * The extent could have been split into written
                         * and unwritten pieces, or we could just be
                         * unmapping part of it.  But we can't really
                         * get rid of part of a realtime extent.
                         */
                        if (del.br_state == XFS_EXT_UNWRITTEN) {
                                /*
                                 * This piece is already unwritten.
                                 * Skip over it.
                                 */
                                ASSERT((flags & XFS_BMAPI_REMAP) || end >= mod);
                                end -= mod > del.br_blockcount ?
                                        del.br_blockcount : mod;
                                if (end < got.br_startoff &&
                                    !xfs_iext_prev_extent(ifp, &icur, &got)) {
                                        done = true;
                                        break;
                                }
                                continue;
                        }
                        /*
                         * It's written, turn it unwritten.
                         * This is better than zeroing it.
                         */
                        ASSERT(del.br_state == XFS_EXT_NORM);
                        ASSERT(tp->t_blk_res > 0);
                        /*
                         * If this spans a realtime extent boundary,
                         * chop it back to the start of the one we end at.
                         */
                        if (del.br_blockcount > mod) {
                                del.br_startoff += del.br_blockcount - mod;
                                del.br_startblock += del.br_blockcount - mod;
                                del.br_blockcount = mod;
                        }
                        del.br_state = XFS_EXT_UNWRITTEN;
                        error = xfs_bmap_add_extent_unwritten_real(tp, ip,
                                        whichfork, &icur, &cur, &del,
                                        &logflags);
                        if (error)
                                goto error0;
                        goto nodelete;
                }

                mod = xfs_rtb_to_rtxoff(mp, del.br_startblock);
                if (mod) {
                        /* blocks from del's start up to the next rt extent */
                        xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;

                        /*
                         * Realtime extent is lined up at the end but not
                         * at the front.  We'll get rid of full extents if
                         * we can.
                         */
                        if (del.br_blockcount > off) {
                                del.br_blockcount -= off;
                                del.br_startoff += off;
                                del.br_startblock += off;
                        } else if (del.br_startoff == start &&
                                   (del.br_state == XFS_EXT_UNWRITTEN ||
                                    tp->t_blk_res == 0)) {
                                /*
                                 * Can't make it unwritten.  There isn't
                                 * a full extent here so just skip it.
                                 */
                                ASSERT(end >= del.br_blockcount);
                                end -= del.br_blockcount;
                                if (got.br_startoff > end &&
                                    !xfs_iext_prev_extent(ifp, &icur, &got)) {
                                        done = true;
                                        break;
                                }
                                continue;
                        } else if (del.br_state == XFS_EXT_UNWRITTEN) {
                                struct xfs_bmbt_irec    prev;
                                xfs_fileoff_t           unwrite_start;

                                /*
                                 * This one is already unwritten.
                                 * It must have a written left neighbor.
                                 * Unwrite the killed part of that one and
                                 * try again.
                                 */
                                if (!xfs_iext_prev_extent(ifp, &icur, &prev))
                                        ASSERT(0);
                                ASSERT(prev.br_state == XFS_EXT_NORM);
                                ASSERT(!isnullstartblock(prev.br_startblock));
                                ASSERT(del.br_startblock ==
                                       prev.br_startblock + prev.br_blockcount);
                                unwrite_start = max3(start,
                                                     del.br_startoff - mod,
                                                     prev.br_startoff);
                                /* mod is reused: blocks of prev to keep. */
                                mod = unwrite_start - prev.br_startoff;
                                prev.br_startoff = unwrite_start;
                                prev.br_startblock += mod;
                                prev.br_blockcount -= mod;
                                prev.br_state = XFS_EXT_UNWRITTEN;
                                error = xfs_bmap_add_extent_unwritten_real(tp,
                                                ip, whichfork, &icur, &cur,
                                                &prev, &logflags);
                                if (error)
                                        goto error0;
                                goto nodelete;
                        } else {
                                ASSERT(del.br_state == XFS_EXT_NORM);
                                del.br_state = XFS_EXT_UNWRITTEN;
                                error = xfs_bmap_add_extent_unwritten_real(tp,
                                                ip, whichfork, &icur, &cur,
                                                &del, &logflags);
                                if (error)
                                        goto error0;
                                goto nodelete;
                        }
                }

delete:
                if (wasdel) {
                        xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, &del);
                } else {
                        error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
                                        &del, &tmp_logflags, whichfork,
                                        flags);
                        logflags |= tmp_logflags;
                        if (error)
                                goto error0;
                }

                /* Everything from del.br_startoff upwards is now handled. */
                end = del.br_startoff - 1;
nodelete:
                /*
                 * If not done go on to the next (previous) record.
                 * Paths that jump here directly converted an extent to
                 * unwritten instead of removing it and did not move end.
                 */
                if (end != (xfs_fileoff_t)-1 && end >= start) {
                        if (!xfs_iext_get_extent(ifp, &icur, &got) ||
                            (got.br_startoff > end &&
                             !xfs_iext_prev_extent(ifp, &icur, &got))) {
                                done = true;
                                break;
                        }
                        extno++;
                }
        }
        /* Report how much of the front of the range is still left. */
        if (done || end == (xfs_fileoff_t)-1 || end < start)
                *rlen = 0;
        else
                *rlen = end - start + 1;

        /*
         * Convert to a btree if necessary.
         */
        if (xfs_bmap_needs_btree(ip, whichfork)) {
                ASSERT(cur == NULL);
                error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
                                &tmp_logflags, whichfork);
                logflags |= tmp_logflags;
        } else {
                error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags,
                        whichfork);
        }

error0:
        /*
         * Log everything.  Do this after conversion, there's no point in
         * logging the extent records if we've converted to btree format.
         */
        if ((logflags & xfs_ilog_fext(whichfork)) &&
            ifp->if_format != XFS_DINODE_FMT_EXTENTS)
                logflags &= ~xfs_ilog_fext(whichfork);
        else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
                 ifp->if_format != XFS_DINODE_FMT_BTREE)
                logflags &= ~xfs_ilog_fbroot(whichfork);
        /*
         * Log inode even in the error case, if the transaction
         * is dirty we'll need to shut down the filesystem.
         */
        if (logflags)
                xfs_trans_log_inode(tp, ip, logflags);
        if (cur) {
                if (!error)
                        cur->bc_bmap.allocated = 0;
                xfs_btree_del_cursor(cur, error);
        }
        return error;
}
5740
5741 /* Unmap a range of a file. */
5742 int
5743 xfs_bunmapi(
5744         xfs_trans_t             *tp,
5745         struct xfs_inode        *ip,
5746         xfs_fileoff_t           bno,
5747         xfs_filblks_t           len,
5748         uint32_t                flags,
5749         xfs_extnum_t            nexts,
5750         int                     *done)
5751 {
5752         int                     error;
5753
5754         error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts);
5755         *done = (len == 0);
5756         return error;
5757 }
5758
5759 /*
5760  * Determine whether an extent shift can be accomplished by a merge with the
5761  * extent that precedes the target hole of the shift.
5762  */
5763 STATIC bool
5764 xfs_bmse_can_merge(
5765         struct xfs_inode        *ip,
5766         int                     whichfork,
5767         struct xfs_bmbt_irec    *left,  /* preceding extent */
5768         struct xfs_bmbt_irec    *got,   /* current extent to shift */
5769         xfs_fileoff_t           shift)  /* shift fsb */
5770 {
5771         xfs_fileoff_t           startoff;
5772
5773         startoff = got->br_startoff - shift;
5774
5775         /*
5776          * The extent, once shifted, must be adjacent in-file and on-disk with
5777          * the preceding extent.
5778          */
5779         if ((left->br_startoff + left->br_blockcount != startoff) ||
5780             (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5781             (left->br_state != got->br_state) ||
5782             (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN) ||
5783             !xfs_bmap_same_rtgroup(ip, whichfork, left, got))
5784                 return false;
5785
5786         return true;
5787 }
5788
5789 /*
5790  * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5791  * hole in the file. If an extent shift would result in the extent being fully
5792  * adjacent to the extent that currently precedes the hole, we can merge with
5793  * the preceding extent rather than do the shift.
5794  *
5795  * This function assumes the caller has verified a shift-by-merge is possible
5796  * with the provided extents via xfs_bmse_can_merge().
      *
      * What the code below does on success:
      *  - decrements the fork's extent count and ORs XFS_ILOG_CORE into
      *    *logflags (plus XFS_ILOG_DEXT when no btree cursor was supplied);
      *  - with a cursor, deletes @got's bmbt record and rewrites @left's record
      *    with the combined length, then calls xfs_bmap_btree_to_extents();
      *  - updates the incore extent list to hold the single merged record;
      *  - calls xfs_rmap_unmap_extent() for the old @got and
      *    xfs_rmap_map_extent() for the same blocks at their new file offset.
      *
      * Returns 0 on success, an error from the btree helpers, or -EFSCORRUPTED
      * when a btree lookup or delete does not report exactly one record.
5797  */
5798 STATIC int
5799 xfs_bmse_merge(
5800         struct xfs_trans                *tp,
5801         struct xfs_inode                *ip,
5802         int                             whichfork,
5803         xfs_fileoff_t                   shift,          /* shift fsb */
5804         struct xfs_iext_cursor          *icur,
5805         struct xfs_bmbt_irec            *got,           /* extent to shift */
5806         struct xfs_bmbt_irec            *left,          /* preceding extent */
5807         struct xfs_btree_cur            *cur,
5808         int                             *logflags)      /* output */
5809 {
5810         struct xfs_ifork                *ifp = xfs_ifork_ptr(ip, whichfork);
5811         struct xfs_bmbt_irec            new;
5812         xfs_filblks_t                   blockcount;
5813         int                             error, i;
5814         struct xfs_mount                *mp = ip->i_mount;
5815
             /* length of the merged extent */
5816         blockcount = left->br_blockcount + got->br_blockcount;
5817
5818         xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
5819         ASSERT(xfs_bmse_can_merge(ip, whichfork, left, got, shift));
5820
             /* merged record: everything from @left, with the combined length */
5821         new = *left;
5822         new.br_blockcount = blockcount;
5823
5824         /*
5825          * Update the on-disk extent count, the btree if necessary and log the
5826          * inode.
              *
              * NOTE(review): if_nextents is decremented before the btree calls
              * below, so it stays decremented if one of them returns an error;
              * presumably the caller cancels the transaction then - confirm.
5827          */
5828         ifp->if_nextents--;
5829         *logflags |= XFS_ILOG_CORE;
5830         if (!cur) {
                     /* no bmbt cursor: only the incore list needs updating */
5831                 *logflags |= XFS_ILOG_DEXT;
5832                 goto done;
5833         }
5834
5835         /* lookup and remove the extent to merge */
5836         error = xfs_bmbt_lookup_eq(cur, got, &i);
5837         if (error)
5838                 return error;
5839         if (XFS_IS_CORRUPT(mp, i != 1)) {
5840                 xfs_btree_mark_sick(cur);
5841                 return -EFSCORRUPTED;
5842         }
5843
5844         error = xfs_btree_delete(cur, &i);
5845         if (error)
5846                 return error;
5847         if (XFS_IS_CORRUPT(mp, i != 1)) {
5848                 xfs_btree_mark_sick(cur);
5849                 return -EFSCORRUPTED;
5850         }
5851
5852         /* lookup and update size of the previous extent */
5853         error = xfs_bmbt_lookup_eq(cur, left, &i);
5854         if (error)
5855                 return error;
5856         if (XFS_IS_CORRUPT(mp, i != 1)) {
5857                 xfs_btree_mark_sick(cur);
5858                 return -EFSCORRUPTED;
5859         }
5860
5861         error = xfs_bmbt_update(cur, &new);
5862         if (error)
5863                 return error;
5864
5865         /* change to extent format if required after extent removal */
5866         error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
5867         if (error)
5868                 return error;
5869
5870 done:
             /*
              * Incore list: remove the record at the cursor (expected to be @got),
              * step back one record and overwrite it with the merged record.
              */
5871         xfs_iext_remove(ip, icur, 0);
5872         xfs_iext_prev(ifp, icur);
5873         xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5874                         &new);
5875
5876         /* update reverse mapping. rmap functions merge the rmaps for us */
5877         xfs_rmap_unmap_extent(tp, ip, whichfork, got);
             /*
              * Reuse @new to describe @got's blocks at their shifted position,
              * i.e. starting at the file offset right after the end of @left.
              */
5878         memcpy(&new, got, sizeof(new));
5879         new.br_startoff = left->br_startoff + left->br_blockcount;
5880         xfs_rmap_map_extent(tp, ip, whichfork, &new);
5881         return 0;
5882 }
5883
     /*
      * Move the extent @got to file offset @startoff.
      *
      * A copy of the unmodified extent is kept so that the old bmbt record can
      * be looked up and the old reverse mapping removed.  @got->br_startoff is
      * overwritten with @startoff, the bmbt record is updated when @cur is
      * non-NULL (otherwise XFS_ILOG_DEXT is added to *logflags), the incore
      * extent at @icur is updated, and the reverse mapping calls are made for
      * the old and the new extent.  XFS_ILOG_CORE is always added to *logflags.
      *
      * Returns 0 on success, an error from the bmbt helpers, or -EFSCORRUPTED
      * if the old record is not found in the btree.
      */
5884 static int
5885 xfs_bmap_shift_update_extent(
5886         struct xfs_trans        *tp,
5887         struct xfs_inode        *ip,
5888         int                     whichfork,
5889         struct xfs_iext_cursor  *icur,
5890         struct xfs_bmbt_irec    *got,
5891         struct xfs_btree_cur    *cur,
5892         int                     *logflags,
5893         xfs_fileoff_t           startoff)
5894 {
5895         struct xfs_mount        *mp = ip->i_mount;
             /* snapshot of the extent before its start offset is changed */
5896         struct xfs_bmbt_irec    prev = *got;
5897         int                     error, i;
5898
5899         *logflags |= XFS_ILOG_CORE;
5900
5901         got->br_startoff = startoff;
5902
5903         if (cur) {
                     /* find the record by its old contents, then rewrite it */
5904                 error = xfs_bmbt_lookup_eq(cur, &prev, &i);
5905                 if (error)
5906                         return error;
5907                 if (XFS_IS_CORRUPT(mp, i != 1)) {
5908                         xfs_btree_mark_sick(cur);
5909                         return -EFSCORRUPTED;
5910                 }
5911
5912                 error = xfs_bmbt_update(cur, got);
5913                 if (error)
5914                         return error;
5915         } else {
5916                 *logflags |= XFS_ILOG_DEXT;
5917         }
5918
5919         xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5920                         got);
5921
5922         /* update reverse mapping */
5923         xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
5924         xfs_rmap_map_extent(tp, ip, whichfork, got);
5925         return 0;
5926 }
5927
     /*
      * Shift one data fork extent left by @offset_shift_fsb blocks.
      *
      * The extent is found by looking up *next_fsb in the incore extent list.
      * It is either merged into the extent before it (when
      * xfs_bmse_can_merge() allows) or has its start offset rewritten.  On the
      * way out, *next_fsb is set to the start offset of the following extent,
      * or *done is set to true when there is none.  *done is never set to
      * false here, so the caller is presumably expected to initialise it.
      *
      * Returns 0 on success; -EFSCORRUPTED if the fork has no extent list or
      * the extent found has a null start block; -EIO if the filesystem is shut
      * down; -EINVAL if the shifted extent would start before the end of the
      * previous extent, or (with no previous extent) before file offset zero;
      * otherwise an error from the helpers called.
      */
5928 int
5929 xfs_bmap_collapse_extents(
5930         struct xfs_trans        *tp,
5931         struct xfs_inode        *ip,
5932         xfs_fileoff_t           *next_fsb,
5933         xfs_fileoff_t           offset_shift_fsb,
5934         bool                    *done)
5935 {
5936         int                     whichfork = XFS_DATA_FORK;
5937         struct xfs_mount        *mp = ip->i_mount;
5938         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
5939         struct xfs_btree_cur    *cur = NULL;
5940         struct xfs_bmbt_irec    got, prev;
5941         struct xfs_iext_cursor  icur;
5942         xfs_fileoff_t           new_startoff;
5943         int                     error = 0;
5944         int                     logflags = 0;
5945
5946         if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5947             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5948                 xfs_bmap_mark_sick(ip, whichfork);
5949                 return -EFSCORRUPTED;
5950         }
5951
5952         if (xfs_is_shutdown(mp))
5953                 return -EIO;
5954
5955         xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
5956
5957         error = xfs_iread_extents(tp, ip, whichfork);
5958         if (error)
5959                 return error;
5960
             /* a bmbt cursor is only needed when the fork is in btree format */
5961         if (ifp->if_format == XFS_DINODE_FMT_BTREE)
5962                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5963
5964         if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5965                 *done = true;
5966                 goto del_cursor;
5967         }
5968         if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5969                 xfs_bmap_mark_sick(ip, whichfork);
5970                 error = -EFSCORRUPTED;
5971                 goto del_cursor;
5972         }
5973
5974         new_startoff = got.br_startoff - offset_shift_fsb;
5975         if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
                     /* the shifted extent must not overlap the previous one */
5976                 if (new_startoff < prev.br_startoff + prev.br_blockcount) {
5977                         error = -EINVAL;
5978                         goto del_cursor;
5979                 }
5980
5981                 if (xfs_bmse_can_merge(ip, whichfork, &prev, &got,
5982                                 offset_shift_fsb)) {
5983                         error = xfs_bmse_merge(tp, ip, whichfork,
5984                                         offset_shift_fsb, &icur, &got, &prev,
5985                                         cur, &logflags);
5986                         if (error)
5987                                 goto del_cursor;
5988                         goto done;
5989                 }
5990         } else {
                     /* first extent: the subtraction above must not go below 0 */
5991                 if (got.br_startoff < offset_shift_fsb) {
5992                         error = -EINVAL;
5993                         goto del_cursor;
5994                 }
5995         }
5996
5997         error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5998                         cur, &logflags, new_startoff);
5999         if (error)
6000                 goto del_cursor;
6001
6002 done:
             /* advance to the next extent; none left means the shift is done */
6003         if (!xfs_iext_next_extent(ifp, &icur, &got)) {
6004                 *done = true;
6005                 goto del_cursor;
6006         }
6007
6008         *next_fsb = got.br_startoff;
6009 del_cursor:
             /* common exit: reached on success and on error alike */
6010         if (cur)
6011                 xfs_btree_del_cursor(cur, error);
6012         if (logflags)
6013                 xfs_trans_log_inode(tp, ip, logflags);
6014         return error;
6015 }
6016
6017 /* Make sure we won't be right-shifting an extent past the maximum bound. */
     /*
      * Only the last data fork extent is examined.  XFS_IOLOCK_EXCL is asserted
      * on entry; XFS_ILOCK_EXCL is taken and dropped inside this function.
      *
      * Returns -EIO if the filesystem is shut down, the error from
      * xfs_bmap_last_extent() if that fails, -EINVAL if the last extent starts
      * at or after @off and adding @shift to its start offset (masked with
      * BMBT_STARTOFF_MASK) yields a smaller value than the original offset,
      * and 0 otherwise.
      */
6018 int
6019 xfs_bmap_can_insert_extents(
6020         struct xfs_inode        *ip,
6021         xfs_fileoff_t           off,
6022         xfs_fileoff_t           shift)
6023 {
6024         struct xfs_bmbt_irec    got;
6025         int                     is_empty;
6026         int                     error = 0;
6027
6028         xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
6029
6030         if (xfs_is_shutdown(ip->i_mount))
6031                 return -EIO;
6032
6033         xfs_ilock(ip, XFS_ILOCK_EXCL);
6034         error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
             /* a masked sum below the original offset means the shift wrapped */
6035         if (!error && !is_empty && got.br_startoff >= off &&
6036             ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
6037                 error = -EINVAL;
6038         xfs_iunlock(ip, XFS_ILOCK_EXCL);
6039
6040         return error;
6041 }
6042
     /*
      * Shift one data fork extent right by @offset_shift_fsb blocks.
      *
      * When *next_fsb is NULLFSBLOCK the last extent of the fork is used as
      * the starting point; otherwise the extent is found by looking up
      * *next_fsb.  After the shift the cursor moves to the previous extent:
      * *next_fsb is set to its start offset, or *done is set to true when
      * there is no previous extent or it ends at or before @stop_fsb.  *done
      * is never set to false here, so the caller is presumably expected to
      * initialise it.
      *
      * Returns 0 on success; -EFSCORRUPTED if the fork has no extent list, the
      * extent has a null start block, or a looked-up extent starts before
      * @stop_fsb; -EIO if the filesystem is shut down; -EINVAL if the shifted
      * extent would run past the start of the next extent; otherwise an error
      * from the helpers called.
      */
6043 int
6044 xfs_bmap_insert_extents(
6045         struct xfs_trans        *tp,
6046         struct xfs_inode        *ip,
6047         xfs_fileoff_t           *next_fsb,
6048         xfs_fileoff_t           offset_shift_fsb,
6049         bool                    *done,
6050         xfs_fileoff_t           stop_fsb)
6051 {
6052         int                     whichfork = XFS_DATA_FORK;
6053         struct xfs_mount        *mp = ip->i_mount;
6054         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
6055         struct xfs_btree_cur    *cur = NULL;
6056         struct xfs_bmbt_irec    got, next;
6057         struct xfs_iext_cursor  icur;
6058         xfs_fileoff_t           new_startoff;
6059         int                     error = 0;
6060         int                     logflags = 0;
6061
6062         if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
6063             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
6064                 xfs_bmap_mark_sick(ip, whichfork);
6065                 return -EFSCORRUPTED;
6066         }
6067
6068         if (xfs_is_shutdown(mp))
6069                 return -EIO;
6070
6071         xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
6072
6073         error = xfs_iread_extents(tp, ip, whichfork);
6074         if (error)
6075                 return error;
6076
             /* a bmbt cursor is only needed when the fork is in btree format */
6077         if (ifp->if_format == XFS_DINODE_FMT_BTREE)
6078                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
6079
6080         if (*next_fsb == NULLFSBLOCK) {
                     /*
                      * First call: start from the last extent.  An empty fork, or
                      * a last extent starting before stop_fsb, means nothing to do.
                      */
6081                 xfs_iext_last(ifp, &icur);
6082                 if (!xfs_iext_get_extent(ifp, &icur, &got) ||
6083                     stop_fsb > got.br_startoff) {
6084                         *done = true;
6085                         goto del_cursor;
6086                 }
6087         } else {
6088                 if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
6089                         *done = true;
6090                         goto del_cursor;
6091                 }
6092         }
6093         if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
6094                 xfs_bmap_mark_sick(ip, whichfork);
6095                 error = -EFSCORRUPTED;
6096                 goto del_cursor;
6097         }
6098
             /*
              * The NULLFSBLOCK path above already left on this condition, so it
              * can only trigger for an extent found through the lookup path.
              */
6099         if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
6100                 xfs_bmap_mark_sick(ip, whichfork);
6101                 error = -EFSCORRUPTED;
6102                 goto del_cursor;
6103         }
6104
6105         new_startoff = got.br_startoff + offset_shift_fsb;
6106         if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
                     /* the shifted extent must not overlap the next one */
6107                 if (new_startoff + got.br_blockcount > next.br_startoff) {
6108                         error = -EINVAL;
6109                         goto del_cursor;
6110                 }
6111
6112                 /*
6113                  * Unlike a left shift (which involves a hole punch), a right
6114                  * shift does not modify extent neighbors in any way.  We should
6115                  * never find mergeable extents in this scenario.  Check anyways
6116                  * and warn if we encounter two extents that could be one.
6117                  */
6118                 if (xfs_bmse_can_merge(ip, whichfork, &got, &next,
6119                                 offset_shift_fsb))
6120                         WARN_ON_ONCE(1);
6121         }
6122
6123         error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
6124                         cur, &logflags, new_startoff);
6125         if (error)
6126                 goto del_cursor;
6127
             /* step backwards; stop once the previous extent ends by stop_fsb */
6128         if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
6129             stop_fsb >= got.br_startoff + got.br_blockcount) {
6130                 *done = true;
6131                 goto del_cursor;
6132         }
6133
6134         *next_fsb = got.br_startoff;
6135 del_cursor:
             /* common exit: reached on success and on error alike */
6136         if (cur)
6137                 xfs_btree_del_cursor(cur, error);
6138         if (logflags)
6139                 xfs_trans_log_inode(tp, ip, logflags);
6140         return error;
6141 }
6142
6143 /*
6144  * Split the data fork extent that contains @split_fsb into two extents, so
6145  * that @split_fsb becomes the first block of the second (new) extent and the
6146  * first extent keeps only the blocks before @split_fsb.  If @split_fsb lies
6147  * in a hole or is already the first block of an extent, just return 0.
      *
      * Returns 0 on success, -EFSCORRUPTED if the fork has no extent list or a
      * btree lookup/insert gives an unexpected result, -EIO if the filesystem
      * is shut down, or an error from the helpers called.
6148  */
6149 int
6150 xfs_bmap_split_extent(
6151         struct xfs_trans        *tp,
6152         struct xfs_inode        *ip,
6153         xfs_fileoff_t           split_fsb)
6154 {
6155         int                             whichfork = XFS_DATA_FORK;
6156         struct xfs_ifork                *ifp = xfs_ifork_ptr(ip, whichfork);
6157         struct xfs_btree_cur            *cur = NULL;
6158         struct xfs_bmbt_irec            got;
6159         struct xfs_bmbt_irec            new; /* split extent */
6160         struct xfs_mount                *mp = ip->i_mount;
6161         xfs_fsblock_t                   gotblkcnt; /* new block count for got */
6162         struct xfs_iext_cursor          icur;
6163         int                             error = 0;
6164         int                             logflags = 0;
6165         int                             i = 0;
6166
6167         if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
6168             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
6169                 xfs_bmap_mark_sick(ip, whichfork);
6170                 return -EFSCORRUPTED;
6171         }
6172
6173         if (xfs_is_shutdown(mp))
6174                 return -EIO;
6175
6176         /* Read in all the extents */
6177         error = xfs_iread_extents(tp, ip, whichfork);
6178         if (error)
6179                 return error;
6180
6181         /*
6182          * If there are no extents, or split_fsb lies in a hole or is already
              * the first block of an extent, we are done.
6183          */
6184         if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
6185             got.br_startoff >= split_fsb)
6186                 return 0;
6187
             /* got keeps the blocks before split_fsb; new takes the remainder */
6188         gotblkcnt = split_fsb - got.br_startoff;
6189         new.br_startoff = split_fsb;
6190         new.br_startblock = got.br_startblock + gotblkcnt;
6191         new.br_blockcount = got.br_blockcount - gotblkcnt;
6192         new.br_state = got.br_state;
6193
6194         if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
                     /* position the bmbt cursor on the record about to shrink */
6195                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
6196                 error = xfs_bmbt_lookup_eq(cur, &got, &i);
6197                 if (error)
6198                         goto del_cursor;
6199                 if (XFS_IS_CORRUPT(mp, i != 1)) {
6200                         xfs_btree_mark_sick(cur);
6201                         error = -EFSCORRUPTED;
6202                         goto del_cursor;
6203                 }
6204         }
6205
             /* shrink the original extent, first incore and then in the btree */
6206         got.br_blockcount = gotblkcnt;
6207         xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
6208                         &got);
6209
6210         logflags = XFS_ILOG_CORE;
6211         if (cur) {
6212                 error = xfs_bmbt_update(cur, &got);
6213                 if (error)
6214                         goto del_cursor;
6215         } else
6216                 logflags |= XFS_ILOG_DEXT;
6217
6218         /* Add new extent */
6219         xfs_iext_next(ifp, &icur);
6220         xfs_iext_insert(ip, &icur, &new, 0);
6221         ifp->if_nextents++;
6222
6223         if (cur) {
                     /* the new record must not exist yet (i == 0) before insert */
6224                 error = xfs_bmbt_lookup_eq(cur, &new, &i);
6225                 if (error)
6226                         goto del_cursor;
6227                 if (XFS_IS_CORRUPT(mp, i != 0)) {
6228                         xfs_btree_mark_sick(cur);
6229                         error = -EFSCORRUPTED;
6230                         goto del_cursor;
6231                 }
6232                 error = xfs_btree_insert(cur, &i);
6233                 if (error)
6234                         goto del_cursor;
6235                 if (XFS_IS_CORRUPT(mp, i != 1)) {
6236                         xfs_btree_mark_sick(cur);
6237                         error = -EFSCORRUPTED;
6238                         goto del_cursor;
6239                 }
6240         }
6241
6242         /*
6243          * Convert to a btree if necessary.
6244          */
6245         if (xfs_bmap_needs_btree(ip, whichfork)) {
6246                 int tmp_logflags; /* partial log flag return val */
6247
6248                 ASSERT(cur == NULL);
6249                 error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
6250                                 &tmp_logflags, whichfork);
6251                 logflags |= tmp_logflags;
6252         }
6253
6254 del_cursor:
6255         if (cur) {
                     /*
                      * NOTE(review): the cursor's allocated count is zeroed before
                      * the cursor is deleted; the reason is not visible here.
                      */
6256                 cur->bc_bmap.allocated = 0;
6257                 xfs_btree_del_cursor(cur, error);
6258         }
6259
6260         if (logflags)
6261                 xfs_trans_log_inode(tp, ip, logflags);
6262         return error;
6263 }
6264
6265 /* Record a bmap intent. */
     /*
      * Allocates an xfs_bmap_intent from xfs_bmap_intent_cache, fills it with
      * @type, @ip, @whichfork and a copy of *@bmap, and hands it to
      * xfs_bmap_defer_add().  Nothing is recorded, and no error is reported,
      * when @whichfork is neither the data nor the attr fork or when the
      * extent's start block is HOLESTARTBLOCK or DELAYSTARTBLOCK.
      */
6266 static inline void
6267 __xfs_bmap_add(
6268         struct xfs_trans                *tp,
6269         enum xfs_bmap_intent_type       type,
6270         struct xfs_inode                *ip,
6271         int                             whichfork,
6272         struct xfs_bmbt_irec            *bmap)
6273 {
6274         struct xfs_bmap_intent          *bi;
6275
6276         if ((whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK) ||
6277             bmap->br_startblock == HOLESTARTBLOCK ||
6278             bmap->br_startblock == DELAYSTARTBLOCK)
6279                 return;
6280
             /* allocated with __GFP_NOFAIL, hence no NULL check on the result */
6281         bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL);
6282         INIT_LIST_HEAD(&bi->bi_list);
6283         bi->bi_type = type;
6284         bi->bi_owner = ip;
6285         bi->bi_whichfork = whichfork;
6286         bi->bi_bmap = *bmap;
6287
6288         xfs_bmap_defer_add(tp, bi);
6289 }
6290
6291 /* Map an extent into a file. */
     /*
      * Thin wrapper: records an XFS_BMAP_MAP intent for the extent @PREV in
      * fork @whichfork of @ip through __xfs_bmap_add(), which copies *@PREV.
      */
6292 void
6293 xfs_bmap_map_extent(
6294         struct xfs_trans        *tp,
6295         struct xfs_inode        *ip,
6296         int                     whichfork,
6297         struct xfs_bmbt_irec    *PREV)
6298 {
6299         __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, whichfork, PREV);
6300 }
6301
6302 /* Unmap an extent out of a file. */
     /*
      * Thin wrapper: records an XFS_BMAP_UNMAP intent for the extent @PREV in
      * fork @whichfork of @ip through __xfs_bmap_add(), which copies *@PREV.
      */
6303 void
6304 xfs_bmap_unmap_extent(
6305         struct xfs_trans        *tp,
6306         struct xfs_inode        *ip,
6307         int                     whichfork,
6308         struct xfs_bmbt_irec    *PREV)
6309 {
6310         __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, whichfork, PREV);
6311 }
6312
6313 /*
6314  * Process one of the deferred bmap operations.  We pass back the
6315  * btree cursor to maintain our lock on the bmapbt between calls.
      *
      * NOTE(review): the signature below neither takes nor returns a btree
      * cursor, so the second sentence above looks stale - confirm.
      *
      * XFS_BMAP_MAP remaps the whole extent with xfs_bmapi_remap() and then
      * sets bi->bi_bmap.br_blockcount to 0, whether or not the remap failed.
      * XFS_BMAP_UNMAP calls __xfs_bunmapi() limited to one extent and lets it
      * update br_blockcount in place; presumably a non-zero value afterwards
      * tells the caller that work remains - verify against the caller.
      *
      * Returns 0 on success, -EIO when the XFS_ERRTAG_BMAP_FINISH_ONE error
      * tag fires, -EFSCORRUPTED for an unknown intent type, or the error from
      * the remap/unmap helper.
6316  */
6317 int
6318 xfs_bmap_finish_one(
6319         struct xfs_trans                *tp,
6320         struct xfs_bmap_intent          *bi)
6321 {
6322         struct xfs_bmbt_irec            *bmap = &bi->bi_bmap;
6323         int                             error = 0;
6324         int                             flags = 0;
6325
6326         if (bi->bi_whichfork == XFS_ATTR_FORK)
6327                 flags |= XFS_BMAPI_ATTRFORK;
6328
6329         ASSERT(tp->t_highest_agno == NULLAGNUMBER);
6330
6331         trace_xfs_bmap_deferred(bi);
6332
6333         if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE))
6334                 return -EIO;
6335
6336         switch (bi->bi_type) {
6337         case XFS_BMAP_MAP:
                     /* unwritten extents are remapped with XFS_BMAPI_PREALLOC */
6338                 if (bi->bi_bmap.br_state == XFS_EXT_UNWRITTEN)
6339                         flags |= XFS_BMAPI_PREALLOC;
6340                 error = xfs_bmapi_remap(tp, bi->bi_owner, bmap->br_startoff,
6341                                 bmap->br_blockcount, bmap->br_startblock,
6342                                 flags);
6343                 bmap->br_blockcount = 0;
6344                 break;
6345         case XFS_BMAP_UNMAP:
                     /* at most one extent per call; length is updated in place */
6346                 error = __xfs_bunmapi(tp, bi->bi_owner, bmap->br_startoff,
6347                                 &bmap->br_blockcount, flags | XFS_BMAPI_REMAP,
6348                                 1);
6349                 break;
6350         default:
6351                 ASSERT(0);
6352                 xfs_bmap_mark_sick(bi->bi_owner, bi->bi_whichfork);
6353                 error = -EFSCORRUPTED;
6354         }
6355
6356         return error;
6357 }
6358
6359 /* Check that an extent does not have invalid flags or bad ranges. */
     /*
      * Returns NULL when @irec passes every check, otherwise __this_address
      * from the check that failed.  The checks, in order:
      *  - the file offset range, via xfs_verify_fileext();
      *  - the block range, via xfs_verify_rtbext() when @rtfile is set and
      *    @whichfork is the data fork, else via xfs_verify_fsbext();
      *  - only the data fork may hold an extent whose state is not
      *    XFS_EXT_NORM.
      */
6360 xfs_failaddr_t
6361 xfs_bmap_validate_extent_raw(
6362         struct xfs_mount        *mp,
6363         bool                    rtfile,
6364         int                     whichfork,
6365         struct xfs_bmbt_irec    *irec)
6366 {
6367         if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
6368                 return __this_address;
6369
6370         if (rtfile && whichfork == XFS_DATA_FORK) {
6371                 if (!xfs_verify_rtbext(mp, irec->br_startblock,
6372                                            irec->br_blockcount))
6373                         return __this_address;
6374         } else {
6375                 if (!xfs_verify_fsbext(mp, irec->br_startblock,
6376                                            irec->br_blockcount))
6377                         return __this_address;
6378         }
6379         if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
6380                 return __this_address;
6381         return NULL;
6382 }
6383
     /*
      * Create the "xfs_bmap_intent" slab cache, sized for struct
      * xfs_bmap_intent, and store it in xfs_bmap_intent_cache.
      * Returns 0 on success or -ENOMEM if the cache could not be created.
      */
6384 int __init
6385 xfs_bmap_intent_init_cache(void)
6386 {
6387         xfs_bmap_intent_cache = kmem_cache_create("xfs_bmap_intent",
6388                         sizeof(struct xfs_bmap_intent),
6389                         0, 0, NULL);
6390
6391         return xfs_bmap_intent_cache != NULL ? 0 : -ENOMEM;
6392 }
6393
     /*
      * Destroy the bmap intent slab cache and reset xfs_bmap_intent_cache to
      * NULL so that the stale pointer is not left behind.
      */
6394 void
6395 xfs_bmap_intent_destroy_cache(void)
6396 {
6397         kmem_cache_destroy(xfs_bmap_intent_cache);
6398         xfs_bmap_intent_cache = NULL;
6399 }
6400
6401 /* Check that an inode's extent does not have invalid flags or bad ranges. */
     /*
      * Convenience wrapper around xfs_bmap_validate_extent_raw() that takes
      * the mount and the realtime-file flag from @ip.  The return value is
      * passed through unchanged.
      */
6402 xfs_failaddr_t
6403 xfs_bmap_validate_extent(
6404         struct xfs_inode        *ip,
6405         int                     whichfork,
6406         struct xfs_bmbt_irec    *irec)
6407 {
6408         return xfs_bmap_validate_extent_raw(ip->i_mount,
6409                         XFS_IS_REALTIME_INODE(ip), whichfork, irec);
6410 }
6411
6412 /*
6413  * Used in xfs_itruncate_extents().  This is the maximum number of extents
6414  * freed from a file in a single transaction.
      *
      * In this part of the file it is the extent limit that
      * xfs_bunmapi_range() passes to each __xfs_bunmapi() call.
6415  */
6416 #define XFS_ITRUNC_MAX_EXTENTS  2
6417
6418 /*
6419  * Unmap every extent in part of an inode's fork.  We don't do any higher level
6420  * invalidation work at all.
      *
      * The range is [@startoff, @endoff] with @endoff inclusive, as the "+ 1"
      * in the length computation shows.  The work is done in passes: each pass
      * unmaps at most XFS_ITRUNC_MAX_EXTENTS extents and then finishes the
      * deferred work through xfs_defer_finish().  @tpp is handed to
      * xfs_defer_finish() by reference, so *tpp may presumably point to a
      * different transaction on return - confirm against xfs_defer_finish().
      *
      * Returns 0 once the remaining length reaches zero, or the first error
      * from __xfs_bunmapi() or xfs_defer_finish().
6421  */
6422 int
6423 xfs_bunmapi_range(
6424         struct xfs_trans        **tpp,
6425         struct xfs_inode        *ip,
6426         uint32_t                flags,
6427         xfs_fileoff_t           startoff,
6428         xfs_fileoff_t           endoff)
6429 {
6430         xfs_filblks_t           unmap_len = endoff - startoff + 1;
6431         int                     error = 0;
6432
6433         xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
6434
             /* __xfs_bunmapi() updates unmap_len through the pointer it is given */
6435         while (unmap_len > 0) {
6436                 ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER);
6437                 error = __xfs_bunmapi(*tpp, ip, startoff, &unmap_len, flags,
6438                                 XFS_ITRUNC_MAX_EXTENTS);
6439                 if (error)
6440                         goto out;
6441
6442                 /* free the just unmapped extents */
6443                 error = xfs_defer_finish(tpp);
6444                 if (error)
6445                         goto out;
                     /* give the scheduler a chance between passes */
6446                 cond_resched();
6447         }
6448 out:
6449         return error;
6450 }
6451
     /*
      * Context handed to xfs_bmap_query_range_helper() through the btree
      * query's private pointer: the caller's callback and its private data.
      */
6452 struct xfs_bmap_query_range {
6453         xfs_bmap_query_range_fn fn;     /* callback run for each record */
6454         void                    *priv;  /* passed through to fn untouched */
6455 };
6456
6457 /* Format btree record and pass to our callback. */
     /*
      * @priv points to a struct xfs_bmap_query_range.  The on-disk record is
      * decoded into an incore xfs_bmbt_irec and validated with
      * xfs_bmap_validate_extent().  A record that fails validation marks the
      * btree sick and returns the result of xfs_bmap_complain_bad_rec();
      * the callback is not invoked for it.  Otherwise the callback's return
      * value is returned.
      */
6458 STATIC int
6459 xfs_bmap_query_range_helper(
6460         struct xfs_btree_cur            *cur,
6461         const union xfs_btree_rec       *rec,
6462         void                            *priv)
6463 {
6464         struct xfs_bmap_query_range     *query = priv;
6465         struct xfs_bmbt_irec            irec;
6466         xfs_failaddr_t                  fa;
6467
6468         xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
6469         fa = xfs_bmap_validate_extent(cur->bc_ino.ip, cur->bc_ino.whichfork,
6470                         &irec);
6471         if (fa) {
6472                 xfs_btree_mark_sick(cur);
6473                 return xfs_bmap_complain_bad_rec(cur->bc_ino.ip,
6474                                 cur->bc_ino.whichfork, fa, &irec);
6475         }
6476
6477         return query->fn(cur, &irec, query->priv);
6478 }
6479
6480 /* Find all bmaps. */
     /*
      * Runs xfs_btree_query_all() over @cur with
      * xfs_bmap_query_range_helper() as the per-record function, so @fn
      * receives each record as a decoded and validated xfs_bmbt_irec together
      * with @priv.  Returns whatever xfs_btree_query_all() returns.
      */
6481 int
6482 xfs_bmap_query_all(
6483         struct xfs_btree_cur            *cur,
6484         xfs_bmap_query_range_fn         fn,
6485         void                            *priv)
6486 {
             /* stack-allocated context; only needs to live for this call */
6487         struct xfs_bmap_query_range     query = {
6488                 .priv                   = priv,
6489                 .fn                     = fn,
6490         };
6491
6492         return xfs_btree_query_all(cur, xfs_bmap_query_range_helper, &query);
6493 }
6494
6495 /* Helper function to extract extent size hint from inode */
     /*
      * Order of precedence, first match wins:
      *  1. always-COW inode: 0;
      *  2. XFS_DIFLAG_EXTSIZE set and i_extsize non-zero: i_extsize;
      *  3. realtime inode on a filesystem with sb_rextsize > 1: sb_rextsize;
      *  4. otherwise: 0.
      */
6496 xfs_extlen_t
6497 xfs_get_extsz_hint(
6498         struct xfs_inode        *ip)
6499 {
6500         /*
6501          * No point in aligning allocations if we need to COW to actually
6502          * write to them.
6503          */
6504         if (xfs_is_always_cow_inode(ip))
6505                 return 0;
6506         if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
6507                 return ip->i_extsize;
6508         if (XFS_IS_REALTIME_INODE(ip) &&
6509             ip->i_mount->m_sb.sb_rextsize > 1)
6510                 return ip->i_mount->m_sb.sb_rextsize;
6511         return 0;
6512 }
6513
6514 /*
6515  * Helper function to extract CoW extent size hint from inode.
6516  * Between the extent size hint and the CoW extent size hint, we
6517  * return the greater of the two.  If the value is zero (automatic),
6518  * use the default size.
      *
      * The CoW hint is taken from i_cowextsize only when
      * XFS_DIFLAG2_COWEXTSIZE is set; otherwise it counts as zero.  The
      * default returned for a zero result is XFS_DEFAULT_COWEXTSZ_HINT.
6519  */
6520 xfs_extlen_t
6521 xfs_get_cowextsz_hint(
6522         struct xfs_inode        *ip)
6523 {
6524         xfs_extlen_t            a, b;
6525
             /* a: CoW extent size hint, b: regular extent size hint */
6526         a = 0;
6527         if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
6528                 a = ip->i_cowextsize;
6529         b = xfs_get_extsz_hint(ip);
6530
6531         a = max(a, b);
6532         if (a == 0)
6533                 return XFS_DEFAULT_COWEXTSZ_HINT;
6534         return a;
6535 }
This page took 0.410241 seconds and 4 git commands to generate.