fs/xfs/libxfs/xfs_alloc.c
1da177e4 1/*
7b718769
NS
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
1da177e4 4 *
7b718769
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
1da177e4
LT
7 * published by the Free Software Foundation.
8 *
7b718769
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
1da177e4 13 *
7b718769
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1da177e4 17 */
1da177e4 18#include "xfs.h"
a844f451 19#include "xfs_fs.h"
70a9883c 20#include "xfs_format.h"
239880ef 21#include "xfs_log_format.h"
70a9883c 22#include "xfs_shared.h"
239880ef 23#include "xfs_trans_resv.h"
a844f451 24#include "xfs_bit.h"
1da177e4 25#include "xfs_sb.h"
1da177e4 26#include "xfs_mount.h"
3ab78df2 27#include "xfs_defer.h"
a844f451 28#include "xfs_inode.h"
1da177e4 29#include "xfs_btree.h"
673930c3 30#include "xfs_rmap.h"
a4fbe6ab 31#include "xfs_alloc_btree.h"
1da177e4 32#include "xfs_alloc.h"
efc27b52 33#include "xfs_extent_busy.h"
e9e899a2 34#include "xfs_errortag.h"
1da177e4 35#include "xfs_error.h"
4e0e6040 36#include "xfs_cksum.h"
0b1b213f 37#include "xfs_trace.h"
239880ef 38#include "xfs_trans.h"
4e0e6040 39#include "xfs_buf_item.h"
239880ef 40#include "xfs_log.h"
3fd129b6 41#include "xfs_ag_resv.h"
1da177e4 42
c999a223 43struct workqueue_struct *xfs_alloc_wq;
1da177e4
LT
44
45#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
46
47#define XFSA_FIXUP_BNO_OK 1
48#define XFSA_FIXUP_CNT_OK 2
49
1da177e4
LT
50STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
51STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
52STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
53STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
e26f0501 54 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
1da177e4 55
af30dfa1
DW
56unsigned int
57xfs_refc_block(
58 struct xfs_mount *mp)
59{
60 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
61 return XFS_RMAP_BLOCK(mp) + 1;
62 if (xfs_sb_version_hasfinobt(&mp->m_sb))
63 return XFS_FIBT_BLOCK(mp) + 1;
64 return XFS_IBT_BLOCK(mp) + 1;
65}
66
8018026e
DW
67xfs_extlen_t
68xfs_prealloc_blocks(
69 struct xfs_mount *mp)
70{
af30dfa1
DW
71 if (xfs_sb_version_hasreflink(&mp->m_sb))
72 return xfs_refc_block(mp) + 1;
8018026e
DW
73 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
74 return XFS_RMAP_BLOCK(mp) + 1;
75 if (xfs_sb_version_hasfinobt(&mp->m_sb))
76 return XFS_FIBT_BLOCK(mp) + 1;
77 return XFS_IBT_BLOCK(mp) + 1;
78}
79
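/*
 * Worked example (illustration only): with both rmapbt and reflink
 * enabled, xfs_refc_block() above returns XFS_RMAP_BLOCK(mp) + 1, so
 * xfs_prealloc_blocks() returns XFS_RMAP_BLOCK(mp) + 2, i.e. the first
 * AG block past the last statically-placed btree root.
 */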
52548852
DW
80/*
81 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
82 * AGF buffer (PV 947395), we place constraints on the relationship among
83 * actual allocations for data blocks, freelist blocks, and potential file data
84 * bmap btree blocks. However, these restrictions may result in no actual space
 85 * allocated for a delayed extent; for example, a data block in a certain AG is
 86 * allocated but there is no additional block for the extra bmap btree
 87 * block needed by a split of the file's bmap btree. This may
88 * lead to an infinite loop when the file gets flushed to disk and all delayed
89 * extents need to be actually allocated. To get around this, we explicitly set
90 * aside a few blocks which will not be reserved in delayed allocation.
91 *
3fd129b6
DW
92 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
93 * potential split of the file's bmap btree.
52548852
DW
94 */
95unsigned int
96xfs_alloc_set_aside(
97 struct xfs_mount *mp)
98{
5149fd32 99 return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
52548852
DW
100}
101
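/*
 * Illustrative sketch, not part of this file: how a caller might apply
 * the per-AG set-aside when reporting usable free space. The helper
 * name below is hypothetical; per the comment above, each AG holds back
 * XFS_ALLOC_AGFL_RESERVE (4) blocks for the freelist plus 4 more for a
 * potential bmap btree split.
 */
static inline uint64_t
example_usable_fdblocks(
	struct xfs_mount	*mp,
	uint64_t		fdblocks)	/* raw free block count */
{
	unsigned int		set_aside = xfs_alloc_set_aside(mp);

	/* e.g. 16 AGs: 16 * (4 + 4) = 128 blocks are never handed out */
	return fdblocks > set_aside ? fdblocks - set_aside : 0;
}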
102/*
103 * When deciding how much space to allocate out of an AG, we limit the
 104 * allocation maximum size to the size of the AG. However, we cannot use all the
105 * blocks in the AG - some are permanently used by metadata. These
106 * blocks are generally:
107 * - the AG superblock, AGF, AGI and AGFL
108 * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
109 * the AGI free inode and rmap btree root blocks.
110 * - blocks on the AGFL according to xfs_alloc_set_aside() limits
 111 * - the refcount btree root block, if reflink is enabled
112 *
113 * The AG headers are sector sized, so the amount of space they take up is
114 * dependent on filesystem geometry. The others are all single blocks.
115 */
116unsigned int
117xfs_alloc_ag_max_usable(
118 struct xfs_mount *mp)
119{
120 unsigned int blocks;
121
122 blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
123 blocks += XFS_ALLOC_AGFL_RESERVE;
124 blocks += 3; /* AGF, AGI btree root blocks */
125 if (xfs_sb_version_hasfinobt(&mp->m_sb))
126 blocks++; /* finobt root block */
127 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
128 blocks++; /* rmap root block */
d0e853f3
DW
129 if (xfs_sb_version_hasreflink(&mp->m_sb))
130 blocks++; /* refcount root block */
52548852
DW
131
132 return mp->m_sb.sb_agblocks - blocks;
133}
134
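/*
 * Worked example (geometry assumed for illustration): with 512 byte
 * sectors, 4k blocks, and finobt, rmapbt and reflink all enabled, the
 * four sector-sized AG headers round up to a single block, so
 * blocks = 1 + XFS_ALLOC_AGFL_RESERVE (4) + 3 + 1 + 1 + 1 = 11 and
 * xfs_alloc_ag_max_usable() returns sb_agblocks - 11.
 */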
fe033cc8
CH
135/*
136 * Lookup the record equal to [bno, len] in the btree given by cur.
137 */
138STATIC int /* error */
139xfs_alloc_lookup_eq(
140 struct xfs_btree_cur *cur, /* btree cursor */
141 xfs_agblock_t bno, /* starting block of extent */
142 xfs_extlen_t len, /* length of extent */
143 int *stat) /* success/failure */
144{
145 cur->bc_rec.a.ar_startblock = bno;
146 cur->bc_rec.a.ar_blockcount = len;
147 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
148}
149
150/*
151 * Lookup the first record greater than or equal to [bno, len]
152 * in the btree given by cur.
153 */
a66d6363 154int /* error */
fe033cc8
CH
155xfs_alloc_lookup_ge(
156 struct xfs_btree_cur *cur, /* btree cursor */
157 xfs_agblock_t bno, /* starting block of extent */
158 xfs_extlen_t len, /* length of extent */
159 int *stat) /* success/failure */
160{
161 cur->bc_rec.a.ar_startblock = bno;
162 cur->bc_rec.a.ar_blockcount = len;
163 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
164}
165
166/*
167 * Lookup the first record less than or equal to [bno, len]
168 * in the btree given by cur.
169 */
ce1d802e 170int /* error */
fe033cc8
CH
171xfs_alloc_lookup_le(
172 struct xfs_btree_cur *cur, /* btree cursor */
173 xfs_agblock_t bno, /* starting block of extent */
174 xfs_extlen_t len, /* length of extent */
175 int *stat) /* success/failure */
176{
177 cur->bc_rec.a.ar_startblock = bno;
178 cur->bc_rec.a.ar_blockcount = len;
179 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
180}
181
278d0ca1
CH
182/*
183 * Update the record referred to by cur to the value given
184 * by [bno, len].
185 * This either works (return 0) or gets an EFSCORRUPTED error.
186 */
187STATIC int /* error */
188xfs_alloc_update(
189 struct xfs_btree_cur *cur, /* btree cursor */
190 xfs_agblock_t bno, /* starting block of extent */
191 xfs_extlen_t len) /* length of extent */
192{
193 union xfs_btree_rec rec;
194
195 rec.alloc.ar_startblock = cpu_to_be32(bno);
196 rec.alloc.ar_blockcount = cpu_to_be32(len);
197 return xfs_btree_update(cur, &rec);
198}
fe033cc8 199
8cc938fe
CH
200/*
201 * Get the data from the pointed-to record.
202 */
a46db608 203int /* error */
8cc938fe
CH
204xfs_alloc_get_rec(
205 struct xfs_btree_cur *cur, /* btree cursor */
206 xfs_agblock_t *bno, /* output: starting block of extent */
207 xfs_extlen_t *len, /* output: length of extent */
208 int *stat) /* output: success/failure */
209{
210 union xfs_btree_rec *rec;
211 int error;
212
213 error = xfs_btree_get_rec(cur, &rec, stat);
214 if (!error && *stat == 1) {
215 *bno = be32_to_cpu(rec->alloc.ar_startblock);
216 *len = be32_to_cpu(rec->alloc.ar_blockcount);
217 }
218 return error;
219}
220
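/*
 * Illustrative sketch, not part of this file: the lookup and get_rec
 * helpers above are normally used as a pair - position the cursor,
 * check the stat result, then read the record back. The function name
 * is hypothetical and the error handling is trimmed down.
 */
STATIC int
example_first_extent_of_size(
	struct xfs_btree_cur	*cnt_cur,	/* by-size btree cursor */
	xfs_extlen_t		minlen,		/* smallest extent wanted */
	xfs_agblock_t		*bno,		/* output: start block */
	xfs_extlen_t		*len)		/* output: length */
{
	int			error;
	int			i;

	error = xfs_alloc_lookup_ge(cnt_cur, 0, minlen, &i);
	if (error || !i)
		return error;	/* error, or nothing long enough */
	/* cursor now points at a record, so stat should come back 1 */
	return xfs_alloc_get_rec(cnt_cur, bno, len, &i);
}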
1da177e4
LT
221/*
222 * Compute aligned version of the found extent.
223 * Takes alignment and min length into account.
224 */
ebf55872 225STATIC bool
1da177e4 226xfs_alloc_compute_aligned(
86fa8af6 227 xfs_alloc_arg_t *args, /* allocation argument structure */
1da177e4
LT
228 xfs_agblock_t foundbno, /* starting block in found extent */
229 xfs_extlen_t foundlen, /* length in found extent */
1da177e4 230 xfs_agblock_t *resbno, /* result block number */
ebf55872
CH
231 xfs_extlen_t *reslen, /* result length */
232 unsigned *busy_gen)
1da177e4 233{
ebf55872
CH
234 xfs_agblock_t bno = foundbno;
235 xfs_extlen_t len = foundlen;
bfe46d4e 236 xfs_extlen_t diff;
ebf55872 237 bool busy;
1da177e4 238
e26f0501 239 /* Trim busy sections out of found extent */
ebf55872 240 busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
e26f0501 241
bfe46d4e
BF
242 /*
243 * If we have a largish extent that happens to start before min_agbno,
244 * see if we can shift it into range...
245 */
246 if (bno < args->min_agbno && bno + len > args->min_agbno) {
247 diff = args->min_agbno - bno;
248 if (len > diff) {
249 bno += diff;
250 len -= diff;
251 }
252 }
253
e26f0501
CH
254 if (args->alignment > 1 && len >= args->minlen) {
255 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
bfe46d4e
BF
256
257 diff = aligned_bno - bno;
e26f0501
CH
258
259 *resbno = aligned_bno;
260 *reslen = diff >= len ? 0 : len - diff;
1da177e4 261 } else {
e26f0501
CH
262 *resbno = bno;
263 *reslen = len;
1da177e4 264 }
ebf55872
CH
265
266 return busy;
1da177e4
LT
267}
268
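/*
 * Worked example of the fixup above (numbers made up): with
 * foundbno = 103, foundlen = 20, args->alignment = 8 and nothing busy,
 * aligned_bno = roundup(103, 8) = 104 and diff = 1, so the caller gets
 * *resbno = 104 and *reslen = 19. Had the rounding consumed the whole
 * extent (diff >= len), *reslen would be 0 and the extent is unusable.
 */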
269/*
270 * Compute best start block and diff for "near" allocations.
271 * freelen >= wantlen already checked by caller.
272 */
273STATIC xfs_extlen_t /* difference value (absolute) */
274xfs_alloc_compute_diff(
275 xfs_agblock_t wantbno, /* target starting block */
276 xfs_extlen_t wantlen, /* target length */
277 xfs_extlen_t alignment, /* target alignment */
292378ed 278 int datatype, /* are we allocating data? */
1da177e4
LT
279 xfs_agblock_t freebno, /* freespace's starting block */
280 xfs_extlen_t freelen, /* freespace's length */
281 xfs_agblock_t *newbnop) /* result: best start block from free */
282{
283 xfs_agblock_t freeend; /* end of freespace extent */
284 xfs_agblock_t newbno1; /* return block number */
285 xfs_agblock_t newbno2; /* other new block number */
286 xfs_extlen_t newlen1=0; /* length with newbno1 */
287 xfs_extlen_t newlen2=0; /* length with newbno2 */
288 xfs_agblock_t wantend; /* end of target extent */
292378ed 289 bool userdata = xfs_alloc_is_userdata(datatype);
1da177e4
LT
290
291 ASSERT(freelen >= wantlen);
292 freeend = freebno + freelen;
293 wantend = wantbno + wantlen;
211d022c
JK
294 /*
295 * We want to allocate from the start of a free extent if it is past
296 * the desired block or if we are allocating user data and the free
297 * extent is before desired block. The second case is there to allow
298 * for contiguous allocation from the remaining free space if the file
299 * grows in the short term.
300 */
301 if (freebno >= wantbno || (userdata && freeend < wantend)) {
1da177e4
LT
302 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
303 newbno1 = NULLAGBLOCK;
304 } else if (freeend >= wantend && alignment > 1) {
305 newbno1 = roundup(wantbno, alignment);
306 newbno2 = newbno1 - alignment;
307 if (newbno1 >= freeend)
308 newbno1 = NULLAGBLOCK;
309 else
310 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
311 if (newbno2 < freebno)
312 newbno2 = NULLAGBLOCK;
313 else
314 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
315 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
316 if (newlen1 < newlen2 ||
317 (newlen1 == newlen2 &&
318 XFS_ABSDIFF(newbno1, wantbno) >
319 XFS_ABSDIFF(newbno2, wantbno)))
320 newbno1 = newbno2;
321 } else if (newbno2 != NULLAGBLOCK)
322 newbno1 = newbno2;
323 } else if (freeend >= wantend) {
324 newbno1 = wantbno;
325 } else if (alignment > 1) {
326 newbno1 = roundup(freeend - wantlen, alignment);
327 if (newbno1 > freeend - wantlen &&
328 newbno1 - alignment >= freebno)
329 newbno1 -= alignment;
330 else if (newbno1 >= freeend)
331 newbno1 = NULLAGBLOCK;
332 } else
333 newbno1 = freeend - wantlen;
334 *newbnop = newbno1;
335 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
336}
337
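/*
 * Worked example (numbers made up): wantbno = 100, wantlen = 10,
 * alignment = 4, and a free extent at freebno = 117 for freelen = 12
 * (freeend = 129). Since freebno >= wantbno the first case applies:
 * newbno1 = roundup(117, 4) = 120, which is below freeend, so
 * *newbnop = 120 and the returned diff is XFS_ABSDIFF(120, 100) = 20.
 */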
338/*
339 * Fix up the length, based on mod and prod.
340 * len should be k * prod + mod for some k.
341 * If len is too small it is returned unchanged.
342 * If len hits maxlen it is left alone.
343 */
344STATIC void
345xfs_alloc_fix_len(
346 xfs_alloc_arg_t *args) /* allocation argument structure */
347{
348 xfs_extlen_t k;
349 xfs_extlen_t rlen;
350
351 ASSERT(args->mod < args->prod);
352 rlen = args->len;
353 ASSERT(rlen >= args->minlen);
354 ASSERT(rlen <= args->maxlen);
355 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
356 (args->mod == 0 && rlen < args->prod))
357 return;
358 k = rlen % args->prod;
359 if (k == args->mod)
360 return;
30265117
JK
361 if (k > args->mod)
362 rlen = rlen - (k - args->mod);
363 else
364 rlen = rlen - args->prod + (args->mod - k);
3790a8cd 365 /* casts to (int) catch length underflows */
30265117
JK
366 if ((int)rlen < (int)args->minlen)
367 return;
368 ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
369 ASSERT(rlen % args->prod == args->mod);
54fee133
CH
370 ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
371 rlen + args->minleft);
1da177e4
LT
372 args->len = rlen;
373}
374
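/*
 * Worked examples (numbers made up) of the mod/prod trimming above,
 * where the final length should be k * prod + mod for some k:
 *   prod = 8, mod = 3, rlen = 30: k = 6 > mod, rlen -= (6 - 3), giving 27 = 3*8 + 3
 *   prod = 8, mod = 5, rlen = 33: k = 1 < mod, rlen = 33 - 8 + (5 - 1), giving 29 = 3*8 + 5
 * If the trimmed length fell below minlen, args->len would be left
 * untouched instead.
 */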
1da177e4
LT
375/*
376 * Update the two btrees, logically removing from freespace the extent
377 * starting at rbno, rlen blocks. The extent is contained within the
378 * actual (current) free extent fbno for flen blocks.
379 * Flags are passed in indicating whether the cursors are set to the
380 * relevant records.
381 */
382STATIC int /* error code */
383xfs_alloc_fixup_trees(
384 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
385 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
386 xfs_agblock_t fbno, /* starting block of free extent */
387 xfs_extlen_t flen, /* length of free extent */
388 xfs_agblock_t rbno, /* starting block of returned extent */
389 xfs_extlen_t rlen, /* length of returned extent */
390 int flags) /* flags, XFSA_FIXUP_... */
391{
392 int error; /* error code */
393 int i; /* operation results */
394 xfs_agblock_t nfbno1; /* first new free startblock */
395 xfs_agblock_t nfbno2; /* second new free startblock */
396 xfs_extlen_t nflen1=0; /* first new free length */
397 xfs_extlen_t nflen2=0; /* second new free length */
5fb5aeee
ES
398 struct xfs_mount *mp;
399
400 mp = cnt_cur->bc_mp;
1da177e4
LT
401
402 /*
403 * Look up the record in the by-size tree if necessary.
404 */
405 if (flags & XFSA_FIXUP_CNT_OK) {
406#ifdef DEBUG
407 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
408 return error;
5fb5aeee 409 XFS_WANT_CORRUPTED_RETURN(mp,
1da177e4
LT
410 i == 1 && nfbno1 == fbno && nflen1 == flen);
411#endif
412 } else {
413 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
414 return error;
5fb5aeee 415 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
1da177e4
LT
416 }
417 /*
418 * Look up the record in the by-block tree if necessary.
419 */
420 if (flags & XFSA_FIXUP_BNO_OK) {
421#ifdef DEBUG
422 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
423 return error;
5fb5aeee 424 XFS_WANT_CORRUPTED_RETURN(mp,
1da177e4
LT
425 i == 1 && nfbno1 == fbno && nflen1 == flen);
426#endif
427 } else {
428 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
429 return error;
5fb5aeee 430 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
1da177e4 431 }
7cc95a82 432
1da177e4 433#ifdef DEBUG
7cc95a82
CH
434 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
435 struct xfs_btree_block *bnoblock;
436 struct xfs_btree_block *cntblock;
437
438 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
439 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
1da177e4 440
5fb5aeee 441 XFS_WANT_CORRUPTED_RETURN(mp,
7cc95a82 442 bnoblock->bb_numrecs == cntblock->bb_numrecs);
1da177e4
LT
443 }
444#endif
7cc95a82 445
1da177e4
LT
446 /*
447 * Deal with all four cases: the allocated record is contained
448 * within the freespace record, so we can have new freespace
449 * at either (or both) end, or no freespace remaining.
450 */
451 if (rbno == fbno && rlen == flen)
452 nfbno1 = nfbno2 = NULLAGBLOCK;
453 else if (rbno == fbno) {
454 nfbno1 = rbno + rlen;
455 nflen1 = flen - rlen;
456 nfbno2 = NULLAGBLOCK;
457 } else if (rbno + rlen == fbno + flen) {
458 nfbno1 = fbno;
459 nflen1 = flen - rlen;
460 nfbno2 = NULLAGBLOCK;
461 } else {
462 nfbno1 = fbno;
463 nflen1 = rbno - fbno;
464 nfbno2 = rbno + rlen;
465 nflen2 = (fbno + flen) - nfbno2;
466 }
467 /*
468 * Delete the entry from the by-size btree.
469 */
91cca5df 470 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 471 return error;
5fb5aeee 472 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
1da177e4
LT
473 /*
474 * Add new by-size btree entry(s).
475 */
476 if (nfbno1 != NULLAGBLOCK) {
477 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
478 return error;
5fb5aeee 479 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
4b22a571 480 if ((error = xfs_btree_insert(cnt_cur, &i)))
1da177e4 481 return error;
5fb5aeee 482 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
1da177e4
LT
483 }
484 if (nfbno2 != NULLAGBLOCK) {
485 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
486 return error;
5fb5aeee 487 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
4b22a571 488 if ((error = xfs_btree_insert(cnt_cur, &i)))
1da177e4 489 return error;
5fb5aeee 490 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
1da177e4
LT
491 }
492 /*
493 * Fix up the by-block btree entry(s).
494 */
495 if (nfbno1 == NULLAGBLOCK) {
496 /*
497 * No remaining freespace, just delete the by-block tree entry.
498 */
91cca5df 499 if ((error = xfs_btree_delete(bno_cur, &i)))
1da177e4 500 return error;
5fb5aeee 501 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
1da177e4
LT
502 } else {
503 /*
504 * Update the by-block entry to start later|be shorter.
505 */
506 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
507 return error;
508 }
509 if (nfbno2 != NULLAGBLOCK) {
510 /*
511 * 2 resulting free entries, need to add one.
512 */
513 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
514 return error;
5fb5aeee 515 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
4b22a571 516 if ((error = xfs_btree_insert(bno_cur, &i)))
1da177e4 517 return error;
5fb5aeee 518 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
1da177e4
LT
519 }
520 return 0;
521}
522
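/*
 * Worked example of the middle-of-extent case above (numbers made up):
 * carving rbno = 55, rlen = 8 out of the free extent fbno = 50,
 * flen = 20 leaves nfbno1 = 50 / nflen1 = 5 and nfbno2 = 63 /
 * nflen2 = 7, so one by-size record is deleted and two inserted, while
 * the by-block record is shortened in place and a second one added.
 */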
a6a781a5 523static xfs_failaddr_t
612cfbfe 524xfs_agfl_verify(
bb80c6d7
DC
525 struct xfs_buf *bp)
526{
bb80c6d7
DC
527 struct xfs_mount *mp = bp->b_target->bt_mount;
528 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
bb80c6d7
DC
529 int i;
530
b5572597
DW
531 /*
532 * There is no verification of non-crc AGFLs because mkfs does not
533 * initialise the AGFL to zero or NULL. Hence the only valid part of the
534 * AGFL is what the AGF says is active. We can't get to the AGF, so we
535 * can't verify just those entries are valid.
536 */
537 if (!xfs_sb_version_hascrc(&mp->m_sb))
538 return NULL;
539
ce748eaa 540 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
a6a781a5 541 return __this_address;
77c95bba 542 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
a6a781a5 543 return __this_address;
77c95bba
CH
544 /*
545 * during growfs operations, the perag is not fully initialised,
546 * so we can't use it for any useful checking. growfs ensures we can't
547 * use it by using uncached buffers that don't have the perag attached
548 * so we can detect and avoid this problem.
549 */
550 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
a6a781a5 551 return __this_address;
77c95bba 552
bb80c6d7 553 for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
77c95bba 554 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
bb80c6d7 555 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
a6a781a5 556 return __this_address;
bb80c6d7 557 }
a45086e2 558
a6a781a5
DW
559 if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
560 return __this_address;
561 return NULL;
77c95bba
CH
562}
563
564static void
565xfs_agfl_read_verify(
566 struct xfs_buf *bp)
567{
568 struct xfs_mount *mp = bp->b_target->bt_mount;
bc1a09b8 569 xfs_failaddr_t fa;
77c95bba
CH
570
571 /*
572 * There is no verification of non-crc AGFLs because mkfs does not
573 * initialise the AGFL to zero or NULL. Hence the only valid part of the
574 * AGFL is what the AGF says is active. We can't get to the AGF, so we
575 * can't verify just those entries are valid.
576 */
577 if (!xfs_sb_version_hascrc(&mp->m_sb))
578 return;
579
ce5028cf 580 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
bc1a09b8
DW
581 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
582 else {
583 fa = xfs_agfl_verify(bp);
584 if (fa)
585 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
586 }
612cfbfe
DC
587}
588
1813dd64 589static void
612cfbfe
DC
590xfs_agfl_write_verify(
591 struct xfs_buf *bp)
592{
fb1755a6
CM
593 struct xfs_mount *mp = bp->b_target->bt_mount;
594 struct xfs_buf_log_item *bip = bp->b_log_item;
bc1a09b8 595 xfs_failaddr_t fa;
612cfbfe 596
77c95bba
CH
597 /* no verification of non-crc AGFLs */
598 if (!xfs_sb_version_hascrc(&mp->m_sb))
599 return;
600
bc1a09b8
DW
601 fa = xfs_agfl_verify(bp);
602 if (fa) {
603 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
77c95bba
CH
604 return;
605 }
606
607 if (bip)
608 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
609
f1dbcd7e 610 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
bb80c6d7
DC
611}
612
1813dd64 613const struct xfs_buf_ops xfs_agfl_buf_ops = {
233135b7 614 .name = "xfs_agfl",
1813dd64
DC
615 .verify_read = xfs_agfl_read_verify,
616 .verify_write = xfs_agfl_write_verify,
b5572597 617 .verify_struct = xfs_agfl_verify,
1813dd64
DC
618};
619
1da177e4
LT
620/*
621 * Read in the allocation group free block array.
622 */
26788097 623int /* error */
1da177e4
LT
624xfs_alloc_read_agfl(
625 xfs_mount_t *mp, /* mount point structure */
626 xfs_trans_t *tp, /* transaction pointer */
627 xfs_agnumber_t agno, /* allocation group number */
628 xfs_buf_t **bpp) /* buffer for the ag free block array */
629{
630 xfs_buf_t *bp; /* return value */
631 int error;
632
633 ASSERT(agno != NULLAGNUMBER);
634 error = xfs_trans_read_buf(
635 mp, tp, mp->m_ddev_targp,
636 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
1813dd64 637 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
1da177e4
LT
638 if (error)
639 return error;
38f23232 640 xfs_buf_set_ref(bp, XFS_AGFL_REF);
1da177e4
LT
641 *bpp = bp;
642 return 0;
643}
644
ecb6928f
CH
645STATIC int
646xfs_alloc_update_counters(
647 struct xfs_trans *tp,
648 struct xfs_perag *pag,
649 struct xfs_buf *agbp,
650 long len)
651{
652 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
653
654 pag->pagf_freeblks += len;
655 be32_add_cpu(&agf->agf_freeblks, len);
656
657 xfs_trans_agblocks_delta(tp, len);
658 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
659 be32_to_cpu(agf->agf_length)))
2451337d 660 return -EFSCORRUPTED;
ecb6928f
CH
661
662 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
663 return 0;
664}
665
1da177e4
LT
666/*
667 * Allocation group level functions.
668 */
669
670/*
671 * Allocate a variable extent in the allocation group agno.
672 * Type and bno are used to determine where in the allocation group the
673 * extent will start.
674 * Extent's length (returned in *len) will be between minlen and maxlen,
675 * and of the form k * prod + mod unless there's nothing that large.
676 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
677 */
678STATIC int /* error */
679xfs_alloc_ag_vextent(
680 xfs_alloc_arg_t *args) /* argument structure for allocation */
681{
682 int error=0;
1da177e4
LT
683
684 ASSERT(args->minlen > 0);
685 ASSERT(args->maxlen > 0);
686 ASSERT(args->minlen <= args->maxlen);
687 ASSERT(args->mod < args->prod);
688 ASSERT(args->alignment > 0);
3fd129b6 689
1da177e4
LT
690 /*
691 * Branch to correct routine based on the type.
692 */
693 args->wasfromfl = 0;
694 switch (args->type) {
695 case XFS_ALLOCTYPE_THIS_AG:
696 error = xfs_alloc_ag_vextent_size(args);
697 break;
698 case XFS_ALLOCTYPE_NEAR_BNO:
699 error = xfs_alloc_ag_vextent_near(args);
700 break;
701 case XFS_ALLOCTYPE_THIS_BNO:
702 error = xfs_alloc_ag_vextent_exact(args);
703 break;
704 default:
705 ASSERT(0);
706 /* NOTREACHED */
707 }
ecb6928f
CH
708
709 if (error || args->agbno == NULLAGBLOCK)
1da177e4 710 return error;
ecb6928f
CH
711
712 ASSERT(args->len >= args->minlen);
713 ASSERT(args->len <= args->maxlen);
3fd129b6 714 ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
ecb6928f
CH
715 ASSERT(args->agbno % args->alignment == 0);
716
673930c3 717 /* if not file data, insert new block into the reverse map btree */
33df3a9c 718 if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
673930c3
DW
719 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
720 args->agbno, args->len, &args->oinfo);
721 if (error)
722 return error;
723 }
724
ecb6928f
CH
725 if (!args->wasfromfl) {
726 error = xfs_alloc_update_counters(args->tp, args->pag,
727 args->agbp,
728 -((long)(args->len)));
729 if (error)
730 return error;
731
4ecbfe63 732 ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
e26f0501 733 args->agbno, args->len));
1da177e4 734 }
ecb6928f 735
3fd129b6 736 xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
ecb6928f 737
ff6d6af2
BD
738 XFS_STATS_INC(args->mp, xs_allocx);
739 XFS_STATS_ADD(args->mp, xs_allocb, args->len);
ecb6928f 740 return error;
1da177e4
LT
741}
742
743/*
744 * Allocate a variable extent at exactly agno/bno.
745 * Extent's length (returned in *len) will be between minlen and maxlen,
746 * and of the form k * prod + mod unless there's nothing that large.
747 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
748 */
749STATIC int /* error */
750xfs_alloc_ag_vextent_exact(
751 xfs_alloc_arg_t *args) /* allocation argument structure */
752{
753 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
754 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
1da177e4
LT
755 int error;
756 xfs_agblock_t fbno; /* start block of found extent */
1da177e4 757 xfs_extlen_t flen; /* length of found extent */
ebf55872
CH
758 xfs_agblock_t tbno; /* start block of busy extent */
759 xfs_extlen_t tlen; /* length of busy extent */
760 xfs_agblock_t tend; /* end block of busy extent */
1da177e4 761 int i; /* success/failure of operation */
ebf55872 762 unsigned busy_gen;
1da177e4
LT
763
764 ASSERT(args->alignment == 1);
9f9baab3 765
1da177e4
LT
766 /*
767 * Allocate/initialize a cursor for the by-number freespace btree.
768 */
561f7d17 769 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
9f9baab3
CH
770 args->agno, XFS_BTNUM_BNO);
771
1da177e4
LT
772 /*
773 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
774 * Look for the closest free block <= bno, it must contain bno
775 * if any free block does.
776 */
9f9baab3
CH
777 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
778 if (error)
1da177e4 779 goto error0;
9f9baab3
CH
780 if (!i)
781 goto not_found;
782
1da177e4
LT
783 /*
784 * Grab the freespace record.
785 */
9f9baab3
CH
786 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
787 if (error)
1da177e4 788 goto error0;
c29aad41 789 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1da177e4 790 ASSERT(fbno <= args->agbno);
9f9baab3 791
1da177e4 792 /*
e26f0501 793 * Check for overlapping busy extents.
1da177e4 794 */
ebf55872
CH
795 tbno = fbno;
796 tlen = flen;
797 xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
e26f0501
CH
798
799 /*
800 * Give up if the start of the extent is busy, or the freespace isn't
801 * long enough for the minimum request.
802 */
803 if (tbno > args->agbno)
804 goto not_found;
805 if (tlen < args->minlen)
806 goto not_found;
807 tend = tbno + tlen;
808 if (tend < args->agbno + args->minlen)
9f9baab3
CH
809 goto not_found;
810
1da177e4
LT
811 /*
812 * End of extent will be smaller of the freespace end and the
813 * maximal requested end.
9f9baab3 814 *
1da177e4
LT
815 * Fix the length according to mod and prod if given.
816 */
81463b1c
CS
817 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
818 - args->agbno;
1da177e4 819 xfs_alloc_fix_len(args);
81463b1c 820 ASSERT(args->agbno + args->len <= tend);
9f9baab3 821
1da177e4 822 /*
81463b1c 823 * We are allocating agbno for args->len
1da177e4
LT
824 * Allocate/initialize a cursor for the by-size btree.
825 */
561f7d17
CH
826 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
827 args->agno, XFS_BTNUM_CNT);
1da177e4 828 ASSERT(args->agbno + args->len <=
16259e7d 829 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
9f9baab3
CH
830 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
831 args->len, XFSA_FIXUP_BNO_OK);
832 if (error) {
1da177e4
LT
833 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
834 goto error0;
835 }
9f9baab3 836
1da177e4
LT
837 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
838 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
0b1b213f 839
1da177e4 840 args->wasfromfl = 0;
9f9baab3
CH
841 trace_xfs_alloc_exact_done(args);
842 return 0;
843
844not_found:
845 /* Didn't find it, return null. */
846 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
847 args->agbno = NULLAGBLOCK;
848 trace_xfs_alloc_exact_notfound(args);
1da177e4
LT
849 return 0;
850
851error0:
852 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
0b1b213f 853 trace_xfs_alloc_exact_error(args);
1da177e4
LT
854 return error;
855}
856
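/*
 * Illustrative sketch, not part of this file: the argument fields that
 * xfs_alloc_ag_vextent_exact() consumes, with made-up values. Real
 * callers go through xfs_alloc_vextent(), which also locks the AGF and
 * fixes up the freelist before the AG-level routines run.
 */
static void
example_setup_exact_alloc(
	xfs_alloc_arg_t		*args)
{
	args->type = XFS_ALLOCTYPE_THIS_BNO;	/* must start at args->agbno */
	args->alignment = 1;			/* asserted by the exact path */
	args->minlen = 1;			/* accept as little as 1 block */
	args->maxlen = 16;			/* ... and at most 16 blocks */
	args->prod = 1;				/* no stripe-unit rounding */
	args->mod = 0;
}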
489a150f
CH
857/*
858 * Search the btree in a given direction via the search cursor and compare
859 * the records found against the good extent we've already found.
860 */
861STATIC int
862xfs_alloc_find_best_extent(
863 struct xfs_alloc_arg *args, /* allocation argument structure */
864 struct xfs_btree_cur **gcur, /* good cursor */
865 struct xfs_btree_cur **scur, /* searching cursor */
866 xfs_agblock_t gdiff, /* difference for search comparison */
867 xfs_agblock_t *sbno, /* extent found by search */
e26f0501
CH
868 xfs_extlen_t *slen, /* extent length */
869 xfs_agblock_t *sbnoa, /* aligned extent found by search */
870 xfs_extlen_t *slena, /* aligned extent length */
489a150f
CH
871 int dir) /* 0 = search right, 1 = search left */
872{
489a150f
CH
873 xfs_agblock_t new;
874 xfs_agblock_t sdiff;
875 int error;
876 int i;
ebf55872 877 unsigned busy_gen;
489a150f
CH
878
879 /* The good extent is perfect, no need to search. */
880 if (!gdiff)
881 goto out_use_good;
882
883 /*
884 * Look until we find a better one, run out of space or run off the end.
885 */
886 do {
887 error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
888 if (error)
889 goto error0;
c29aad41 890 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
ebf55872
CH
891 xfs_alloc_compute_aligned(args, *sbno, *slen,
892 sbnoa, slena, &busy_gen);
489a150f
CH
893
894 /*
895 * The good extent is closer than this one.
896 */
897 if (!dir) {
bfe46d4e
BF
898 if (*sbnoa > args->max_agbno)
899 goto out_use_good;
e26f0501 900 if (*sbnoa >= args->agbno + gdiff)
489a150f
CH
901 goto out_use_good;
902 } else {
bfe46d4e
BF
903 if (*sbnoa < args->min_agbno)
904 goto out_use_good;
e26f0501 905 if (*sbnoa <= args->agbno - gdiff)
489a150f
CH
906 goto out_use_good;
907 }
908
909 /*
910 * Same distance, compare length and pick the best.
911 */
912 if (*slena >= args->minlen) {
913 args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
914 xfs_alloc_fix_len(args);
915
916 sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
211d022c 917 args->alignment,
292378ed 918 args->datatype, *sbnoa,
e26f0501 919 *slena, &new);
489a150f
CH
920
921 /*
922 * Choose closer size and invalidate other cursor.
923 */
924 if (sdiff < gdiff)
925 goto out_use_search;
926 goto out_use_good;
927 }
928
929 if (!dir)
930 error = xfs_btree_increment(*scur, 0, &i);
931 else
932 error = xfs_btree_decrement(*scur, 0, &i);
933 if (error)
934 goto error0;
935 } while (i);
936
937out_use_good:
938 xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
939 *scur = NULL;
940 return 0;
941
942out_use_search:
943 xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
944 *gcur = NULL;
945 return 0;
946
947error0:
948 /* caller invalidates cursors */
949 return error;
950}
951
1da177e4
LT
952/*
953 * Allocate a variable extent near bno in the allocation group agno.
954 * Extent's length (returned in len) will be between minlen and maxlen,
955 * and of the form k * prod + mod unless there's nothing that large.
956 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
957 */
958STATIC int /* error */
959xfs_alloc_ag_vextent_near(
960 xfs_alloc_arg_t *args) /* allocation argument structure */
961{
962 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */
963 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */
964 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */
1da177e4
LT
965 xfs_agblock_t gtbno; /* start bno of right side entry */
966 xfs_agblock_t gtbnoa; /* aligned ... */
967 xfs_extlen_t gtdiff; /* difference to right side entry */
968 xfs_extlen_t gtlen; /* length of right side entry */
e26f0501 969 xfs_extlen_t gtlena; /* aligned ... */
1da177e4
LT
970 xfs_agblock_t gtnew; /* useful start bno of right side */
971 int error; /* error code */
972 int i; /* result code, temporary */
973 int j; /* result code, temporary */
974 xfs_agblock_t ltbno; /* start bno of left side entry */
975 xfs_agblock_t ltbnoa; /* aligned ... */
976 xfs_extlen_t ltdiff; /* difference to left side entry */
1da177e4 977 xfs_extlen_t ltlen; /* length of left side entry */
e26f0501 978 xfs_extlen_t ltlena; /* aligned ... */
1da177e4
LT
979 xfs_agblock_t ltnew; /* useful start bno of left side */
980 xfs_extlen_t rlen; /* length of returned extent */
ebf55872
CH
981 bool busy;
982 unsigned busy_gen;
63d20d6e 983#ifdef DEBUG
1da177e4
LT
984 /*
985 * Randomly don't execute the first algorithm.
986 */
987 int dofirst; /* set to do first algorithm */
988
ecb3403d 989 dofirst = prandom_u32() & 1;
1da177e4 990#endif
e26f0501 991
bfe46d4e
BF
 992 /* handle uninitialized agbno range so caller doesn't have to */
993 if (!args->min_agbno && !args->max_agbno)
994 args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
995 ASSERT(args->min_agbno <= args->max_agbno);
996
997 /* clamp agbno to the range if it's outside */
998 if (args->agbno < args->min_agbno)
999 args->agbno = args->min_agbno;
1000 if (args->agbno > args->max_agbno)
1001 args->agbno = args->max_agbno;
1002
e26f0501
CH
1003restart:
1004 bno_cur_lt = NULL;
1005 bno_cur_gt = NULL;
1006 ltlen = 0;
1007 gtlena = 0;
1008 ltlena = 0;
ebf55872 1009 busy = false;
e26f0501 1010
1da177e4
LT
1011 /*
1012 * Get a cursor for the by-size btree.
1013 */
561f7d17
CH
1014 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1015 args->agno, XFS_BTNUM_CNT);
e26f0501 1016
1da177e4
LT
1017 /*
1018 * See if there are any free extents as big as maxlen.
1019 */
1020 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
1021 goto error0;
1022 /*
1023 * If none, then pick up the last entry in the tree unless the
1024 * tree is empty.
1025 */
1026 if (!i) {
1027 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
1028 &ltlen, &i)))
1029 goto error0;
1030 if (i == 0 || ltlen == 0) {
1031 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
e26f0501 1032 trace_xfs_alloc_near_noentry(args);
1da177e4
LT
1033 return 0;
1034 }
1035 ASSERT(i == 1);
1036 }
1037 args->wasfromfl = 0;
e26f0501 1038
1da177e4
LT
1039 /*
1040 * First algorithm.
1041 * If the requested extent is large wrt the freespaces available
1042 * in this a.g., then the cursor will be pointing to a btree entry
1043 * near the right edge of the tree. If it's in the last btree leaf
1044 * block, then we just examine all the entries in that block
1045 * that are big enough, and pick the best one.
1046 * This is written as a while loop so we can break out of it,
1047 * but we never loop back to the top.
1048 */
1049 while (xfs_btree_islastblock(cnt_cur, 0)) {
1050 xfs_extlen_t bdiff;
1051 int besti=0;
1052 xfs_extlen_t blen=0;
1053 xfs_agblock_t bnew=0;
1054
63d20d6e
DC
1055#ifdef DEBUG
1056 if (dofirst)
1da177e4
LT
1057 break;
1058#endif
1059 /*
1060 * Start from the entry that lookup found, sequence through
1061 * all larger free blocks. If we're actually pointing at a
1062 * record smaller than maxlen, go to the start of this block,
1063 * and skip all those smaller than minlen.
1064 */
1065 if (ltlen || args->alignment > 1) {
1066 cnt_cur->bc_ptrs[0] = 1;
1067 do {
1068 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
1069 &ltlen, &i)))
1070 goto error0;
c29aad41 1071 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1da177e4
LT
1072 if (ltlen >= args->minlen)
1073 break;
637aa50f 1074 if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
1da177e4
LT
1075 goto error0;
1076 } while (i);
1077 ASSERT(ltlen >= args->minlen);
1078 if (!i)
1079 break;
1080 }
1081 i = cnt_cur->bc_ptrs[0];
1082 for (j = 1, blen = 0, bdiff = 0;
1083 !error && j && (blen < args->maxlen || bdiff > 0);
637aa50f 1084 error = xfs_btree_increment(cnt_cur, 0, &j)) {
1da177e4
LT
1085 /*
1086 * For each entry, decide if it's better than
1087 * the previous best entry.
1088 */
1089 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1090 goto error0;
c29aad41 1091 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
ebf55872
CH
1092 busy = xfs_alloc_compute_aligned(args, ltbno, ltlen,
1093 &ltbnoa, &ltlena, &busy_gen);
e6430037 1094 if (ltlena < args->minlen)
1da177e4 1095 continue;
bfe46d4e
BF
1096 if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
1097 continue;
1da177e4
LT
1098 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1099 xfs_alloc_fix_len(args);
1100 ASSERT(args->len >= args->minlen);
1101 if (args->len < blen)
1102 continue;
1103 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
292378ed 1104 args->alignment, args->datatype, ltbnoa,
211d022c 1105 ltlena, &ltnew);
1da177e4
LT
1106 if (ltnew != NULLAGBLOCK &&
1107 (args->len > blen || ltdiff < bdiff)) {
1108 bdiff = ltdiff;
1109 bnew = ltnew;
1110 blen = args->len;
1111 besti = cnt_cur->bc_ptrs[0];
1112 }
1113 }
1114 /*
1115 * It didn't work. We COULD be in a case where
1116 * there's a good record somewhere, so try again.
1117 */
1118 if (blen == 0)
1119 break;
1120 /*
1121 * Point at the best entry, and retrieve it again.
1122 */
1123 cnt_cur->bc_ptrs[0] = besti;
1124 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1125 goto error0;
c29aad41 1126 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
73523a2e 1127 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1da177e4 1128 args->len = blen;
54fee133 1129
1da177e4
LT
1130 /*
1131 * We are allocating starting at bnew for blen blocks.
1132 */
1133 args->agbno = bnew;
1134 ASSERT(bnew >= ltbno);
73523a2e 1135 ASSERT(bnew + blen <= ltbno + ltlen);
1da177e4
LT
1136 /*
1137 * Set up a cursor for the by-bno tree.
1138 */
561f7d17
CH
1139 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1140 args->agbp, args->agno, XFS_BTNUM_BNO);
1da177e4
LT
1141 /*
1142 * Fix up the btree entries.
1143 */
1144 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1145 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
1146 goto error0;
1147 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1148 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
0b1b213f
CH
1149
1150 trace_xfs_alloc_near_first(args);
1da177e4
LT
1151 return 0;
1152 }
1153 /*
1154 * Second algorithm.
1155 * Search in the by-bno tree to the left and to the right
1156 * simultaneously, until in each case we find a space big enough,
1157 * or run into the edge of the tree. When we run into the edge,
1158 * we deallocate that cursor.
1159 * If both searches succeed, we compare the two spaces and pick
1160 * the better one.
1161 * With alignment, it's possible for both to fail; the upper
1162 * level algorithm that picks allocation groups for allocations
1163 * is not supposed to do this.
1164 */
1165 /*
1166 * Allocate and initialize the cursor for the leftward search.
1167 */
561f7d17
CH
1168 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1169 args->agno, XFS_BTNUM_BNO);
1da177e4
LT
1170 /*
1171 * Lookup <= bno to find the leftward search's starting point.
1172 */
1173 if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
1174 goto error0;
1175 if (!i) {
1176 /*
1177 * Didn't find anything; use this cursor for the rightward
1178 * search.
1179 */
1180 bno_cur_gt = bno_cur_lt;
1181 bno_cur_lt = NULL;
1182 }
1183 /*
1184 * Found something. Duplicate the cursor for the rightward search.
1185 */
1186 else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
1187 goto error0;
1188 /*
1189 * Increment the cursor, so we will point at the entry just right
1190 * of the leftward entry if any, or to the leftmost entry.
1191 */
637aa50f 1192 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1da177e4
LT
1193 goto error0;
1194 if (!i) {
1195 /*
1196 * It failed, there are no rightward entries.
1197 */
1198 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1199 bno_cur_gt = NULL;
1200 }
1201 /*
1202 * Loop going left with the leftward cursor, right with the
1203 * rightward cursor, until either both directions give up or
1204 * we find an entry at least as big as minlen.
1205 */
1206 do {
1207 if (bno_cur_lt) {
1208 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
1209 goto error0;
c29aad41 1210 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
ebf55872
CH
1211 busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen,
1212 &ltbnoa, &ltlena, &busy_gen);
bfe46d4e 1213 if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
1da177e4 1214 break;
8df4da4a 1215 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
1da177e4 1216 goto error0;
bfe46d4e 1217 if (!i || ltbnoa < args->min_agbno) {
1da177e4
LT
1218 xfs_btree_del_cursor(bno_cur_lt,
1219 XFS_BTREE_NOERROR);
1220 bno_cur_lt = NULL;
1221 }
1222 }
1223 if (bno_cur_gt) {
1224 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1225 goto error0;
c29aad41 1226 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
ebf55872
CH
1227 busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen,
1228 &gtbnoa, &gtlena, &busy_gen);
bfe46d4e 1229 if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
1da177e4 1230 break;
637aa50f 1231 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1da177e4 1232 goto error0;
bfe46d4e 1233 if (!i || gtbnoa > args->max_agbno) {
1da177e4
LT
1234 xfs_btree_del_cursor(bno_cur_gt,
1235 XFS_BTREE_NOERROR);
1236 bno_cur_gt = NULL;
1237 }
1238 }
1239 } while (bno_cur_lt || bno_cur_gt);
489a150f 1240
1da177e4
LT
1241 /*
1242 * Got both cursors still active, need to find better entry.
1243 */
1244 if (bno_cur_lt && bno_cur_gt) {
1da177e4
LT
1245 if (ltlena >= args->minlen) {
1246 /*
489a150f 1247 * Left side is good, look for a right side entry.
1da177e4
LT
1248 */
1249 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1250 xfs_alloc_fix_len(args);
489a150f 1251 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
292378ed 1252 args->alignment, args->datatype, ltbnoa,
211d022c 1253 ltlena, &ltnew);
489a150f
CH
1254
1255 error = xfs_alloc_find_best_extent(args,
1256 &bno_cur_lt, &bno_cur_gt,
e26f0501
CH
1257 ltdiff, &gtbno, &gtlen,
1258 &gtbnoa, &gtlena,
489a150f
CH
1259 0 /* search right */);
1260 } else {
1261 ASSERT(gtlena >= args->minlen);
1262
1da177e4 1263 /*
489a150f 1264 * Right side is good, look for a left side entry.
1da177e4
LT
1265 */
1266 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1267 xfs_alloc_fix_len(args);
489a150f 1268 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
292378ed 1269 args->alignment, args->datatype, gtbnoa,
211d022c 1270 gtlena, &gtnew);
489a150f
CH
1271
1272 error = xfs_alloc_find_best_extent(args,
1273 &bno_cur_gt, &bno_cur_lt,
e26f0501
CH
1274 gtdiff, &ltbno, &ltlen,
1275 &ltbnoa, &ltlena,
489a150f 1276 1 /* search left */);
1da177e4 1277 }
489a150f
CH
1278
1279 if (error)
1280 goto error0;
1da177e4 1281 }
489a150f 1282
1da177e4
LT
1283 /*
1284 * If we couldn't get anything, give up.
1285 */
1286 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
e3a746f5
DC
1287 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1288
ebf55872 1289 if (busy) {
e26f0501 1290 trace_xfs_alloc_near_busy(args);
ebf55872 1291 xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
e26f0501
CH
1292 goto restart;
1293 }
0b1b213f 1294 trace_xfs_alloc_size_neither(args);
1da177e4
LT
1295 args->agbno = NULLAGBLOCK;
1296 return 0;
1297 }
489a150f 1298
1da177e4
LT
1299 /*
1300 * At this point we have selected a freespace entry, either to the
1301 * left or to the right. If it's on the right, copy all the
1302 * useful variables to the "left" set so we only have one
1303 * copy of this code.
1304 */
1305 if (bno_cur_gt) {
1306 bno_cur_lt = bno_cur_gt;
1307 bno_cur_gt = NULL;
1308 ltbno = gtbno;
1309 ltbnoa = gtbnoa;
1310 ltlen = gtlen;
1311 ltlena = gtlena;
1312 j = 1;
1313 } else
1314 j = 0;
489a150f 1315
1da177e4
LT
1316 /*
1317 * Fix up the length and compute the useful address.
1318 */
1da177e4
LT
1319 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1320 xfs_alloc_fix_len(args);
1da177e4 1321 rlen = args->len;
e26f0501 1322 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
292378ed 1323 args->datatype, ltbnoa, ltlena, &ltnew);
1da177e4 1324 ASSERT(ltnew >= ltbno);
e26f0501 1325 ASSERT(ltnew + rlen <= ltbnoa + ltlena);
16259e7d 1326 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
bfe46d4e 1327 ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
1da177e4 1328 args->agbno = ltnew;
e26f0501 1329
1da177e4
LT
1330 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1331 ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1332 goto error0;
0b1b213f
CH
1333
1334 if (j)
1335 trace_xfs_alloc_near_greater(args);
1336 else
1337 trace_xfs_alloc_near_lesser(args);
1338
1da177e4
LT
1339 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1340 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1341 return 0;
1342
1343 error0:
0b1b213f 1344 trace_xfs_alloc_near_error(args);
1da177e4
LT
1345 if (cnt_cur != NULL)
1346 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1347 if (bno_cur_lt != NULL)
1348 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1349 if (bno_cur_gt != NULL)
1350 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1351 return error;
1352}
1353
1354/*
1355 * Allocate a variable extent anywhere in the allocation group agno.
1356 * Extent's length (returned in len) will be between minlen and maxlen,
1357 * and of the form k * prod + mod unless there's nothing that large.
1358 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1359 */
1360STATIC int /* error */
1361xfs_alloc_ag_vextent_size(
1362 xfs_alloc_arg_t *args) /* allocation argument structure */
1363{
1364 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */
1365 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */
1366 int error; /* error result */
1367 xfs_agblock_t fbno; /* start of found freespace */
1368 xfs_extlen_t flen; /* length of found freespace */
1da177e4
LT
1369 int i; /* temp status variable */
1370 xfs_agblock_t rbno; /* returned block number */
1371 xfs_extlen_t rlen; /* length of returned extent */
ebf55872
CH
1372 bool busy;
1373 unsigned busy_gen;
1da177e4 1374
e26f0501 1375restart:
1da177e4
LT
1376 /*
1377 * Allocate and initialize a cursor for the by-size btree.
1378 */
561f7d17
CH
1379 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1380 args->agno, XFS_BTNUM_CNT);
1da177e4 1381 bno_cur = NULL;
ebf55872 1382 busy = false;
e26f0501 1383
1da177e4
LT
1384 /*
1385 * Look for an entry >= maxlen+alignment-1 blocks.
1386 */
1387 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1388 args->maxlen + args->alignment - 1, &i)))
1389 goto error0;
e26f0501 1390
1da177e4 1391 /*
ebf55872
CH
1392 * If none then we have to settle for a smaller extent. In the case that
1393 * there are no large extents, this will return the last entry in the
1394 * tree unless the tree is empty. In the case that there are only busy
1395 * large extents, this will return the largest small extent unless there
e26f0501 1396 * are no smaller extents available.
1da177e4 1397 */
ebf55872 1398 if (!i) {
e26f0501
CH
1399 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1400 &fbno, &flen, &i);
1401 if (error)
1da177e4
LT
1402 goto error0;
1403 if (i == 0 || flen == 0) {
1404 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
0b1b213f 1405 trace_xfs_alloc_size_noentry(args);
1da177e4
LT
1406 return 0;
1407 }
1408 ASSERT(i == 1);
ebf55872
CH
1409 busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1410 &rlen, &busy_gen);
e26f0501
CH
1411 } else {
1412 /*
1413 * Search for a non-busy extent that is large enough.
e26f0501
CH
1414 */
1415 for (;;) {
1416 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1417 if (error)
1418 goto error0;
c29aad41 1419 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
e26f0501 1420
ebf55872
CH
1421 busy = xfs_alloc_compute_aligned(args, fbno, flen,
1422 &rbno, &rlen, &busy_gen);
e26f0501
CH
1423
1424 if (rlen >= args->maxlen)
1425 break;
1426
1427 error = xfs_btree_increment(cnt_cur, 0, &i);
1428 if (error)
1429 goto error0;
1430 if (i == 0) {
1431 /*
1432 * Our only valid extents must have been busy.
1433 * Make it unbusy by forcing the log out and
ebf55872 1434 * retrying.
e26f0501
CH
1435 */
1436 xfs_btree_del_cursor(cnt_cur,
1437 XFS_BTREE_NOERROR);
1438 trace_xfs_alloc_size_busy(args);
ebf55872
CH
1439 xfs_extent_busy_flush(args->mp,
1440 args->pag, busy_gen);
e26f0501
CH
1441 goto restart;
1442 }
1443 }
1da177e4 1444 }
e26f0501 1445
1da177e4
LT
1446 /*
1447 * In the first case above, we got the last entry in the
1448 * by-size btree. Now we check to see if the space hits maxlen
1449 * once aligned; if not, we search left for something better.
1450 * This can't happen in the second case above.
1451 */
1da177e4 1452 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
c29aad41 1453 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1da177e4
LT
1454 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1455 if (rlen < args->maxlen) {
1456 xfs_agblock_t bestfbno;
1457 xfs_extlen_t bestflen;
1458 xfs_agblock_t bestrbno;
1459 xfs_extlen_t bestrlen;
1460
1461 bestrlen = rlen;
1462 bestrbno = rbno;
1463 bestflen = flen;
1464 bestfbno = fbno;
1465 for (;;) {
8df4da4a 1466 if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1da177e4
LT
1467 goto error0;
1468 if (i == 0)
1469 break;
1470 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1471 &i)))
1472 goto error0;
c29aad41 1473 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1da177e4
LT
1474 if (flen < bestrlen)
1475 break;
ebf55872
CH
1476 busy = xfs_alloc_compute_aligned(args, fbno, flen,
1477 &rbno, &rlen, &busy_gen);
1da177e4 1478 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
c29aad41 1479 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1da177e4
LT
1480 (rlen <= flen && rbno + rlen <= fbno + flen),
1481 error0);
1482 if (rlen > bestrlen) {
1483 bestrlen = rlen;
1484 bestrbno = rbno;
1485 bestflen = flen;
1486 bestfbno = fbno;
1487 if (rlen == args->maxlen)
1488 break;
1489 }
1490 }
1491 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1492 &i)))
1493 goto error0;
c29aad41 1494 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1da177e4
LT
1495 rlen = bestrlen;
1496 rbno = bestrbno;
1497 flen = bestflen;
1498 fbno = bestfbno;
1499 }
1500 args->wasfromfl = 0;
1501 /*
1502 * Fix up the length.
1503 */
1504 args->len = rlen;
e26f0501 1505 if (rlen < args->minlen) {
ebf55872 1506 if (busy) {
e26f0501
CH
1507 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1508 trace_xfs_alloc_size_busy(args);
ebf55872 1509 xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
e26f0501
CH
1510 goto restart;
1511 }
1512 goto out_nominleft;
1da177e4 1513 }
e26f0501
CH
1514 xfs_alloc_fix_len(args);
1515
1da177e4 1516 rlen = args->len;
c29aad41 1517 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
1da177e4
LT
1518 /*
1519 * Allocate and initialize a cursor for the by-block tree.
1520 */
561f7d17
CH
1521 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1522 args->agno, XFS_BTNUM_BNO);
1da177e4
LT
1523 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1524 rbno, rlen, XFSA_FIXUP_CNT_OK)))
1525 goto error0;
1526 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1527 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1528 cnt_cur = bno_cur = NULL;
1529 args->len = rlen;
1530 args->agbno = rbno;
c29aad41 1531 XFS_WANT_CORRUPTED_GOTO(args->mp,
1da177e4 1532 args->agbno + args->len <=
16259e7d 1533 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1da177e4 1534 error0);
0b1b213f 1535 trace_xfs_alloc_size_done(args);
1da177e4
LT
1536 return 0;
1537
1538error0:
0b1b213f 1539 trace_xfs_alloc_size_error(args);
1da177e4
LT
1540 if (cnt_cur)
1541 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1542 if (bno_cur)
1543 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1544 return error;
e26f0501
CH
1545
1546out_nominleft:
1547 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1548 trace_xfs_alloc_size_nominleft(args);
1549 args->agbno = NULLAGBLOCK;
1550 return 0;
1da177e4
LT
1551}
1552
1553/*
1554 * Deal with the case where only small freespaces remain.
1555 * Either return the contents of the last freespace record,
1556 * or allocate space from the freelist if there is nothing in the tree.
1557 */
1558STATIC int /* error */
1559xfs_alloc_ag_vextent_small(
1560 xfs_alloc_arg_t *args, /* allocation argument structure */
1561 xfs_btree_cur_t *ccur, /* by-size cursor */
1562 xfs_agblock_t *fbnop, /* result block number */
1563 xfs_extlen_t *flenp, /* result length */
1564 int *stat) /* status: 0-freelist, 1-normal/none */
1565{
a03f1a66 1566 struct xfs_owner_info oinfo;
3fd129b6 1567 struct xfs_perag *pag;
1da177e4
LT
1568 int error;
1569 xfs_agblock_t fbno;
1570 xfs_extlen_t flen;
1da177e4
LT
1571 int i;
1572
8df4da4a 1573 if ((error = xfs_btree_decrement(ccur, 0, &i)))
1da177e4
LT
1574 goto error0;
1575 if (i) {
1576 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
1577 goto error0;
c29aad41 1578 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1da177e4
LT
1579 }
1580 /*
1581 * Nothing in the btree, try the freelist. Make sure
1582 * to respect minleft even when pulling from the
1583 * freelist.
1584 */
3fd129b6
DW
1585 else if (args->minlen == 1 && args->alignment == 1 &&
1586 args->resv != XFS_AG_RESV_AGFL &&
16259e7d
CH
1587 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1588 > args->minleft)) {
92821e2b
DC
1589 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1590 if (error)
1da177e4
LT
1591 goto error0;
1592 if (fbno != NULLAGBLOCK) {
4ecbfe63 1593 xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
292378ed 1594 xfs_alloc_allow_busy_reuse(args->datatype));
97d3ac75 1595
292378ed 1596 if (xfs_alloc_is_userdata(args->datatype)) {
1da177e4
LT
1597 xfs_buf_t *bp;
1598
1599 bp = xfs_btree_get_bufs(args->mp, args->tp,
1600 args->agno, fbno, 0);
93e8befc
ES
1601 if (!bp) {
1602 error = -EFSCORRUPTED;
1603 goto error0;
1604 }
1da177e4
LT
1605 xfs_trans_binval(args->tp, bp);
1606 }
1607 args->len = 1;
1608 args->agbno = fbno;
c29aad41 1609 XFS_WANT_CORRUPTED_GOTO(args->mp,
1da177e4 1610 args->agbno + args->len <=
16259e7d 1611 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1da177e4
LT
1612 error0);
1613 args->wasfromfl = 1;
0b1b213f 1614 trace_xfs_alloc_small_freelist(args);
a03f1a66
DW
1615
1616 /*
1617 * If we're feeding an AGFL block to something that
1618 * doesn't live in the free space, we need to clear
3fd129b6
DW
1619 * out the OWN_AG rmap and add the block back to
1620 * the AGFL per-AG reservation.
a03f1a66
DW
1621 */
1622 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
1623 error = xfs_rmap_free(args->tp, args->agbp, args->agno,
1624 fbno, 1, &oinfo);
1625 if (error)
1626 goto error0;
3fd129b6
DW
1627 pag = xfs_perag_get(args->mp, args->agno);
1628 xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL,
1629 args->tp, 1);
1630 xfs_perag_put(pag);
a03f1a66 1631
1da177e4
LT
1632 *stat = 0;
1633 return 0;
1634 }
1635 /*
1636 * Nothing in the freelist.
1637 */
1638 else
1639 flen = 0;
1640 }
1641 /*
1642 * Can't allocate from the freelist for some reason.
1643 */
d432c80e
NS
1644 else {
1645 fbno = NULLAGBLOCK;
1da177e4 1646 flen = 0;
d432c80e 1647 }
1da177e4
LT
1648 /*
1649 * Can't do the allocation, give up.
1650 */
1651 if (flen < args->minlen) {
1652 args->agbno = NULLAGBLOCK;
0b1b213f 1653 trace_xfs_alloc_small_notenough(args);
1da177e4
LT
1654 flen = 0;
1655 }
1656 *fbnop = fbno;
1657 *flenp = flen;
1658 *stat = 1;
0b1b213f 1659 trace_xfs_alloc_small_done(args);
1da177e4
LT
1660 return 0;
1661
1662error0:
0b1b213f 1663 trace_xfs_alloc_small_error(args);
1da177e4
LT
1664 return error;
1665}
1666
1667/*
1668 * Free the extent starting at agno/bno for length.
1669 */
340785cc 1670STATIC int
1da177e4 1671xfs_free_ag_extent(
340785cc
DW
1672 xfs_trans_t *tp,
1673 xfs_buf_t *agbp,
1674 xfs_agnumber_t agno,
1675 xfs_agblock_t bno,
1676 xfs_extlen_t len,
1677 struct xfs_owner_info *oinfo,
3fd129b6 1678 enum xfs_ag_resv_type type)
1da177e4
LT
1679{
1680 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
1681 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
1682 int error; /* error return value */
1da177e4
LT
1683 xfs_agblock_t gtbno; /* start of right neighbor block */
1684 xfs_extlen_t gtlen; /* length of right neighbor block */
1685 int haveleft; /* have a left neighbor block */
1686 int haveright; /* have a right neighbor block */
1687 int i; /* temp, result code */
1688 xfs_agblock_t ltbno; /* start of left neighbor block */
1689 xfs_extlen_t ltlen; /* length of left neighbor block */
1690 xfs_mount_t *mp; /* mount point struct for filesystem */
1691 xfs_agblock_t nbno; /* new starting block of freespace */
1692 xfs_extlen_t nlen; /* new length of freespace */
ecb6928f 1693 xfs_perag_t *pag; /* per allocation group data */
1da177e4 1694
673930c3 1695 bno_cur = cnt_cur = NULL;
1da177e4 1696 mp = tp->t_mountp;
673930c3 1697
33df3a9c 1698 if (!xfs_rmap_should_skip_owner_update(oinfo)) {
673930c3
DW
1699 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1700 if (error)
1701 goto error0;
1702 }
1703
1da177e4
LT
1704 /*
1705 * Allocate and initialize a cursor for the by-block btree.
1706 */
561f7d17 1707 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1da177e4
LT
1708 /*
1709 * Look for a neighboring block on the left (lower block numbers)
1710 * that is contiguous with this space.
1711 */
1712 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1713 goto error0;
1714 if (haveleft) {
1715 /*
1716 * There is a block to our left.
1717 */
1718 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1719 goto error0;
c29aad41 1720 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1721 /*
1722 * It's not contiguous, though.
1723 */
1724 if (ltbno + ltlen < bno)
1725 haveleft = 0;
1726 else {
1727 /*
1728			 * If this failure happens, the request to free this
1729			 * space was invalid; it's (partly) already free.
1730 * Very bad.
1731 */
c29aad41
ES
1732 XFS_WANT_CORRUPTED_GOTO(mp,
1733 ltbno + ltlen <= bno, error0);
1da177e4
LT
1734 }
1735 }
1736 /*
1737 * Look for a neighboring block on the right (higher block numbers)
1738 * that is contiguous with this space.
1739 */
637aa50f 1740 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1da177e4
LT
1741 goto error0;
1742 if (haveright) {
1743 /*
1744 * There is a block to our right.
1745 */
1746 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1747 goto error0;
c29aad41 1748 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1749 /*
1750 * It's not contiguous, though.
1751 */
1752 if (bno + len < gtbno)
1753 haveright = 0;
1754 else {
1755 /*
1756			 * If this failure happens, the request to free this
1757			 * space was invalid; it's (partly) already free.
1758 * Very bad.
1759 */
c29aad41 1760 XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
1da177e4
LT
1761 }
1762 }
1763 /*
1764 * Now allocate and initialize a cursor for the by-size tree.
1765 */
561f7d17 1766 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1da177e4
LT
1767 /*
1768 * Have both left and right contiguous neighbors.
1769 * Merge all three into a single free block.
1770 */
1771 if (haveleft && haveright) {
1772 /*
1773 * Delete the old by-size entry on the left.
1774 */
1775 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1776 goto error0;
c29aad41 1777 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
91cca5df 1778 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 1779 goto error0;
c29aad41 1780 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1781 /*
1782 * Delete the old by-size entry on the right.
1783 */
1784 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1785 goto error0;
c29aad41 1786 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
91cca5df 1787 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 1788 goto error0;
c29aad41 1789 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1790 /*
1791 * Delete the old by-block entry for the right block.
1792 */
91cca5df 1793 if ((error = xfs_btree_delete(bno_cur, &i)))
1da177e4 1794 goto error0;
c29aad41 1795 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1796 /*
1797 * Move the by-block cursor back to the left neighbor.
1798 */
8df4da4a 1799 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1da177e4 1800 goto error0;
c29aad41 1801 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1802#ifdef DEBUG
1803 /*
1804 * Check that this is the right record: delete didn't
1805 * mangle the cursor.
1806 */
1807 {
1808 xfs_agblock_t xxbno;
1809 xfs_extlen_t xxlen;
1810
1811 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1812 &i)))
1813 goto error0;
c29aad41 1814 XFS_WANT_CORRUPTED_GOTO(mp,
1da177e4
LT
1815 i == 1 && xxbno == ltbno && xxlen == ltlen,
1816 error0);
1817 }
1818#endif
1819 /*
1820 * Update remaining by-block entry to the new, joined block.
1821 */
1822 nbno = ltbno;
1823 nlen = len + ltlen + gtlen;
1824 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1825 goto error0;
1826 }
1827 /*
1828 * Have only a left contiguous neighbor.
1829 * Merge it together with the new freespace.
1830 */
1831 else if (haveleft) {
1832 /*
1833 * Delete the old by-size entry on the left.
1834 */
1835 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1836 goto error0;
c29aad41 1837 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
91cca5df 1838 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 1839 goto error0;
c29aad41 1840 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1841 /*
1842 * Back up the by-block cursor to the left neighbor, and
1843 * update its length.
1844 */
8df4da4a 1845 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1da177e4 1846 goto error0;
c29aad41 1847 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1848 nbno = ltbno;
1849 nlen = len + ltlen;
1850 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1851 goto error0;
1852 }
1853 /*
1854 * Have only a right contiguous neighbor.
1855 * Merge it together with the new freespace.
1856 */
1857 else if (haveright) {
1858 /*
1859 * Delete the old by-size entry on the right.
1860 */
1861 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1862 goto error0;
c29aad41 1863 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
91cca5df 1864 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 1865 goto error0;
c29aad41 1866 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1867 /*
1868 * Update the starting block and length of the right
1869 * neighbor in the by-block tree.
1870 */
1871 nbno = bno;
1872 nlen = len + gtlen;
1873 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1874 goto error0;
1875 }
1876 /*
1877 * No contiguous neighbors.
1878 * Insert the new freespace into the by-block tree.
1879 */
1880 else {
1881 nbno = bno;
1882 nlen = len;
4b22a571 1883 if ((error = xfs_btree_insert(bno_cur, &i)))
1da177e4 1884 goto error0;
c29aad41 1885 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1886 }
1887 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1888 bno_cur = NULL;
1889 /*
1890 * In all cases we need to insert the new freespace in the by-size tree.
1891 */
1892 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1893 goto error0;
c29aad41 1894 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
4b22a571 1895 if ((error = xfs_btree_insert(cnt_cur, &i)))
1da177e4 1896 goto error0;
c29aad41 1897 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1898 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1899 cnt_cur = NULL;
ecb6928f 1900
1da177e4
LT
1901 /*
1902 * Update the freespace totals in the ag and superblock.
1903 */
ecb6928f
CH
1904 pag = xfs_perag_get(mp, agno);
1905 error = xfs_alloc_update_counters(tp, pag, agbp, len);
3fd129b6 1906 xfs_ag_resv_free_extent(pag, type, tp, len);
ecb6928f
CH
1907 xfs_perag_put(pag);
1908 if (error)
1909 goto error0;
1910
ff6d6af2
BD
1911 XFS_STATS_INC(mp, xs_freex);
1912 XFS_STATS_ADD(mp, xs_freeb, len);
0b1b213f 1913
3fd129b6
DW
1914 trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
1915 haveleft, haveright);
1da177e4 1916
1da177e4
LT
1917 return 0;
1918
1919 error0:
3fd129b6
DW
1920 trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
1921 -1, -1);
1da177e4
LT
1922 if (bno_cur)
1923 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1924 if (cnt_cur)
1925 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1926 return error;
1927}
1928
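/*
 * Illustrative sketch: the neighbour-merge logic in xfs_free_ag_extent()
 * above reduces to four cases.  The helper below restates how the
 * resulting freespace record is formed, on plain integers; the function
 * name and all values are hypothetical and no btree updates are done.
 */
static inline void
xfs_free_ag_extent_merge_sketch(
	unsigned int	bno,		/* start of the extent being freed */
	unsigned int	len,		/* length of the extent being freed */
	int		haveleft,	/* left record ltbno/ltlen is contiguous */
	unsigned int	ltbno,
	unsigned int	ltlen,
	int		haveright,	/* right record is contiguous at bno + len */
	unsigned int	gtlen,
	unsigned int	*nbno,		/* resulting freespace record start */
	unsigned int	*nlen)		/* resulting freespace record length */
{
	if (haveleft && haveright) {
		/* both neighbours and the freed extent become one record */
		*nbno = ltbno;
		*nlen = ltlen + len + gtlen;
	} else if (haveleft) {
		/* extend the left neighbour to cover the freed extent */
		*nbno = ltbno;
		*nlen = ltlen + len;
	} else if (haveright) {
		/* extend the right neighbour downwards to bno */
		*nbno = bno;
		*nlen = len + gtlen;
	} else {
		/* no contiguous neighbour: insert a brand new record */
		*nbno = bno;
		*nlen = len;
	}
}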
1929/*
1930 * Visible (exported) allocation/free functions.
1931 * Some of these are used just by xfs_alloc_btree.c and this file.
1932 */
1933
1934/*
1935 * Compute and fill in the value of m_ag_maxlevels.
1936 */
1937void
1938xfs_alloc_compute_maxlevels(
1939 xfs_mount_t *mp) /* file system mount structure */
1940{
19b54ee6
DW
1941 mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp, mp->m_alloc_mnr,
1942 (mp->m_sb.sb_agblocks + 1) / 2);
1da177e4
LT
1943}
1944
6cc87645 1945/*
3fd129b6
DW
1946 * Find the length of the longest extent in an AG. The 'need' parameter
1947 * specifies how much space we're going to need for the AGFL and the
1948 * 'reserved' parameter tells us how many blocks in this AG are reserved for
1949 * other callers.
6cc87645
DC
1950 */
1951xfs_extlen_t
1952xfs_alloc_longest_free_extent(
1953 struct xfs_mount *mp,
50adbcb4 1954 struct xfs_perag *pag,
3fd129b6
DW
1955 xfs_extlen_t need,
1956 xfs_extlen_t reserved)
6cc87645 1957{
50adbcb4 1958 xfs_extlen_t delta = 0;
6cc87645 1959
3fd129b6
DW
1960 /*
1961 * If the AGFL needs a recharge, we'll have to subtract that from the
1962 * longest extent.
1963 */
6cc87645
DC
1964 if (need > pag->pagf_flcount)
1965 delta = need - pag->pagf_flcount;
1966
3fd129b6
DW
1967 /*
1968 * If we cannot maintain others' reservations with space from the
1969 * not-longest freesp extents, we'll have to subtract /that/ from
1970 * the longest extent too.
1971 */
1972 if (pag->pagf_freeblks - pag->pagf_longest < reserved)
1973 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
1974
1975 /*
1976 * If the longest extent is long enough to satisfy all the
1977 * reservations and AGFL rules in place, we can return this extent.
1978 */
6cc87645
DC
1979 if (pag->pagf_longest > delta)
1980 return pag->pagf_longest - delta;
3fd129b6
DW
1981
1982 /* Otherwise, let the caller try for 1 block if there's space. */
6cc87645
DC
1983 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1984}
1985
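/*
 * Worked example for xfs_alloc_longest_free_extent() above, using
 * hypothetical values: pagf_longest = 100, pagf_freeblks = 150,
 * pagf_flcount = 2, need = 6, reserved = 60.
 *
 *   delta  = need - pagf_flcount             = 6 - 2   = 4
 *   delta += reserved - (freeblks - longest) = 60 - 50 = 10, so delta = 14
 *   result = pagf_longest - delta            = 100 - 14 = 86
 *
 * Had delta reached pagf_longest, the fallback return value would be 1
 * when any free space exists at all, and 0 otherwise.
 */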
496817b4
DC
1986unsigned int
1987xfs_alloc_min_freelist(
1988 struct xfs_mount *mp,
1989 struct xfs_perag *pag)
1990{
1991 unsigned int min_free;
1992
1993 /* space needed by-bno freespace btree */
1994 min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
1995 mp->m_ag_maxlevels);
1996 /* space needed by-size freespace btree */
1997 min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
1998 mp->m_ag_maxlevels);
52548852
DW
1999 /* space needed reverse mapping used space btree */
2000 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
2001 min_free += min_t(unsigned int,
2002 pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
2003 mp->m_rmap_maxlevels);
496817b4
DC
2004
2005 return min_free;
2006}
2007
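/*
 * Worked example for xfs_alloc_min_freelist() above, with hypothetical
 * btree heights: bnobt at level 2, cntbt at level 2, rmapbt at level 3,
 * and m_ag_maxlevels/m_rmap_maxlevels large enough not to clamp:
 *
 *   min_free = (2 + 1) + (2 + 1) = 6 blocks for the two freespace btrees,
 *   plus (3 + 1) = 4 blocks when the rmap btree is enabled, giving 10.
 *
 * Roughly speaking, this is the number of free-list blocks kept on hand
 * so that a worst-case split of each per-AG btree can still be serviced.
 */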
72d55285
DC
2008/*
2009 * Check if the operation we are fixing up the freelist for should go ahead or
2010 * not. If we are freeing blocks, we always allow it, otherwise the allocation
2011 * is dependent on whether the size and shape of free space available will
2012 * permit the requested allocation to take place.
2013 */
2014static bool
2015xfs_alloc_space_available(
2016 struct xfs_alloc_arg *args,
2017 xfs_extlen_t min_free,
2018 int flags)
2019{
2020 struct xfs_perag *pag = args->pag;
12ef8301 2021 xfs_extlen_t alloc_len, longest;
3fd129b6 2022 xfs_extlen_t reservation; /* blocks that are still reserved */
72d55285
DC
2023 int available;
2024
2025 if (flags & XFS_ALLOC_FLAG_FREEING)
2026 return true;
2027
3fd129b6
DW
2028 reservation = xfs_ag_resv_needed(pag, args->resv);
2029
72d55285 2030 /* do we have enough contiguous free space for the allocation? */
12ef8301 2031 alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
3fd129b6
DW
2032 longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free,
2033 reservation);
12ef8301 2034 if (longest < alloc_len)
72d55285
DC
2035 return false;
2036
3fd129b6 2037 /* do we have enough free space remaining for the allocation? */
72d55285 2038 available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
54fee133 2039 reservation - min_free - args->minleft);
12ef8301 2040 if (available < (int)max(args->total, alloc_len))
72d55285
DC
2041 return false;
2042
54fee133
CH
2043 /*
2044 * Clamp maxlen to the amount of free space available for the actual
2045 * extent allocation.
2046 */
2047 if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
2048 args->maxlen = available;
2049 ASSERT(args->maxlen > 0);
2050 ASSERT(args->maxlen >= args->minlen);
2051 }
2052
72d55285
DC
2053 return true;
2054}
2055
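/*
 * Worked example for xfs_alloc_space_available() above, with hypothetical
 * values: minlen = 8, alignment = 4, minalignslop = 0, total = 20,
 * minleft = 4, min_free = 6, reservation = 10, pagf_freeblks = 60,
 * pagf_flcount = 5, pagf_longest = 30.
 *
 *   alloc_len = 8 + (4 - 1) + 0     = 11
 *   longest   = 30 - (6 - 5)        = 29, which is >= 11
 *   available = 60 + 5 - 10 - 6 - 4 = 45, which is >= max(20, 11)
 *
 * Both checks pass, so the AG is usable; maxlen would additionally be
 * clamped to 45 if it were larger and XFS_ALLOC_FLAG_CHECK were not set.
 */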
1da177e4
LT
2056/*
2057 * Decide whether to use this allocation group for this allocation.
2058 * If so, fix up the btree freelist's size.
2059 */
2e9101da 2060int /* error */
1da177e4 2061xfs_alloc_fix_freelist(
396503fc
DC
2062 struct xfs_alloc_arg *args, /* allocation argument structure */
2063 int flags) /* XFS_ALLOC_FLAG_... */
1da177e4 2064{
396503fc
DC
2065 struct xfs_mount *mp = args->mp;
2066 struct xfs_perag *pag = args->pag;
2067 struct xfs_trans *tp = args->tp;
2068 struct xfs_buf *agbp = NULL;
2069 struct xfs_buf *agflbp = NULL;
2070 struct xfs_alloc_arg targs; /* local allocation arguments */
2071 xfs_agblock_t bno; /* freelist block */
2072 xfs_extlen_t need; /* total blocks needed in freelist */
c184f855 2073 int error = 0;
396503fc 2074
1da177e4 2075 if (!pag->pagf_init) {
396503fc
DC
2076 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2077 if (error)
2078 goto out_no_agbp;
1da177e4 2079 if (!pag->pagf_init) {
0e1edbd9
NS
2080 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2081 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
396503fc 2082 goto out_agbp_relse;
1da177e4 2083 }
396503fc 2084 }
1da177e4 2085
0e1edbd9 2086 /*
396503fc
DC
2087	 * If this is a metadata-preferred pag and we are allocating user
2088	 * data, try somewhere else, unless we are being asked to try harder
2089	 * at this point.
1da177e4 2090 */
292378ed 2091 if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) &&
0e1edbd9
NS
2092 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
2093 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
396503fc 2094 goto out_agbp_relse;
1da177e4
LT
2095 }
2096
496817b4 2097 need = xfs_alloc_min_freelist(mp, pag);
54fee133
CH
2098 if (!xfs_alloc_space_available(args, need, flags |
2099 XFS_ALLOC_FLAG_CHECK))
396503fc 2100 goto out_agbp_relse;
0e1edbd9 2101
1da177e4
LT
2102 /*
2103 * Get the a.g. freespace buffer.
2104 * Can fail if we're not blocking on locks, and it's held.
2105 */
396503fc
DC
2106 if (!agbp) {
2107 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2108 if (error)
2109 goto out_no_agbp;
2110 if (!agbp) {
0e1edbd9
NS
2111 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2112 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
396503fc 2113 goto out_no_agbp;
0e1edbd9 2114 }
1da177e4 2115 }
50adbcb4 2116
50adbcb4 2117	/* If there isn't enough total space or a long enough single extent, reject it. */
496817b4 2118 need = xfs_alloc_min_freelist(mp, pag);
396503fc
DC
2119 if (!xfs_alloc_space_available(args, need, flags))
2120 goto out_agbp_relse;
72d55285 2121
1da177e4
LT
2122 /*
2123 * Make the freelist shorter if it's too long.
50adbcb4 2124 *
396503fc
DC
2125 * Note that from this point onwards, we will always release the agf and
2126 * agfl buffers on error. This handles the case where we error out and
2127 * the buffers are clean or may not have been joined to the transaction
2128 * and hence need to be released manually. If they have been joined to
2129 * the transaction, then xfs_trans_brelse() will handle them
2130 * appropriately based on the recursion count and dirty state of the
2131 * buffer.
2132 *
50adbcb4
DC
2133 * XXX (dgc): When we have lots of free space, does this buy us
2134 * anything other than extra overhead when we need to put more blocks
2135 * back on the free list? Maybe we should only do this when space is
2136 * getting low or the AGFL is more than half full?
04f13060
DW
2137 *
2138 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
2139 * big; the NORMAP flag prevents AGFL expand/shrink operations from
2140 * updating the rmapbt. Both flags are used in xfs_repair while we're
2141 * rebuilding the rmapbt, and neither are used by the kernel. They're
2142 * both required to ensure that rmaps are correctly recorded for the
2143 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
2144 * repair/rmap.c in xfsprogs for details.
1da177e4 2145 */
04f13060
DW
2146 memset(&targs, 0, sizeof(targs));
2147 if (flags & XFS_ALLOC_FLAG_NORMAP)
2148 xfs_rmap_skip_owner_update(&targs.oinfo);
2149 else
2150 xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
2151 while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
50adbcb4 2152 struct xfs_buf *bp;
1da177e4 2153
92821e2b
DC
2154 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2155 if (error)
396503fc 2156 goto out_agbp_relse;
340785cc 2157 error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
3fd129b6 2158 &targs.oinfo, XFS_AG_RESV_AGFL);
50adbcb4 2159 if (error)
396503fc 2160 goto out_agbp_relse;
1da177e4 2161 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
93e8befc
ES
2162 if (!bp) {
2163 error = -EFSCORRUPTED;
2164 goto out_agbp_relse;
2165 }
1da177e4
LT
2166 xfs_trans_binval(tp, bp);
2167 }
50adbcb4 2168
1da177e4
LT
2169 targs.tp = tp;
2170 targs.mp = mp;
2171 targs.agbp = agbp;
2172 targs.agno = args->agno;
3fd129b6 2173 targs.alignment = targs.minlen = targs.prod = 1;
1da177e4
LT
2174 targs.type = XFS_ALLOCTYPE_THIS_AG;
2175 targs.pag = pag;
50adbcb4
DC
2176 error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
2177 if (error)
396503fc 2178 goto out_agbp_relse;
50adbcb4
DC
2179
2180 /* Make the freelist longer if it's too short. */
2181 while (pag->pagf_flcount < need) {
1da177e4 2182 targs.agbno = 0;
50adbcb4 2183 targs.maxlen = need - pag->pagf_flcount;
3fd129b6 2184 targs.resv = XFS_AG_RESV_AGFL;
50adbcb4
DC
2185
2186 /* Allocate as many blocks as possible at once. */
2187 error = xfs_alloc_ag_vextent(&targs);
396503fc
DC
2188 if (error)
2189 goto out_agflbp_relse;
2190
1da177e4
LT
2191 /*
2192 * Stop if we run out. Won't happen if callers are obeying
2193 * the restrictions correctly. Can happen for free calls
2194 * on a completely full ag.
2195 */
d210a28c 2196 if (targs.agbno == NULLAGBLOCK) {
0e1edbd9
NS
2197 if (flags & XFS_ALLOC_FLAG_FREEING)
2198 break;
396503fc 2199 goto out_agflbp_relse;
d210a28c 2200 }
1da177e4
LT
2201 /*
2202 * Put each allocated block on the list.
2203 */
2204 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
92821e2b
DC
2205 error = xfs_alloc_put_freelist(tp, agbp,
2206 agflbp, bno, 0);
2207 if (error)
396503fc 2208 goto out_agflbp_relse;
1da177e4
LT
2209 }
2210 }
e63a3690 2211 xfs_trans_brelse(tp, agflbp);
1da177e4
LT
2212 args->agbp = agbp;
2213 return 0;
396503fc
DC
2214
2215out_agflbp_relse:
2216 xfs_trans_brelse(tp, agflbp);
2217out_agbp_relse:
2218 if (agbp)
2219 xfs_trans_brelse(tp, agbp);
2220out_no_agbp:
2221 args->agbp = NULL;
2222 return error;
1da177e4
LT
2223}
2224
2225/*
2226 * Get a block from the freelist.
2227 * Returns the address of the block obtained from the freelist via *bnop.
2228 */
2229int /* error */
2230xfs_alloc_get_freelist(
2231 xfs_trans_t *tp, /* transaction pointer */
2232 xfs_buf_t *agbp, /* buffer containing the agf structure */
92821e2b
DC
2233 xfs_agblock_t *bnop, /* block address retrieved from freelist */
2234 int btreeblk) /* destination is an AGF btree */
1da177e4
LT
2235{
2236 xfs_agf_t *agf; /* a.g. freespace structure */
1da177e4
LT
2237 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
2238 xfs_agblock_t bno; /* block number returned */
77c95bba 2239 __be32 *agfl_bno;
1da177e4 2240 int error;
92821e2b 2241 int logflags;
77c95bba 2242 xfs_mount_t *mp = tp->t_mountp;
1da177e4
LT
2243 xfs_perag_t *pag; /* per allocation group data */
2244
1da177e4
LT
2245 /*
2246 * Freelist is empty, give up.
2247 */
77c95bba 2248 agf = XFS_BUF_TO_AGF(agbp);
1da177e4
LT
2249 if (!agf->agf_flcount) {
2250 *bnop = NULLAGBLOCK;
2251 return 0;
2252 }
2253 /*
2254 * Read the array of free blocks.
2255 */
77c95bba
CH
2256 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2257 &agflbp);
2258 if (error)
1da177e4 2259 return error;
77c95bba
CH
2260
2261
1da177e4
LT
2262 /*
2263 * Get the block number and update the data structures.
2264 */
77c95bba
CH
2265 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2266 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
413d57c9 2267 be32_add_cpu(&agf->agf_flfirst, 1);
1da177e4 2268 xfs_trans_brelse(tp, agflbp);
16259e7d 2269 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
1da177e4 2270 agf->agf_flfirst = 0;
a862e0fd
DC
2271
2272 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
413d57c9 2273 be32_add_cpu(&agf->agf_flcount, -1);
1da177e4
LT
2274 xfs_trans_agflist_delta(tp, -1);
2275 pag->pagf_flcount--;
a862e0fd 2276 xfs_perag_put(pag);
92821e2b
DC
2277
2278 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2279 if (btreeblk) {
413d57c9 2280 be32_add_cpu(&agf->agf_btreeblks, 1);
92821e2b
DC
2281 pag->pagf_btreeblks++;
2282 logflags |= XFS_AGF_BTREEBLKS;
2283 }
2284
92821e2b 2285 xfs_alloc_log_agf(tp, agbp, logflags);
1da177e4
LT
2286 *bnop = bno;
2287
1da177e4
LT
2288 return 0;
2289}
2290
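/*
 * Worked example of the AGFL indexing used by xfs_alloc_get_freelist()
 * above and xfs_alloc_put_freelist() below: the free list is a circular
 * array of XFS_AGFL_SIZE(mp) block numbers.  With hypothetical values
 * agf_flfirst = 117, agf_fllast = 2, agf_flcount = 4 and an AGFL of 118
 * slots, the live entries occupy slots 117, 0, 1 and 2.  A get returns
 * the block stored in slot 117 and advances agf_flfirst to 118, which
 * wraps back to 0; a put advances agf_fllast to 3 and writes the new
 * block number into that slot.
 */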
2291/*
2292 * Log the given fields from the agf structure.
2293 */
2294void
2295xfs_alloc_log_agf(
2296 xfs_trans_t *tp, /* transaction pointer */
2297 xfs_buf_t *bp, /* buffer for a.g. freelist header */
2298 int fields) /* mask of fields to be logged (XFS_AGF_...) */
2299{
2300 int first; /* first byte offset */
2301 int last; /* last byte offset */
2302 static const short offsets[] = {
2303 offsetof(xfs_agf_t, agf_magicnum),
2304 offsetof(xfs_agf_t, agf_versionnum),
2305 offsetof(xfs_agf_t, agf_seqno),
2306 offsetof(xfs_agf_t, agf_length),
2307 offsetof(xfs_agf_t, agf_roots[0]),
2308 offsetof(xfs_agf_t, agf_levels[0]),
2309 offsetof(xfs_agf_t, agf_flfirst),
2310 offsetof(xfs_agf_t, agf_fllast),
2311 offsetof(xfs_agf_t, agf_flcount),
2312 offsetof(xfs_agf_t, agf_freeblks),
2313 offsetof(xfs_agf_t, agf_longest),
92821e2b 2314 offsetof(xfs_agf_t, agf_btreeblks),
4e0e6040 2315 offsetof(xfs_agf_t, agf_uuid),
f32866fd 2316 offsetof(xfs_agf_t, agf_rmap_blocks),
bdf28630
DW
2317 offsetof(xfs_agf_t, agf_refcount_blocks),
2318 offsetof(xfs_agf_t, agf_refcount_root),
2319 offsetof(xfs_agf_t, agf_refcount_level),
da1f039d
DW
2320 /* needed so that we don't log the whole rest of the structure: */
2321 offsetof(xfs_agf_t, agf_spare64),
1da177e4
LT
2322 sizeof(xfs_agf_t)
2323 };
2324
0b1b213f
CH
2325 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2326
61fe135c 2327 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
4e0e6040 2328
1da177e4
LT
2329 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2330 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2331}
2332
2333/*
2334 * Interface for inode allocation to force the pag data to be initialized.
2335 */
2336int /* error */
2337xfs_alloc_pagf_init(
2338 xfs_mount_t *mp, /* file system mount structure */
2339 xfs_trans_t *tp, /* transaction pointer */
2340 xfs_agnumber_t agno, /* allocation group number */
2341 int flags) /* XFS_ALLOC_FLAGS_... */
2342{
2343 xfs_buf_t *bp;
2344 int error;
2345
2346 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2347 return error;
2348 if (bp)
2349 xfs_trans_brelse(tp, bp);
2350 return 0;
2351}
2352
2353/*
2354 * Put the block on the freelist for the allocation group.
2355 */
2356int /* error */
2357xfs_alloc_put_freelist(
2358 xfs_trans_t *tp, /* transaction pointer */
2359 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2360 xfs_buf_t *agflbp,/* buffer for a.g. free block array */
92821e2b
DC
2361 xfs_agblock_t bno, /* block being freed */
2362 int btreeblk) /* block came from an AGF btree */
1da177e4
LT
2363{
2364 xfs_agf_t *agf; /* a.g. freespace structure */
e2101005 2365 __be32 *blockp;/* pointer to array entry */
1da177e4 2366 int error;
92821e2b 2367 int logflags;
1da177e4
LT
2368 xfs_mount_t *mp; /* mount structure */
2369 xfs_perag_t *pag; /* per allocation group data */
77c95bba
CH
2370 __be32 *agfl_bno;
2371 int startoff;
1da177e4
LT
2372
2373 agf = XFS_BUF_TO_AGF(agbp);
2374 mp = tp->t_mountp;
2375
2376 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
16259e7d 2377 be32_to_cpu(agf->agf_seqno), &agflbp)))
1da177e4 2378 return error;
413d57c9 2379 be32_add_cpu(&agf->agf_fllast, 1);
16259e7d 2380 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
1da177e4 2381 agf->agf_fllast = 0;
a862e0fd
DC
2382
2383 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
413d57c9 2384 be32_add_cpu(&agf->agf_flcount, 1);
1da177e4
LT
2385 xfs_trans_agflist_delta(tp, 1);
2386 pag->pagf_flcount++;
92821e2b
DC
2387
2388 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2389 if (btreeblk) {
413d57c9 2390 be32_add_cpu(&agf->agf_btreeblks, -1);
92821e2b
DC
2391 pag->pagf_btreeblks--;
2392 logflags |= XFS_AGF_BTREEBLKS;
2393 }
a862e0fd 2394 xfs_perag_put(pag);
92821e2b 2395
92821e2b
DC
2396 xfs_alloc_log_agf(tp, agbp, logflags);
2397
16259e7d 2398 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
77c95bba
CH
2399
2400 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2401 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
e2101005 2402 *blockp = cpu_to_be32(bno);
77c95bba
CH
2403 startoff = (char *)blockp - (char *)agflbp->b_addr;
2404
92821e2b 2405 xfs_alloc_log_agf(tp, agbp, logflags);
77c95bba 2406
61fe135c 2407 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
77c95bba
CH
2408 xfs_trans_log_buf(tp, agflbp, startoff,
2409 startoff + sizeof(xfs_agblock_t) - 1);
1da177e4
LT
2410 return 0;
2411}
2412
a6a781a5 2413static xfs_failaddr_t
612cfbfe 2414xfs_agf_verify(
b5572597
DW
2415 struct xfs_buf *bp)
2416{
2417 struct xfs_mount *mp = bp->b_target->bt_mount;
2418 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
5d5f527d 2419
a45086e2
BF
2420 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2421 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
a6a781a5 2422 return __this_address;
a45086e2
BF
2423 if (!xfs_log_check_lsn(mp,
2424 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
a6a781a5 2425 return __this_address;
a45086e2 2426 }
5d5f527d 2427
4e0e6040
DC
2428 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2429 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2430 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2431 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2432 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2433 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
a6a781a5 2434 return __this_address;
5d5f527d 2435
d2a047f3
DW
2436 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
2437 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
2438 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
e1b05723 2439 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
a6a781a5 2440 return __this_address;
e1b05723 2441
b8704944 2442 if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
d2a047f3
DW
2443 (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
2444 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
a6a781a5 2445 return __this_address;
b8704944 2446
5d5f527d
DC
2447 /*
2448 * during growfs operations, the perag is not fully initialised,
2449 * so we can't use it for any useful checking. growfs ensures we can't
2450 * use it by using uncached buffers that don't have the perag attached
2451 * so we can detect and avoid this problem.
2452 */
4e0e6040 2453 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
a6a781a5 2454 return __this_address;
5d5f527d 2455
4e0e6040
DC
2456 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2457 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
a6a781a5 2458 return __this_address;
4e0e6040 2459
46eeb521 2460 if (xfs_sb_version_hasreflink(&mp->m_sb) &&
d2a047f3
DW
2461 (be32_to_cpu(agf->agf_refcount_level) < 1 ||
2462 be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
a6a781a5 2463 return __this_address;
46eeb521 2464
a6a781a5 2465 return NULL;
5d5f527d 2466
612cfbfe
DC
2467}
2468
1813dd64
DC
2469static void
2470xfs_agf_read_verify(
612cfbfe
DC
2471 struct xfs_buf *bp)
2472{
4e0e6040 2473 struct xfs_mount *mp = bp->b_target->bt_mount;
bc1a09b8 2474 xfs_failaddr_t fa;
4e0e6040 2475
ce5028cf
ES
2476 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2477 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
bc1a09b8
DW
2478 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
2479 else {
b5572597 2480 fa = xfs_agf_verify(bp);
bc1a09b8
DW
2481 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
2482 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2483 }
612cfbfe 2484}
5d5f527d 2485
b0f539de 2486static void
1813dd64 2487xfs_agf_write_verify(
612cfbfe
DC
2488 struct xfs_buf *bp)
2489{
fb1755a6
CM
2490 struct xfs_mount *mp = bp->b_target->bt_mount;
2491 struct xfs_buf_log_item *bip = bp->b_log_item;
bc1a09b8 2492 xfs_failaddr_t fa;
4e0e6040 2493
b5572597 2494 fa = xfs_agf_verify(bp);
bc1a09b8
DW
2495 if (fa) {
2496 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
4e0e6040
DC
2497 return;
2498 }
2499
2500 if (!xfs_sb_version_hascrc(&mp->m_sb))
2501 return;
2502
2503 if (bip)
2504 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2505
f1dbcd7e 2506 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
5d5f527d
DC
2507}
2508
1813dd64 2509const struct xfs_buf_ops xfs_agf_buf_ops = {
233135b7 2510 .name = "xfs_agf",
1813dd64
DC
2511 .verify_read = xfs_agf_read_verify,
2512 .verify_write = xfs_agf_write_verify,
b5572597 2513 .verify_struct = xfs_agf_verify,
1813dd64
DC
2514};
2515
1da177e4
LT
2516/*
2517 * Read in the allocation group header (free/alloc section).
2518 */
2519int /* error */
4805621a
CH
2520xfs_read_agf(
2521 struct xfs_mount *mp, /* mount point structure */
2522 struct xfs_trans *tp, /* transaction pointer */
2523 xfs_agnumber_t agno, /* allocation group number */
2524 int flags, /* XFS_BUF_ */
2525 struct xfs_buf **bpp) /* buffer for the ag freelist header */
1da177e4 2526{
1da177e4
LT
2527 int error;
2528
d123031a
DC
2529 trace_xfs_read_agf(mp, agno);
2530
1da177e4
LT
2531 ASSERT(agno != NULLAGNUMBER);
2532 error = xfs_trans_read_buf(
2533 mp, tp, mp->m_ddev_targp,
2534 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
1813dd64 2535 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
1da177e4
LT
2536 if (error)
2537 return error;
4805621a 2538 if (!*bpp)
1da177e4 2539 return 0;
4805621a 2540
5a52c2a5 2541 ASSERT(!(*bpp)->b_error);
38f23232 2542 xfs_buf_set_ref(*bpp, XFS_AGF_REF);
4805621a
CH
2543 return 0;
2544}
2545
2546/*
2547 * Read in the allocation group header (free/alloc section).
2548 */
2549int /* error */
2550xfs_alloc_read_agf(
2551 struct xfs_mount *mp, /* mount point structure */
2552 struct xfs_trans *tp, /* transaction pointer */
2553 xfs_agnumber_t agno, /* allocation group number */
2554 int flags, /* XFS_ALLOC_FLAG_... */
2555 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2556{
2557 struct xfs_agf *agf; /* ag freelist header */
2558 struct xfs_perag *pag; /* per allocation group data */
2559 int error;
2560
d123031a 2561 trace_xfs_alloc_read_agf(mp, agno);
4805621a 2562
d123031a 2563 ASSERT(agno != NULLAGNUMBER);
4805621a 2564 error = xfs_read_agf(mp, tp, agno,
0cadda1c 2565 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
4805621a
CH
2566 bpp);
2567 if (error)
2568 return error;
2569 if (!*bpp)
2570 return 0;
5a52c2a5 2571 ASSERT(!(*bpp)->b_error);
4805621a
CH
2572
2573 agf = XFS_BUF_TO_AGF(*bpp);
a862e0fd 2574 pag = xfs_perag_get(mp, agno);
1da177e4 2575 if (!pag->pagf_init) {
16259e7d 2576 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
92821e2b 2577 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
16259e7d
CH
2578 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2579 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
1da177e4 2580 pag->pagf_levels[XFS_BTNUM_BNOi] =
16259e7d 2581 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
1da177e4 2582 pag->pagf_levels[XFS_BTNUM_CNTi] =
16259e7d 2583 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
b8704944
DW
2584 pag->pagf_levels[XFS_BTNUM_RMAPi] =
2585 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
46eeb521 2586 pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
007c61c6 2587 spin_lock_init(&pag->pagb_lock);
e57336ff 2588 pag->pagb_count = 0;
ed3b4d6c 2589 pag->pagb_tree = RB_ROOT;
1da177e4
LT
2590 pag->pagf_init = 1;
2591 }
2592#ifdef DEBUG
2593 else if (!XFS_FORCED_SHUTDOWN(mp)) {
16259e7d 2594 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
89b28393 2595 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
16259e7d
CH
2596 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2597 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
1da177e4 2598 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
16259e7d 2599 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
1da177e4 2600 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
16259e7d 2601 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
1da177e4
LT
2602 }
2603#endif
a862e0fd 2604 xfs_perag_put(pag);
1da177e4
LT
2605 return 0;
2606}
2607
2608/*
2609 * Allocate an extent (variable-size).
2610 * Depending on the allocation type, we either look in a single allocation
2611 * group or loop over the allocation groups to find the result.
2612 */
2613int /* error */
e04426b9 2614xfs_alloc_vextent(
1da177e4
LT
2615 xfs_alloc_arg_t *args) /* allocation argument structure */
2616{
2617 xfs_agblock_t agsize; /* allocation group size */
2618 int error;
2619 int flags; /* XFS_ALLOC_FLAG_... locking flags */
1da177e4
LT
2620 xfs_mount_t *mp; /* mount structure pointer */
2621 xfs_agnumber_t sagno; /* starting allocation group number */
2622 xfs_alloctype_t type; /* input allocation type */
2623 int bump_rotor = 0;
1da177e4
LT
2624 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2625
2626 mp = args->mp;
2627 type = args->otype = args->type;
2628 args->agbno = NULLAGBLOCK;
2629 /*
2630 * Just fix this up, for the case where the last a.g. is shorter
2631 * (or there's only one a.g.) and the caller couldn't easily figure
2632 * that out (xfs_bmap_alloc).
2633 */
2634 agsize = mp->m_sb.sb_agblocks;
2635 if (args->maxlen > agsize)
2636 args->maxlen = agsize;
2637 if (args->alignment == 0)
2638 args->alignment = 1;
2639 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2640 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2641 ASSERT(args->minlen <= args->maxlen);
2642 ASSERT(args->minlen <= agsize);
2643 ASSERT(args->mod < args->prod);
2644 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2645 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2646 args->minlen > args->maxlen || args->minlen > agsize ||
2647 args->mod >= args->prod) {
2648 args->fsbno = NULLFSBLOCK;
0b1b213f 2649 trace_xfs_alloc_vextent_badargs(args);
1da177e4
LT
2650 return 0;
2651 }
1da177e4
LT
2652
2653 switch (type) {
2654 case XFS_ALLOCTYPE_THIS_AG:
2655 case XFS_ALLOCTYPE_NEAR_BNO:
2656 case XFS_ALLOCTYPE_THIS_BNO:
2657 /*
2658 * These three force us into a single a.g.
2659 */
2660 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
a862e0fd 2661 args->pag = xfs_perag_get(mp, args->agno);
1da177e4 2662 error = xfs_alloc_fix_freelist(args, 0);
1da177e4 2663 if (error) {
0b1b213f 2664 trace_xfs_alloc_vextent_nofix(args);
1da177e4
LT
2665 goto error0;
2666 }
2667 if (!args->agbp) {
0b1b213f 2668 trace_xfs_alloc_vextent_noagbp(args);
1da177e4
LT
2669 break;
2670 }
2671 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2672 if ((error = xfs_alloc_ag_vextent(args)))
2673 goto error0;
1da177e4
LT
2674 break;
2675 case XFS_ALLOCTYPE_START_BNO:
2676 /*
2677 * Try near allocation first, then anywhere-in-ag after
2678 * the first a.g. fails.
2679 */
292378ed 2680 if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
1da177e4
LT
2681 (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2682 args->fsbno = XFS_AGB_TO_FSB(mp,
2683 ((mp->m_agfrotor / rotorstep) %
2684 mp->m_sb.sb_agcount), 0);
2685 bump_rotor = 1;
2686 }
2687 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2688 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2689 /* FALLTHROUGH */
1da177e4
LT
2690 case XFS_ALLOCTYPE_FIRST_AG:
2691 /*
2692 * Rotate through the allocation groups looking for a winner.
2693 */
8d242e93 2694 if (type == XFS_ALLOCTYPE_FIRST_AG) {
1da177e4
LT
2695 /*
2696 * Start with allocation group given by bno.
2697 */
2698 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2699 args->type = XFS_ALLOCTYPE_THIS_AG;
2700 sagno = 0;
2701 flags = 0;
2702 } else {
1da177e4
LT
2703 /*
2704 * Start with the given allocation group.
2705 */
2706 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2707 flags = XFS_ALLOC_FLAG_TRYLOCK;
2708 }
2709 /*
2710 * Loop over allocation groups twice; first time with
2711 * trylock set, second time without.
2712 */
1da177e4 2713 for (;;) {
a862e0fd 2714 args->pag = xfs_perag_get(mp, args->agno);
1da177e4 2715 error = xfs_alloc_fix_freelist(args, flags);
1da177e4 2716 if (error) {
0b1b213f 2717 trace_xfs_alloc_vextent_nofix(args);
1da177e4
LT
2718 goto error0;
2719 }
2720 /*
2721 * If we get a buffer back then the allocation will fly.
2722 */
2723 if (args->agbp) {
2724 if ((error = xfs_alloc_ag_vextent(args)))
2725 goto error0;
2726 break;
2727 }
0b1b213f
CH
2728
2729 trace_xfs_alloc_vextent_loopfailed(args);
2730
1da177e4
LT
2731 /*
2732 * Didn't work, figure out the next iteration.
2733 */
2734 if (args->agno == sagno &&
2735 type == XFS_ALLOCTYPE_START_BNO)
2736 args->type = XFS_ALLOCTYPE_THIS_AG;
d210a28c
YL
2737 /*
2738 * For the first allocation, we can try any AG to get
2739 * space. However, if we have already allocated a
2740 * block, we don't want to try AGs whose number is below
2741 * sagno. Otherwise, we may end up with out-of-order
2742 * locking of AGF, which might cause deadlock.
2743 */
2744 if (++(args->agno) == mp->m_sb.sb_agcount) {
2745 if (args->firstblock != NULLFSBLOCK)
2746 args->agno = sagno;
2747 else
2748 args->agno = 0;
2749 }
1da177e4
LT
2750 /*
2751 * Reached the starting a.g., must either be done
2752 * or switch to non-trylock mode.
2753 */
2754 if (args->agno == sagno) {
255c5162 2755 if (flags == 0) {
1da177e4 2756 args->agbno = NULLAGBLOCK;
0b1b213f 2757 trace_xfs_alloc_vextent_allfailed(args);
1da177e4
LT
2758 break;
2759 }
255c5162
CH
2760
2761 flags = 0;
2762 if (type == XFS_ALLOCTYPE_START_BNO) {
2763 args->agbno = XFS_FSB_TO_AGBNO(mp,
2764 args->fsbno);
2765 args->type = XFS_ALLOCTYPE_NEAR_BNO;
1da177e4
LT
2766 }
2767 }
a862e0fd 2768 xfs_perag_put(args->pag);
1da177e4 2769 }
8d242e93 2770 if (bump_rotor) {
1da177e4
LT
2771 if (args->agno == sagno)
2772 mp->m_agfrotor = (mp->m_agfrotor + 1) %
2773 (mp->m_sb.sb_agcount * rotorstep);
2774 else
2775 mp->m_agfrotor = (args->agno * rotorstep + 1) %
2776 (mp->m_sb.sb_agcount * rotorstep);
2777 }
2778 break;
2779 default:
2780 ASSERT(0);
2781 /* NOTREACHED */
2782 }
2783 if (args->agbno == NULLAGBLOCK)
2784 args->fsbno = NULLFSBLOCK;
2785 else {
2786 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2787#ifdef DEBUG
2788 ASSERT(args->len >= args->minlen);
2789 ASSERT(args->len <= args->maxlen);
2790 ASSERT(args->agbno % args->alignment == 0);
2791 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2792 args->len);
2793#endif
3fbbbea3
DC
2794
2795 /* Zero the extent if we were asked to do so */
292378ed 2796 if (args->datatype & XFS_ALLOC_USERDATA_ZERO) {
3fbbbea3
DC
2797 error = xfs_zero_extent(args->ip, args->fsbno, args->len);
2798 if (error)
2799 goto error0;
2800 }
2801
1da177e4 2802 }
a862e0fd 2803 xfs_perag_put(args->pag);
1da177e4
LT
2804 return 0;
2805error0:
a862e0fd 2806 xfs_perag_put(args->pag);
1da177e4
LT
2807 return error;
2808}
2809
4d89e20b
DC
2810/* Ensure that the freelist is at full capacity. */
2811int
2812xfs_free_extent_fix_freelist(
2813 struct xfs_trans *tp,
2814 xfs_agnumber_t agno,
2815 struct xfs_buf **agbp)
1da177e4 2816{
4d89e20b
DC
2817 struct xfs_alloc_arg args;
2818 int error;
1da177e4 2819
4d89e20b 2820 memset(&args, 0, sizeof(struct xfs_alloc_arg));
1da177e4
LT
2821 args.tp = tp;
2822 args.mp = tp->t_mountp;
4d89e20b 2823 args.agno = agno;
be65b18a
DC
2824
2825 /*
2826 * validate that the block number is legal - this enables us to detect
2827 * and handle silent filesystem corruption rather than crashing.
2828 */
be65b18a 2829 if (args.agno >= args.mp->m_sb.sb_agcount)
2451337d 2830 return -EFSCORRUPTED;
be65b18a 2831
a862e0fd 2832 args.pag = xfs_perag_get(args.mp, args.agno);
be65b18a
DC
2833 ASSERT(args.pag);
2834
2835 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2836 if (error)
4d89e20b
DC
2837 goto out;
2838
2839 *agbp = args.agbp;
2840out:
2841 xfs_perag_put(args.pag);
2842 return error;
2843}
2844
2845/*
2846 * Free an extent.
2847 * Just break up the extent address and hand off to xfs_free_ag_extent
2848 * after fixing up the freelist.
2849 */
2850int /* error */
2851xfs_free_extent(
2852 struct xfs_trans *tp, /* transaction pointer */
2853 xfs_fsblock_t bno, /* starting block number of extent */
340785cc 2854 xfs_extlen_t len, /* length of extent */
3fd129b6
DW
2855 struct xfs_owner_info *oinfo, /* extent owner */
2856 enum xfs_ag_resv_type type) /* block reservation type */
4d89e20b
DC
2857{
2858 struct xfs_mount *mp = tp->t_mountp;
2859 struct xfs_buf *agbp;
2860 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
2861 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
2862 int error;
2863
2864 ASSERT(len != 0);
3fd129b6 2865 ASSERT(type != XFS_AG_RESV_AGFL);
4d89e20b 2866
ba9e7802 2867 if (XFS_TEST_ERROR(false, mp,
9e24cfd0 2868 XFS_ERRTAG_FREE_EXTENT))
ba9e7802
DW
2869 return -EIO;
2870
4d89e20b
DC
2871 error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
2872 if (error)
2873 return error;
2874
2875 XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err);
be65b18a
DC
2876
2877 /* validate the extent size is legal now we have the agf locked */
4d89e20b
DC
2878 XFS_WANT_CORRUPTED_GOTO(mp,
2879 agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
2880 err);
be65b18a 2881
3fd129b6 2882 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
4d89e20b
DC
2883 if (error)
2884 goto err;
2885
2886 xfs_extent_busy_insert(tp, agno, agbno, len, 0);
2887 return 0;
2888
2889err:
2890 xfs_trans_brelse(tp, agbp);
1da177e4
LT
2891 return error;
2892}
2d520bfa
DW
2893
2894struct xfs_alloc_query_range_info {
2895 xfs_alloc_query_range_fn fn;
2896 void *priv;
2897};
2898
2899/* Format btree record and pass to our callback. */
2900STATIC int
2901xfs_alloc_query_range_helper(
2902 struct xfs_btree_cur *cur,
2903 union xfs_btree_rec *rec,
2904 void *priv)
2905{
2906 struct xfs_alloc_query_range_info *query = priv;
2907 struct xfs_alloc_rec_incore irec;
2908
2909 irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
2910 irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
2911 return query->fn(cur, &irec, query->priv);
2912}
2913
2914/* Find all free space within a given range of blocks. */
2915int
2916xfs_alloc_query_range(
2917 struct xfs_btree_cur *cur,
2918 struct xfs_alloc_rec_incore *low_rec,
2919 struct xfs_alloc_rec_incore *high_rec,
2920 xfs_alloc_query_range_fn fn,
2921 void *priv)
2922{
2923 union xfs_btree_irec low_brec;
2924 union xfs_btree_irec high_brec;
2925 struct xfs_alloc_query_range_info query;
2926
2927 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
2928 low_brec.a = *low_rec;
2929 high_brec.a = *high_rec;
2930 query.priv = priv;
2931 query.fn = fn;
2932 return xfs_btree_query_range(cur, &low_brec, &high_brec,
2933 xfs_alloc_query_range_helper, &query);
2934}
e9a2599a
DW
2935
2936/* Find all free space records. */
2937int
2938xfs_alloc_query_all(
2939 struct xfs_btree_cur *cur,
2940 xfs_alloc_query_range_fn fn,
2941 void *priv)
2942{
2943 struct xfs_alloc_query_range_info query;
2944
2945 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
2946 query.priv = priv;
2947 query.fn = fn;
2948 return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
2949}
21ec5416
DW
2950
2951/* Find the size of the AG, in blocks. */
2952xfs_agblock_t
2953xfs_ag_block_count(
2954 struct xfs_mount *mp,
2955 xfs_agnumber_t agno)
2956{
2957 ASSERT(agno < mp->m_sb.sb_agcount);
2958
2959 if (agno < mp->m_sb.sb_agcount - 1)
2960 return mp->m_sb.sb_agblocks;
2961 return mp->m_sb.sb_dblocks - (agno * mp->m_sb.sb_agblocks);
2962}
2963
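/*
 * Worked example for xfs_ag_block_count() above, with a hypothetical
 * geometry of sb_dblocks = 1000, sb_agcount = 4 and sb_agblocks = 300:
 * AGs 0-2 each report 300 blocks, while the last AG (agno 3) reports
 * 1000 - 3 * 300 = 100 blocks.
 */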
2964/*
2965 * Verify that an AG block number pointer neither points outside the AG
2966 * nor points at static metadata.
2967 */
2968bool
2969xfs_verify_agbno(
2970 struct xfs_mount *mp,
2971 xfs_agnumber_t agno,
2972 xfs_agblock_t agbno)
2973{
2974 xfs_agblock_t eoag;
2975
2976 eoag = xfs_ag_block_count(mp, agno);
2977 if (agbno >= eoag)
2978 return false;
2979 if (agbno <= XFS_AGFL_BLOCK(mp))
2980 return false;
2981 return true;
2982}
2983
2984/*
2985 * Verify that an FS block number pointer neither points outside the
2986 * filesystem nor points at static AG metadata.
2987 */
2988bool
2989xfs_verify_fsbno(
2990 struct xfs_mount *mp,
2991 xfs_fsblock_t fsbno)
2992{
2993 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, fsbno);
2994
2995 if (agno >= mp->m_sb.sb_agcount)
2996 return false;
2997 return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno));
2998}
ce1d802e
DW
2999
3000/* Is there a record covering a given extent? */
3001int
3002xfs_alloc_has_record(
3003 struct xfs_btree_cur *cur,
3004 xfs_agblock_t bno,
3005 xfs_extlen_t len,
3006 bool *exists)
3007{
3008 union xfs_btree_irec low;
3009 union xfs_btree_irec high;
3010
3011 memset(&low, 0, sizeof(low));
3012 low.a.ar_startblock = bno;
3013 memset(&high, 0xFF, sizeof(high));
3014 high.a.ar_startblock = bno + len - 1;
3015
3016 return xfs_btree_has_record(cur, &low, &high, exists);
3017}