fs/gfs2/bmap.c

   1 /*
   2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   4  *
   5  * This copyrighted material is made available to anyone wishing to use,
   6  * modify, copy, or redistribute it subject to the terms and conditions
   7  * of the GNU General Public License version 2.
   8  */
   9
  10 #include <linux/spinlock.h>
  11 #include <linux/completion.h>
  12 #include <linux/buffer_head.h>
  13 #include <linux/blkdev.h>
  14 #include <linux/gfs2_ondisk.h>
  15 #include <linux/crc32.h>
  16 #include <linux/iomap.h>
  17
  18 #include "gfs2.h"
  19 #include "incore.h"
  20 #include "bmap.h"
  21 #include "glock.h"
  22 #include "inode.h"
  23 #include "meta_io.h"
  24 #include "quota.h"
  25 #include "rgrp.h"
  26 #include "log.h"
  27 #include "super.h"
  28 #include "trans.h"
  29 #include "dir.h"
  30 #include "util.h"
  31 #include "trace_gfs2.h"
  32
  33 /* This doesn't need to be that large as max 64 bit pointers in a 4k
  34  * block is 512, so __u16 is fine for that. It saves stack space to
  35  * keep it small.
  36  */
  37 struct metapath {
  38         struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
  39         __u16 mp_list[GFS2_MAX_META_HEIGHT];
  40         int mp_fheight; /* find_metapath height */
  41         int mp_aheight; /* actual height (lookup height) */
  42 };
  43
  44 /**
  45  * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
  46  * @ip: the inode
  47  * @dibh: the dinode buffer
  48  * @block: the block number that was allocated
  49  * @page: The (optional) page. This is looked up if @page is NULL
  50  *
  51  * Returns: errno
  52  */
  53
  54 static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
  55                                u64 block, struct page *page)
  56 {
  57         struct inode *inode = &ip->i_inode;
  58         struct buffer_head *bh;
  59         int release = 0;
  60
  61         if (!page || page->index) {
  62                 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
  63                 if (!page)
  64                         return -ENOMEM;
  65                 release = 1;
  66         }
  67
  68         if (!PageUptodate(page)) {
  69                 void *kaddr = kmap(page);
  70                 u64 dsize = i_size_read(inode);
  71
  72                 if (dsize > gfs2_max_stuffed_size(ip))
  73                         dsize = gfs2_max_stuffed_size(ip);
  74
  75                 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
  76                 memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
  77                 kunmap(page);
  78
  79                 SetPageUptodate(page);
  80         }
  81
  82         if (!page_has_buffers(page))
  83                 create_empty_buffers(page, BIT(inode->i_blkbits),
  84                                      BIT(BH_Uptodate));
  85
  86         bh = page_buffers(page);
  87
  88         if (!buffer_mapped(bh))
  89                 map_bh(bh, inode->i_sb, block);
  90
  91         set_buffer_uptodate(bh);
  92         if (gfs2_is_jdata(ip))
  93                 gfs2_trans_add_data(ip->i_gl, bh);
  94         else {
  95                 mark_buffer_dirty(bh);
  96                 gfs2_ordered_add_inode(ip);
  97         }
  98
  99         if (release) {
 100                 unlock_page(page);
 101                 put_page(page);
 102         }
 103
 104         return 0;
 105 }
 106
 107 /**
 108  * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 109  * @ip: The GFS2 inode to unstuff
 110  * @page: The (optional) page. This is looked up if the @page is NULL
 111  *
 112  * This routine unstuffs a dinode and returns it to a "normal" state such
 113  * that the height can be grown in the traditional way.
 114  *
 115  * Returns: errno
 116  */
 117
 118 int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
 119 {
 120         struct buffer_head *bh, *dibh;
 121         struct gfs2_dinode *di;
 122         u64 block = 0;
 123         int isdir = gfs2_is_dir(ip);
 124         int error;
 125
 126         down_write(&ip->i_rw_mutex);
 127
 128         error = gfs2_meta_inode_buffer(ip, &dibh);
 129         if (error)
 130                 goto out;
 131
 132         if (i_size_read(&ip->i_inode)) {
 133                 /* Get a free block, fill it with the stuffed data,
 134                    and write it out to disk */
 135
 136                 unsigned int n = 1;
 137                 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
 138                 if (error)
 139                         goto out_brelse;
 140                 if (isdir) {
 141                         gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
 142                         error = gfs2_dir_get_new_buffer(ip, block, &bh);
 143                         if (error)
 144                                 goto out_brelse;
 145                         gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
 146                                               dibh, sizeof(struct gfs2_dinode));
 147                         brelse(bh);
 148                 } else {
 149                         error = gfs2_unstuffer_page(ip, dibh, block, page);
 150                         if (error)
 151                                 goto out_brelse;
 152                 }
 153         }
 154
 155         /*  Set up the pointer to the new block  */
 156
 157         gfs2_trans_add_meta(ip->i_gl, dibh);
 158         di = (struct gfs2_dinode *)dibh->b_data;
 159         gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 160
 161         if (i_size_read(&ip->i_inode)) {
 162                 *(__be64 *)(di + 1) = cpu_to_be64(block);
 163                 gfs2_add_inode_blocks(&ip->i_inode, 1);
 164                 di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
 165         }
 166
 167         ip->i_height = 1;
 168         di->di_height = cpu_to_be16(1);
 169
 170 out_brelse:
 171         brelse(dibh);
 172 out:
 173         up_write(&ip->i_rw_mutex);
 174         return error;
 175 }
 176
 177
 178 /**
 179  * find_metapath - Find path through the metadata tree
 180  * @sdp: The superblock
 181  * @block: The disk block to look up
 182  * @mp: The metapath to return the result in
 183  * @height: The pre-calculated height of the metadata tree
 184  *
 185  *   This routine returns a struct metapath structure that defines a path
 186  *   through the metadata of inode "ip" to get to block "block".
 187  *
 188  *   Example:
 189  *   Given:  "ip" is a height 3 file, "offset" is 101342453, and this is a
 190  *   filesystem with a blocksize of 4096.
 191  *
 192  *   find_metapath() would return a struct metapath structure set to:
 193  *   mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
 194  *
 195  *   That means that in order to get to the block containing the byte at
 196  *   offset 101342453, we would load the indirect block pointed to by pointer
 197  *   0 in the dinode.  We would then load the indirect block pointed to by
 198  *   pointer 48 in that indirect block.  We would then load the data block
 199  *   pointed to by pointer 165 in that indirect block.
 200  *
 201  *             ----------------------------------------
 202  *             | Dinode |                             |
 203  *             |        |                            4|
 204  *             |        |0 1 2 3 4 5                 9|
 205  *             |        |                            6|
 206  *             ----------------------------------------
 207  *                       |
 208  *                       |
 209  *                       V
 210  *             ----------------------------------------
 211  *             | Indirect Block                       |
 212  *             |                                     5|
 213  *             |            4 4 4 4 4 5 5            1|
 214  *             |0           5 6 7 8 9 0 1            2|
 215  *             ----------------------------------------
 216  *                                |
 217  *                                |
 218  *                                V
 219  *             ----------------------------------------
 220  *             | Indirect Block                       |
 221  *             |                         1 1 1 1 1   5|
 222  *             |                         6 6 6 6 6   1|
 223  *             |0                        3 4 5 6 7   2|
 224  *             ----------------------------------------
 225  *                                           |
 226  *                                           |
 227  *                                           V
 228  *             ----------------------------------------
 229  *             | Data block containing offset         |
 230  *             |            101342453                 |
 231  *             |                                      |
 232  *             |                                      |
 233  *             ----------------------------------------
 234  *
 235  */
 236
 237 static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
 238                           struct metapath *mp, unsigned int height)
 239 {
 240         unsigned int i;
 241
 242         mp->mp_fheight = height;
 243         for (i = height; i--;)
 244                 mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
 245 }
 246
 247 static inline unsigned int metapath_branch_start(const struct metapath *mp)
 248 {
 249         if (mp->mp_list[0] == 0)
 250                 return 2;
 251         return 1;
 252 }
 253
 254 /**
 255  * metaptr1 - Return the first possible metadata pointer in a metapath buffer
 256  * @height: The metadata height (0 = dinode)
 257  * @mp: The metapath
 258  */
 259 static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
 260 {
 261         struct buffer_head *bh = mp->mp_bh[height];
 262         if (height == 0)
 263                 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
 264         return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
 265 }
 266
 267 /**
 268  * metapointer - Return pointer to start of metadata in a buffer
 269  * @height: The metadata height (0 = dinode)
 270  * @mp: The metapath
 271  *
 272  * Return a pointer to the block number of the next height of the metadata
 273  * tree given a buffer containing the pointer to the current height of the
 274  * metadata tree.
 275  */
 276
 277 static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
 278 {
 279         __be64 *p = metaptr1(height, mp);
 280         return p + mp->mp_list[height];
 281 }
 282
 283 static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
 284 {
 285         const struct buffer_head *bh = mp->mp_bh[height];
 286         return (const __be64 *)(bh->b_data + bh->b_size);
 287 }
 288
 289 static void clone_metapath(struct metapath *clone, struct metapath *mp)
 290 {
 291         unsigned int hgt;
 292
 293         *clone = *mp;
 294         for (hgt = 0; hgt < mp->mp_aheight; hgt++)
 295                 get_bh(clone->mp_bh[hgt]);
 296 }
 297
 298 static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
 299 {
 300         const __be64 *t;
 301
 302         for (t = start; t < end; t++) {
 303                 struct buffer_head *rabh;
 304
 305                 if (!*t)
 306                         continue;
 307
 308                 rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
 309                 if (trylock_buffer(rabh)) {
 310                         if (!buffer_uptodate(rabh)) {
 311                                 rabh->b_end_io = end_buffer_read_sync;
 312                                 submit_bh(REQ_OP_READ,
 313                                           REQ_RAHEAD | REQ_META | REQ_PRIO,
 314                                           rabh);
 315                                 continue;
 316                         }
 317                         unlock_buffer(rabh);
 318                 }
 319                 brelse(rabh);
 320         }
 321 }
 322
 323 static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
 324                              unsigned int x, unsigned int h)
 325 {
 326         for (; x < h; x++) {
 327                 __be64 *ptr = metapointer(x, mp);
 328                 u64 dblock = be64_to_cpu(*ptr);
 329                 int ret;
 330
 331                 if (!dblock)
 332                         break;
 333                 ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
 334                 if (ret)
 335                         return ret;
 336         }
 337         mp->mp_aheight = x + 1;
 338         return 0;
 339 }
 340
 341 /**
 342  * lookup_metapath - Walk the metadata tree to a specific point
 343  * @ip: The inode
 344  * @mp: The metapath
 345  *
 346  * Assumes that the inode's buffer has already been looked up and
 347  * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 348  * by find_metapath().
 349  *
 350  * If this function encounters part of the tree which has not been
 351  * allocated, it returns the current height of the tree at the point
 352  * at which it found the unallocated block. Blocks which are found are
 353  * added to the mp->mp_bh[] list.
 354  *
 355  * Returns: error
 356  */
 357
 358 static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
 359 {
 360         return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
 361 }
 362
 363 /**
 364  * fillup_metapath - fill up buffers for the metadata path to a specific height
 365  * @ip: The inode
 366  * @mp: The metapath
 367  * @h: The height to which it should be mapped
 368  *
 369  * Similar to lookup_metapath, but does lookups for a range of heights
 370  *
 371  * Returns: error or the number of buffers filled
 372  */
 373
 374 static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
 375 {
 376         unsigned int x = 0;
 377         int ret;
 378
 379         if (h) {
 380                 /* find the first buffer we need to look up. */
 381                 for (x = h - 1; x > 0; x--) {
 382                         if (mp->mp_bh[x])
 383                                 break;
 384                 }
 385         }
 386         ret = __fillup_metapath(ip, mp, x, h);
 387         if (ret)
 388                 return ret;
 389         return mp->mp_aheight - x - 1;
 390 }
 391
 392 static inline void release_metapath(struct metapath *mp)
 393 {
 394         int i;
 395
 396         for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
 397                 if (mp->mp_bh[i] == NULL)
 398                         break;
 399                 brelse(mp->mp_bh[i]);
 400         }
 401 }
 402
 403 /**
 404  * gfs2_extent_length - Returns length of an extent of blocks
 405  * @start: Start of the buffer
 406  * @len: Length of the buffer in bytes
 407  * @ptr: Current position in the buffer
 408  * @limit: Max extent length to return (0 = unlimited)
 409  * @eob: Set to 1 if we hit "end of block"
 410  *
 411  * If the first block is zero (unallocated) it will return the number of
 412  * unallocated blocks in the extent, otherwise it will return the number
 413  * of contiguous blocks in the extent.
 414  *
 415  * Returns: The length of the extent (minimum of one block)
 416  */
 417
 418 static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob)
 419 {
 420         const __be64 *end = (start + len);
 421         const __be64 *first = ptr;
 422         u64 d = be64_to_cpu(*ptr);
 423
 424         *eob = 0;
 425         do {
 426                 ptr++;
 427                 if (ptr >= end)
 428                         break;
 429                 if (limit && --limit == 0)
 430                         break;
 431                 if (d)
 432                         d++;
 433         } while(be64_to_cpu(*ptr) == d);
 434         if (ptr >= end)
 435                 *eob = 1;
 436         return (ptr - first);
 437 }
 438
 439 typedef const __be64 *(*gfs2_metadata_walker)(
 440                 struct metapath *mp,
 441                 const __be64 *start, const __be64 *end,
 442                 u64 factor, void *data);
 443
 444 #define WALK_STOP ((__be64 *)0)
 445 #define WALK_NEXT ((__be64 *)1)
 446
 447 static int gfs2_walk_metadata(struct inode *inode, sector_t lblock,
 448                 u64 len, struct metapath *mp, gfs2_metadata_walker walker,
 449                 void *data)
 450 {
 451         struct metapath clone;
 452         struct gfs2_inode *ip = GFS2_I(inode);
 453         struct gfs2_sbd *sdp = GFS2_SB(inode);
 454         const __be64 *start, *end, *ptr;
 455         u64 factor = 1;
 456         unsigned int hgt;
 457         int ret = 0;
 458
 459         for (hgt = ip->i_height - 1; hgt >= mp->mp_aheight; hgt--)
 460                 factor *= sdp->sd_inptrs;
 461
 462         for (;;) {
 463                 u64 step;
 464
 465                 /* Walk indirect block. */
 466                 start = metapointer(hgt, mp);
 467                 end = metaend(hgt, mp);
 468
 469                 step = (end - start) * factor;
 470                 if (step > len)
 471                         end = start + DIV_ROUND_UP_ULL(len, factor);
 472
 473                 ptr = walker(mp, start, end, factor, data);
 474                 if (ptr == WALK_STOP)
 475                         break;
 476                 if (step >= len)
 477                         break;
 478                 len -= step;
 479                 if (ptr != WALK_NEXT) {
 480                         BUG_ON(!*ptr);
 481                         mp->mp_list[hgt] += ptr - start;
 482                         goto fill_up_metapath;
 483                 }
 484
 485 lower_metapath:
 486                 /* Decrease height of metapath. */
 487                 if (mp != &clone) {
 488                         clone_metapath(&clone, mp);
 489                         mp = &clone;
 490                 }
 491                 brelse(mp->mp_bh[hgt]);
 492                 mp->mp_bh[hgt] = NULL;
 493                 if (!hgt)
 494                         break;
 495                 hgt--;
 496                 factor *= sdp->sd_inptrs;
 497
 498                 /* Advance in metadata tree. */
 499                 (mp->mp_list[hgt])++;
 500                 start = metapointer(hgt, mp);
 501                 end = metaend(hgt, mp);
 502                 if (start >= end) {
 503                         mp->mp_list[hgt] = 0;
 504                         if (!hgt)
 505                                 break;
 506                         goto lower_metapath;
 507                 }
 508
 509 fill_up_metapath:
 510                 /* Increase height of metapath. */
 511                 if (mp != &clone) {
 512                         clone_metapath(&clone, mp);
 513                         mp = &clone;
 514                 }
 515                 ret = fillup_metapath(ip, mp, ip->i_height - 1);
 516                 if (ret < 0)
 517                         break;
 518                 hgt += ret;
 519                 for (; ret; ret--)
 520                         do_div(factor, sdp->sd_inptrs);
 521                 mp->mp_aheight = hgt + 1;
 522         }
 523         if (mp == &clone)
 524                 release_metapath(mp);
 525         return ret;
 526 }
 527
 528 struct gfs2_hole_walker_args {
 529         u64 blocks;
 530 };
 531
 532 static const __be64 *gfs2_hole_walker(struct metapath *mp,
 533                 const __be64 *start, const __be64 *end,
 534                 u64 factor, void *data)
 535 {
 536         struct gfs2_hole_walker_args *args = data;
 537         const __be64 *ptr;
 538
 539         for (ptr = start; ptr < end; ptr++) {
 540                 if (*ptr) {
 541                         args->blocks += (ptr - start) * factor;
 542                         if (mp->mp_aheight == mp->mp_fheight)
 543                                 return WALK_STOP;
 544                         return ptr;  /* increase height */
 545                 }
 546         }
 547         args->blocks += (end - start) * factor;
 548         return WALK_NEXT;
 549 }
 550
 551 /**
 552  * gfs2_hole_size - figure out the size of a hole
 553  * @inode: The inode
 554  * @lblock: The logical starting block number
 555  * @len: How far to look (in blocks)
 556  * @mp: The metapath at lblock
 557  * @iomap: The iomap to store the hole size in
 558  *
 559  * This function modifies @mp.
 560  *
 561  * Returns: errno on error
 562  */
 563 static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
 564                           struct metapath *mp, struct iomap *iomap)
 565 {
 566         struct gfs2_hole_walker_args args = { };
 567         int ret = 0;
 568
 569         ret = gfs2_walk_metadata(inode, lblock, len, mp, gfs2_hole_walker, &args);
 570         if (!ret)
 571                 iomap->length = args.blocks << inode->i_blkbits;
 572         return ret;
 573 }
 574
 575 static inline void bmap_lock(struct gfs2_inode *ip, int create)
 576 {
 577         if (create)
 578                 down_write(&ip->i_rw_mutex);
 579         else
 580                 down_read(&ip->i_rw_mutex);
 581 }
 582
 583 static inline void bmap_unlock(struct gfs2_inode *ip, int create)
 584 {
 585         if (create)
 586                 up_write(&ip->i_rw_mutex);
 587         else
 588                 up_read(&ip->i_rw_mutex);
 589 }
 590
 591 static inline __be64 *gfs2_indirect_init(struct metapath *mp,
 592                                          struct gfs2_glock *gl, unsigned int i,
 593                                          unsigned offset, u64 bn)
 594 {
 595         __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
 596                        ((i > 1) ? sizeof(struct gfs2_meta_header) :
 597                                  sizeof(struct gfs2_dinode)));
 598         BUG_ON(i < 1);
 599         BUG_ON(mp->mp_bh[i] != NULL);
 600         mp->mp_bh[i] = gfs2_meta_new(gl, bn);
 601         gfs2_trans_add_meta(gl, mp->mp_bh[i]);
 602         gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
 603         gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
 604         ptr += offset;
 605         *ptr = cpu_to_be64(bn);
 606         return ptr;
 607 }
 608
 609 enum alloc_state {
 610         ALLOC_DATA = 0,
 611         ALLOC_GROW_DEPTH = 1,
 612         ALLOC_GROW_HEIGHT = 2,
 613         /* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
 614 };
 615
 616 /**
 617  * gfs2_bmap_alloc - Build a metadata tree of the requested height
 618  * @inode: The GFS2 inode
 619  * @lblock: The logical starting block of the extent
 620  * @bh_map: This is used to return the mapping details
 621  * @zero_new: True if newly allocated blocks should be zeroed
 622  * @mp: The metapath, with proper height information calculated
 623  * @maxlen: The max number of data blocks to alloc
 624  * @dblock: Pointer to return the resulting new block
 625  * @dblks: Pointer to return the number of blocks allocated
 626  *
 627  * In this routine we may have to alloc:
 628  *   i) Indirect blocks to grow the metadata tree height
 629  *  ii) Indirect blocks to fill in lower part of the metadata tree
 630  * iii) Data blocks
 631  *
 632  * The function is in two parts. The first part works out the total
 633  * number of blocks which we need. The second part does the actual
 634  * allocation asking for an extent at a time (if enough contiguous free
 635  * blocks are available, there will only be one request per bmap call)
 636  * and uses the state machine to initialise the blocks in order.
 637  *
 638  * Returns: errno on error
 639  */
 640
 641 static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
 642                             unsigned flags, struct metapath *mp)
 643 {
 644         struct gfs2_inode *ip = GFS2_I(inode);
 645         struct gfs2_sbd *sdp = GFS2_SB(inode);
 646         struct buffer_head *dibh = mp->mp_bh[0];
 647         u64 bn;
 648         unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
 649         unsigned dblks = 0;
 650         unsigned ptrs_per_blk;
 651         const unsigned end_of_metadata = mp->mp_fheight - 1;
 652         enum alloc_state state;
 653         __be64 *ptr;
 654         __be64 zero_bn = 0;
 655         size_t maxlen = iomap->length >> inode->i_blkbits;
 656
 657         BUG_ON(mp->mp_aheight < 1);
 658         BUG_ON(dibh == NULL);
 659
 660         gfs2_trans_add_meta(ip->i_gl, dibh);
 661
 662         if (mp->mp_fheight == mp->mp_aheight) {
 663                 struct buffer_head *bh;
 664                 int eob;
 665
 666                 /* Bottom indirect block exists, find unalloced extent size */
 667                 ptr = metapointer(end_of_metadata, mp);
 668                 bh = mp->mp_bh[end_of_metadata];
 669                 dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
 670                                            maxlen, &eob);
 671                 BUG_ON(dblks < 1);
 672                 state = ALLOC_DATA;
 673         } else {
 674                 /* Need to allocate indirect blocks */
 675                 ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
 676                         sdp->sd_diptrs;
 677                 dblks = min(maxlen, (size_t)(ptrs_per_blk -
 678                                              mp->mp_list[end_of_metadata]));
 679                 if (mp->mp_fheight == ip->i_height) {
 680                         /* Writing into existing tree, extend tree down */
 681                         iblks = mp->mp_fheight - mp->mp_aheight;
 682                         state = ALLOC_GROW_DEPTH;
 683                 } else {
 684                         /* Building up tree height */
 685                         state = ALLOC_GROW_HEIGHT;
 686                         iblks = mp->mp_fheight - ip->i_height;
 687                         branch_start = metapath_branch_start(mp);
 688                         iblks += (mp->mp_fheight - branch_start);
 689                 }
 690         }
 691
 692         /* start of the second part of the function (state machine) */
 693
 694         blks = dblks + iblks;
 695         i = mp->mp_aheight;
 696         do {
 697                 int error;
 698                 n = blks - alloced;
 699                 error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
 700                 if (error)
 701                         return error;
 702                 alloced += n;
 703                 if (state != ALLOC_DATA || gfs2_is_jdata(ip))
 704                         gfs2_trans_add_unrevoke(sdp, bn, n);
 705                 switch (state) {
 706                 /* Growing height of tree */
 707                 case ALLOC_GROW_HEIGHT:
 708                         if (i == 1) {
 709                                 ptr = (__be64 *)(dibh->b_data +
 710                                                  sizeof(struct gfs2_dinode));
 711                                 zero_bn = *ptr;
 712                         }
 713                         for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
 714                              i++, n--)
 715                                 gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
 716                         if (i - 1 == mp->mp_fheight - ip->i_height) {
 717                                 i--;
 718                                 gfs2_buffer_copy_tail(mp->mp_bh[i],
 719                                                 sizeof(struct gfs2_meta_header),
 720                                                 dibh, sizeof(struct gfs2_dinode));
 721                                 gfs2_buffer_clear_tail(dibh,
 722                                                 sizeof(struct gfs2_dinode) +
 723                                                 sizeof(__be64));
 724                                 ptr = (__be64 *)(mp->mp_bh[i]->b_data +
 725                                         sizeof(struct gfs2_meta_header));
 726                                 *ptr = zero_bn;
 727                                 state = ALLOC_GROW_DEPTH;
 728                                 for(i = branch_start; i < mp->mp_fheight; i++) {
 729                                         if (mp->mp_bh[i] == NULL)
 730                                                 break;
 731                                         brelse(mp->mp_bh[i]);
 732                                         mp->mp_bh[i] = NULL;
 733                                 }
 734                                 i = branch_start;
 735                         }
 736                         if (n == 0)
 737                                 break;
 738                 /* Branching from existing tree */
 739                 case ALLOC_GROW_DEPTH:
 740                         if (i > 1 && i < mp->mp_fheight)
 741                                 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
 742                         for (; i < mp->mp_fheight && n > 0; i++, n--)
 743                                 gfs2_indirect_init(mp, ip->i_gl, i,
 744                                                    mp->mp_list[i-1], bn++);
 745                         if (i == mp->mp_fheight)
 746                                 state = ALLOC_DATA;
 747                         if (n == 0)
 748                                 break;
 749                 /* Tree complete, adding data blocks */
 750                 case ALLOC_DATA:
 751                         BUG_ON(n > dblks);
 752                         BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
 753                         gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
 754                         dblks = n;
 755                         ptr = metapointer(end_of_metadata, mp);
 756                         iomap->addr = bn << inode->i_blkbits;
 757                         iomap->flags |= IOMAP_F_NEW;
 758                         while (n-- > 0)
 759                                 *ptr++ = cpu_to_be64(bn++);
 760                         break;
 761                 }
 762         } while (iomap->addr == IOMAP_NULL_ADDR);
 763
 764         iomap->length = (u64)dblks << inode->i_blkbits;
 765         ip->i_height = mp->mp_fheight;
 766         gfs2_add_inode_blocks(&ip->i_inode, alloced);
 767         gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
 768         return 0;
 769 }
 770
 771 static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
 772 {
 773         struct gfs2_inode *ip = GFS2_I(inode);
 774
 775         iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
 776                       sizeof(struct gfs2_dinode);
 777         iomap->offset = 0;
 778         iomap->length = i_size_read(inode);
 779         iomap->type = IOMAP_MAPPED;
 780         iomap->flags = IOMAP_F_DATA_INLINE;
 781 }
 782
 783 /**
 784  * gfs2_iomap_begin - Map blocks from an inode to disk blocks
 785  * @inode: The inode
 786  * @pos: Starting position in bytes
 787  * @length: Length to map, in bytes
 788  * @flags: iomap flags
 789  * @iomap: The iomap structure
 790  *
 791  * Returns: errno
 792  */
 793 int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 794                      unsigned flags, struct iomap *iomap)
 795 {
 796         struct gfs2_inode *ip = GFS2_I(inode);
 797         struct gfs2_sbd *sdp = GFS2_SB(inode);
 798         struct metapath mp = { .mp_aheight = 1, };
 799         __be64 *ptr;
 800         sector_t lblock;
 801         sector_t lend;
 802         int ret = 0;
 803         int eob;
 804         unsigned int len;
 805         struct buffer_head *bh;
 806         u8 height;
 807
 808         trace_gfs2_iomap_start(ip, pos, length, flags);
 809         if (!length) {
 810                 ret = -EINVAL;
 811                 goto out;
 812         }
 813
 814         if (gfs2_is_stuffed(ip)) {
 815                 if (flags & IOMAP_REPORT) {
 816                         gfs2_stuffed_iomap(inode, iomap);
 817                         if (pos >= iomap->length)
 818                                 ret = -ENOENT;
 819                         goto out;
 820                 }
 821                 BUG_ON(!(flags & IOMAP_WRITE));
 822         }
 823
 824         lblock = pos >> inode->i_blkbits;
 825         lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits;
 826         len = lend - lblock;
 827
 828         iomap->offset = lblock << inode->i_blkbits;
 829         iomap->addr = IOMAP_NULL_ADDR;
 830         iomap->type = IOMAP_HOLE;
 831         iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
 832         iomap->flags = IOMAP_F_MERGED;
 833         bmap_lock(ip, flags & IOMAP_WRITE);
 834
 835         ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
 836         if (ret)
 837                 goto out_release;
 838
 839         height = ip->i_height;
 840         while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
 841                 height++;
 842         find_metapath(sdp, lblock, &mp, height);
 843         if (height > ip->i_height || gfs2_is_stuffed(ip))
 844                 goto do_alloc;
 845
 846         ret = lookup_metapath(ip, &mp);
 847         if (ret)
 848                 goto out_release;
 849
 850         if (mp.mp_aheight != ip->i_height)
 851                 goto do_alloc;
 852
 853         ptr = metapointer(ip->i_height - 1, &mp);
 854         if (*ptr == 0)
 855                 goto do_alloc;
 856
 857         iomap->type = IOMAP_MAPPED;
 858         iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
 859
 860         bh = mp.mp_bh[ip->i_height - 1];
 861         len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob);
 862         if (eob)
 863                 iomap->flags |= IOMAP_F_BOUNDARY;
 864         iomap->length = (u64)len << inode->i_blkbits;
 865
 866 out_release:
 867         release_metapath(&mp);
 868         bmap_unlock(ip, flags & IOMAP_WRITE);
 869 out:
 870         trace_gfs2_iomap_end(ip, iomap, ret);
 871         return ret;
 872
 873 do_alloc:
 874         if (flags & IOMAP_WRITE) {
 875                 ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
 876         } else if (flags & IOMAP_REPORT) {
 877                 loff_t size = i_size_read(inode);
 878                 if (pos >= size)
 879                         ret = -ENOENT;
 880                 else if (height <= ip->i_height)
 881                         ret = gfs2_hole_size(inode, lblock, len, &mp, iomap);
 882                 else
 883                         iomap->length = size - pos;
 884         }
 885         goto out_release;
 886 }
 887
 888 /**
 889  * gfs2_block_map - Map one or more blocks of an inode to a disk block
 890  * @inode: The inode
 891  * @lblock: The logical block number
 892  * @bh_map: The bh to be mapped
 893  * @create: True if its ok to alloc blocks to satify the request
 894  *
 895  * The size of the requested mapping is defined in bh_map->b_size.
 896  *
 897  * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
 898  * when @lblock is not mapped.  Sets buffer_mapped(bh_map) and
 899  * bh_map->b_size to indicate the size of the mapping when @lblock and
 900  * successive blocks are mapped, up to the requested size.
 901  *
 902  * Sets buffer_boundary() if a read of metadata will be required
 903  * before the next block can be mapped. Sets buffer_new() if new
 904  * blocks were allocated.
 905  *
 906  * Returns: errno
 907  */
 908
 909 int gfs2_block_map(struct inode *inode, sector_t lblock,
 910                    struct buffer_head *bh_map, int create)
 911 {
 912         struct gfs2_inode *ip = GFS2_I(inode);
 913         struct iomap iomap;
 914         int ret, flags = 0;
 915
 916         clear_buffer_mapped(bh_map);
 917         clear_buffer_new(bh_map);
 918         clear_buffer_boundary(bh_map);
 919         trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
 920
 921         if (create)
 922                 flags |= IOMAP_WRITE;
 923         ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
 924                                bh_map->b_size, flags, &iomap);
 925         if (ret) {
 926                 if (!create && ret == -ENOENT) {
 927                         /* Return unmapped buffer beyond the end of file.  */
 928                         ret = 0;
 929                 }
 930                 goto out;
 931         }
 932
 933         if (iomap.length > bh_map->b_size) {
 934                 iomap.length = bh_map->b_size;
 935                 iomap.flags &= ~IOMAP_F_BOUNDARY;
 936         }
 937         if (iomap.addr != IOMAP_NULL_ADDR)
 938                 map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
 939         bh_map->b_size = iomap.length;
 940         if (iomap.flags & IOMAP_F_BOUNDARY)
 941                 set_buffer_boundary(bh_map);
 942         if (iomap.flags & IOMAP_F_NEW)
 943                 set_buffer_new(bh_map);
 944
 945 out:
 946         trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
 947         return ret;
 948 }
 949
 950 /*
 951  * Deprecated: do not use in new code
 952  */
 953 int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
 954 {
 955         struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
 956         int ret;
 957         int create = *new;
 958
 959         BUG_ON(!extlen);
 960         BUG_ON(!dblock);
 961         BUG_ON(!new);
 962
 963         bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
 964         ret = gfs2_block_map(inode, lblock, &bh, create);
 965         *extlen = bh.b_size >> inode->i_blkbits;
 966         *dblock = bh.b_blocknr;
 967         if (buffer_new(&bh))
 968                 *new = 1;
 969         else
 970                 *new = 0;
 971         return ret;
 972 }
 973
 974 /**
 975  * gfs2_block_zero_range - Deal with zeroing out data
 976  *
 977  * This is partly borrowed from ext3.
 978  */
 979 static int gfs2_block_zero_range(struct inode *inode, loff_t from,
 980                                  unsigned int length)
 981 {
 982         struct address_space *mapping = inode->i_mapping;
 983         struct gfs2_inode *ip = GFS2_I(inode);
 984         unsigned long index = from >> PAGE_SHIFT;
 985         unsigned offset = from & (PAGE_SIZE-1);
 986         unsigned blocksize, iblock, pos;
 987         struct buffer_head *bh;
 988         struct page *page;
 989         int err;
 990
 991         page = find_or_create_page(mapping, index, GFP_NOFS);
 992         if (!page)
 993                 return 0;
 994
 995         blocksize = inode->i_sb->s_blocksize;
 996         iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
 997
 998         if (!page_has_buffers(page))
 999                 create_empty_buffers(page, blocksize, 0);
1000
1001         /* Find the buffer that contains "offset" */
1002         bh = page_buffers(page);
1003         pos = blocksize;
1004         while (offset >= pos) {
1005                 bh = bh->b_this_page;
1006                 iblock++;
1007                 pos += blocksize;
1008         }
1009
1010         err = 0;
1011
1012         if (!buffer_mapped(bh)) {
1013                 gfs2_block_map(inode, iblock, bh, 0);
1014                 /* unmapped? It's a hole - nothing to do */
1015                 if (!buffer_mapped(bh))
1016                         goto unlock;
1017         }
1018
1019         /* Ok, it's mapped. Make sure it's up-to-date */
1020         if (PageUptodate(page))
1021                 set_buffer_uptodate(bh);
1022
1023         if (!buffer_uptodate(bh)) {
1024                 err = -EIO;
1025                 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
1026                 wait_on_buffer(bh);
1027                 /* Uhhuh. Read error. Complain and punt. */
1028                 if (!buffer_uptodate(bh))
1029                         goto unlock;
1030                 err = 0;
1031         }
1032
1033         if (gfs2_is_jdata(ip))
1034                 gfs2_trans_add_data(ip->i_gl, bh);
1035         else
1036                 gfs2_ordered_add_inode(ip);
1037
1038         zero_user(page, offset, length);
1039         mark_buffer_dirty(bh);
1040 unlock:
1041         unlock_page(page);
1042         put_page(page);
1043         return err;
1044 }
1045
1046 #define GFS2_JTRUNC_REVOKES 8192
1047
1048 /**
1049  * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
1050  * @inode: The inode being truncated
1051  * @oldsize: The original (larger) size
1052  * @newsize: The new smaller size
1053  *
1054  * With jdata files, we have to journal a revoke for each block which is
1055  * truncated. As a result, we need to split this into separate transactions
1056  * if the number of pages being truncated gets too large.
1057  */
1058
1059 static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
1060 {
1061         struct gfs2_sbd *sdp = GFS2_SB(inode);
1062         u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
1063         u64 chunk;
1064         int error;
1065
1066         while (oldsize != newsize) {
1067                 struct gfs2_trans *tr;
1068                 unsigned int offs;
1069
1070                 chunk = oldsize - newsize;
1071                 if (chunk > max_chunk)
1072                         chunk = max_chunk;
1073
1074                 offs = oldsize & ~PAGE_MASK;
1075                 if (offs && chunk > PAGE_SIZE)
1076                         chunk = offs + ((chunk - offs) & PAGE_MASK);
1077
1078                 truncate_pagecache(inode, oldsize - chunk);
1079                 oldsize -= chunk;
1080
1081                 tr = current->journal_info;
1082                 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
1083                         continue;
1084
1085                 gfs2_trans_end(sdp);
1086                 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
1087                 if (error)
1088                         return error;
1089         }
1090
1091         return 0;
1092 }
1093
1094 static int trunc_start(struct inode *inode, u64 newsize)
1095 {
1096         struct gfs2_inode *ip = GFS2_I(inode);
1097         struct gfs2_sbd *sdp = GFS2_SB(inode);
1098         struct buffer_head *dibh = NULL;
1099         int journaled = gfs2_is_jdata(ip);
1100         u64 oldsize = inode->i_size;
1101         int error;
1102
1103         if (journaled)
1104                 error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
1105         else
1106                 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1107         if (error)
1108                 return error;
1109
1110         error = gfs2_meta_inode_buffer(ip, &dibh);
1111         if (error)
1112                 goto out;
1113
1114         gfs2_trans_add_meta(ip->i_gl, dibh);
1115
1116         if (gfs2_is_stuffed(ip)) {
1117                 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
1118         } else {
1119                 unsigned int blocksize = i_blocksize(inode);
1120                 unsigned int offs = newsize & (blocksize - 1);
1121                 if (offs) {
1122                         error = gfs2_block_zero_range(inode, newsize,
1123                                                       blocksize - offs);
1124                         if (error)
1125                                 goto out;
1126                 }
1127                 ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
1128         }
1129
1130         i_size_write(inode, newsize);
1131         ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1132         gfs2_dinode_out(ip, dibh->b_data);
1133
1134         if (journaled)
1135                 error = gfs2_journaled_truncate(inode, oldsize, newsize);
1136         else
1137                 truncate_pagecache(inode, newsize);
1138
1139 out:
1140         brelse(dibh);
1141         if (current->journal_info)
1142                 gfs2_trans_end(sdp);
1143         return error;
1144 }
1145
1146 /**
1147  * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
1148  * @ip: inode
1149  * @rg_gh: holder of resource group glock
1150  * @bh: buffer head to sweep
1151  * @start: starting point in bh
1152  * @end: end point in bh
1153  * @meta: true if bh points to metadata (rather than data)
1154  * @btotal: place to keep count of total blocks freed
1155  *
1156  * We sweep a metadata buffer (provided by the metapath) for blocks we need to
1157  * free, and free them all. However, we do it one rgrp at a time. If this
1158  * block has references to multiple rgrps, we break it into individual
1159  * transactions. This allows other processes to use the rgrps while we're
1160  * focused on a single one, for better concurrency / performance.
1161  * At every transaction boundary, we rewrite the inode into the journal.
1162  * That way the bitmaps are kept consistent with the inode and we can recover
1163  * if we're interrupted by power-outages.
1164  *
1165  * Returns: 0, or return code if an error occurred.
1166  *          *btotal has the total number of blocks freed
1167  */
1168 static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
1169                               struct buffer_head *bh, __be64 *start, __be64 *end,
1170                               bool meta, u32 *btotal)
1171 {
1172         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1173         struct gfs2_rgrpd *rgd;
1174         struct gfs2_trans *tr;
1175         __be64 *p;
1176         int blks_outside_rgrp;
1177         u64 bn, bstart, isize_blks;
1178         s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
1179         int ret = 0;
1180         bool buf_in_tr = false; /* buffer was added to transaction */
1181
1182 more_rgrps:
1183         rgd = NULL;
1184         if (gfs2_holder_initialized(rd_gh)) {
1185                 rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
1186                 gfs2_assert_withdraw(sdp,
1187                              gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
1188         }
1189         blks_outside_rgrp = 0;
1190         bstart = 0;
1191         blen = 0;
1192
1193         for (p = start; p < end; p++) {
1194                 if (!*p)
1195                         continue;
1196                 bn = be64_to_cpu(*p);
1197
1198                 if (rgd) {
1199                         if (!rgrp_contains_block(rgd, bn)) {
1200                                 blks_outside_rgrp++;
1201                                 continue;
1202                         }
1203                 } else {
1204                         rgd = gfs2_blk2rgrpd(sdp, bn, true);
1205                         if (unlikely(!rgd)) {
1206                                 ret = -EIO;
1207                                 goto out;
1208                         }
1209                         ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1210                                                  0, rd_gh);
1211                         if (ret)
1212                                 goto out;
1213
1214                         /* Must be done with the rgrp glock held: */
1215                         if (gfs2_rs_active(&ip->i_res) &&
1216                             rgd == ip->i_res.rs_rbm.rgd)
1217                                 gfs2_rs_deltree(&ip->i_res);
1218                 }
1219
1220                 /* The size of our transactions will be unknown until we
1221                    actually process all the metadata blocks that relate to
1222                    the rgrp. So we estimate. We know it can't be more than
1223                    the dinode's i_blocks and we don't want to exceed the
1224                    journal flush threshold, sd_log_thresh2. */
1225                 if (current->journal_info == NULL) {
1226                         unsigned int jblocks_rqsted, revokes;
1227
1228                         jblocks_rqsted = rgd->rd_length + RES_DINODE +
1229                                 RES_INDIRECT;
1230                         isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
1231                         if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
1232                                 jblocks_rqsted +=
1233                                         atomic_read(&sdp->sd_log_thresh2);
1234                         else
1235                                 jblocks_rqsted += isize_blks;
1236                         revokes = jblocks_rqsted;
1237                         if (meta)
1238                                 revokes += end - start;
1239                         else if (ip->i_depth)
1240                                 revokes += sdp->sd_inptrs;
1241                         ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
1242                         if (ret)
1243                                 goto out_unlock;
1244                         down_write(&ip->i_rw_mutex);
1245                 }
1246                 /* check if we will exceed the transaction blocks requested */
1247                 tr = current->journal_info;
1248                 if (tr->tr_num_buf_new + RES_STATFS +
1249                     RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
1250                         /* We set blks_outside_rgrp to ensure the loop will
1251                            be repeated for the same rgrp, but with a new
1252                            transaction. */
1253                         blks_outside_rgrp++;
1254                         /* This next part is tricky. If the buffer was added
1255                            to the transaction, we've already set some block
1256                            pointers to 0, so we better follow through and free
1257                            them, or we will introduce corruption (so break).
1258                            This may be impossible, or at least rare, but I
1259                            decided to cover the case regardless.
1260
1261                            If the buffer was not added to the transaction
1262                            (this call), doing so would exceed our transaction
1263                            size, so we need to end the transaction and start a
1264                            new one (so goto). */
1265
1266                         if (buf_in_tr)
1267                                 break;
1268                         goto out_unlock;
1269                 }
1270
1271                 gfs2_trans_add_meta(ip->i_gl, bh);
1272                 buf_in_tr = true;
1273                 *p = 0;
1274                 if (bstart + blen == bn) {
1275                         blen++;
1276                         continue;
1277                 }
1278                 if (bstart) {
1279                         __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
1280                         (*btotal) += blen;
1281                         gfs2_add_inode_blocks(&ip->i_inode, -blen);
1282                 }
1283                 bstart = bn;
1284                 blen = 1;
1285         }
1286         if (bstart) {
1287                 __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
1288                 (*btotal) += blen;
1289                 gfs2_add_inode_blocks(&ip->i_inode, -blen);
1290         }
1291 out_unlock:
1292         if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
1293                                             outside the rgrp we just processed,
1294                                             do it all over again. */
1295                 if (current->journal_info) {
1296                         struct buffer_head *dibh;
1297
1298                         ret = gfs2_meta_inode_buffer(ip, &dibh);
1299                         if (ret)
1300                                 goto out;
1301
1302                         /* Every transaction boundary, we rewrite the dinode
1303                            to keep its di_blocks current in case of failure. */
1304                         ip->i_inode.i_mtime = ip->i_inode.i_ctime =
1305                                 current_time(&ip->i_inode);
1306                         gfs2_trans_add_meta(ip->i_gl, dibh);
1307                         gfs2_dinode_out(ip, dibh->b_data);
1308                         brelse(dibh);
1309                         up_write(&ip->i_rw_mutex);
1310                         gfs2_trans_end(sdp);
1311                 }
1312                 gfs2_glock_dq_uninit(rd_gh);
1313                 cond_resched();
1314                 goto more_rgrps;
1315         }
1316 out:
1317         return ret;
1318 }
1319
1320 static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
1321 {
1322         if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
1323                 return false;
1324         return true;
1325 }
1326
1327 /**
1328  * find_nonnull_ptr - find a non-null pointer given a metapath and height
1329  * @mp: starting metapath
1330  * @h: desired height to search
1331  *
1332  * Assumes the metapath is valid (with buffers) out to height h.
1333  * Returns: true if a non-null pointer was found in the metapath buffer
1334  *          false if all remaining pointers are NULL in the buffer
1335  */
1336 static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
1337                              unsigned int h,
1338                              __u16 *end_list, unsigned int end_aligned)
1339 {
1340         struct buffer_head *bh = mp->mp_bh[h];
1341         __be64 *first, *ptr, *end;
1342
1343         first = metaptr1(h, mp);
1344         ptr = first + mp->mp_list[h];
1345         end = (__be64 *)(bh->b_data + bh->b_size);
1346         if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
1347                 bool keep_end = h < end_aligned;
1348                 end = first + end_list[h] + keep_end;
1349         }
1350
1351         while (ptr < end) {
1352                 if (*ptr) { /* if we have a non-null pointer */
1353                         mp->mp_list[h] = ptr - first;
1354                         h++;
1355                         if (h < GFS2_MAX_META_HEIGHT)
1356                                 mp->mp_list[h] = 0;
1357                         return true;
1358                 }
1359                 ptr++;
1360         }
1361         return false;
1362 }
1363
1364 enum dealloc_states {
1365         DEALLOC_MP_FULL = 0,    /* Strip a metapath with all buffers read in */
1366         DEALLOC_MP_LOWER = 1,   /* lower the metapath strip height */
1367         DEALLOC_FILL_MP = 2,  /* Fill in the metapath to the given height. */
1368         DEALLOC_DONE = 3,       /* process complete */
1369 };
1370
1371 static inline void
1372 metapointer_range(struct metapath *mp, int height,
1373                   __u16 *start_list, unsigned int start_aligned,
1374                   __u16 *end_list, unsigned int end_aligned,
1375                   __be64 **start, __be64 **end)
1376 {
1377         struct buffer_head *bh = mp->mp_bh[height];
1378         __be64 *first;
1379
1380         first = metaptr1(height, mp);
1381         *start = first;
1382         if (mp_eq_to_hgt(mp, start_list, height)) {
1383                 bool keep_start = height < start_aligned;
1384                 *start = first + start_list[height] + keep_start;
1385         }
1386         *end = (__be64 *)(bh->b_data + bh->b_size);
1387         if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
1388                 bool keep_end = height < end_aligned;
1389                 *end = first + end_list[height] + keep_end;
1390         }
1391 }
1392
1393 static inline bool walk_done(struct gfs2_sbd *sdp,
1394                              struct metapath *mp, int height,
1395                              __u16 *end_list, unsigned int end_aligned)
1396 {
1397         __u16 end;
1398
1399         if (end_list) {
1400                 bool keep_end = height < end_aligned;
1401                 if (!mp_eq_to_hgt(mp, end_list, height))
1402                         return false;
1403                 end = end_list[height] + keep_end;
1404         } else
1405                 end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
1406         return mp->mp_list[height] >= end;
1407 }
1408
1409 /**
1410  * punch_hole - deallocate blocks in a file
1411  * @ip: inode to truncate
1412  * @offset: the start of the hole
1413  * @length: the size of the hole (or 0 for truncate)
1414  *
1415  * Punch a hole into a file or truncate a file at a given position.  This
1416  * function operates in whole blocks (@offset and @length are rounded
1417  * accordingly); partially filled blocks must be cleared otherwise.
1418  *
1419  * This function works from the bottom up, and from the right to the left. In
1420  * other words, it strips off the highest layer (data) before stripping any of
1421  * the metadata. Doing it this way is best in case the operation is interrupted
1422  * by power failure, etc.  The dinode is rewritten in every transaction to
1423  * guarantee integrity.
1424  */
1425 static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
1426 {
1427         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1428         u64 maxsize = sdp->sd_heightsize[ip->i_height];
1429         struct metapath mp = {};
1430         struct buffer_head *dibh, *bh;
1431         struct gfs2_holder rd_gh;
1432         unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
1433         u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
1434         __u16 start_list[GFS2_MAX_META_HEIGHT];
1435         __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
1436         unsigned int start_aligned, uninitialized_var(end_aligned);
1437         unsigned int strip_h = ip->i_height - 1;
1438         u32 btotal = 0;
1439         int ret, state;
1440         int mp_h; /* metapath buffers are read in to this height */
1441         u64 prev_bnr = 0;
1442         __be64 *start, *end;
1443
1444         if (offset >= maxsize) {
1445                 /*
1446                  * The starting point lies beyond the allocated meta-data;
1447                  * there are no blocks do deallocate.
1448                  */
1449                 return 0;
1450         }
1451
1452         /*
1453          * The start position of the hole is defined by lblock, start_list, and
1454          * start_aligned.  The end position of the hole is defined by lend,
1455          * end_list, and end_aligned.
1456          *
1457          * start_aligned and end_aligned define down to which height the start
1458          * and end positions are aligned to the metadata tree (i.e., the
1459          * position is a multiple of the metadata granularity at the height
1460          * above).  This determines at which heights additional meta pointers
1461          * needs to be preserved for the remaining data.
1462          */
1463
1464         if (length) {
1465                 u64 end_offset = offset + length;
1466                 u64 lend;
1467
1468                 /*
1469                  * Clip the end at the maximum file size for the given height:
1470                  * that's how far the metadata goes; files bigger than that
1471                  * will have additional layers of indirection.
1472                  */
1473                 if (end_offset > maxsize)
1474                         end_offset = maxsize;
1475                 lend = end_offset >> bsize_shift;
1476
1477                 if (lblock >= lend)
1478                         return 0;
1479
1480                 find_metapath(sdp, lend, &mp, ip->i_height);
1481                 end_list = __end_list;
1482                 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1483
1484                 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1485                         if (end_list[mp_h])
1486                                 break;
1487                 }
1488                 end_aligned = mp_h;
1489         }
1490
1491         find_metapath(sdp, lblock, &mp, ip->i_height);
1492         memcpy(start_list, mp.mp_list, sizeof(start_list));
1493
1494         for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1495                 if (start_list[mp_h])
1496                         break;
1497         }
1498         start_aligned = mp_h;
1499
1500         ret = gfs2_meta_inode_buffer(ip, &dibh);
1501         if (ret)
1502                 return ret;
1503
1504         mp.mp_bh[0] = dibh;
1505         ret = lookup_metapath(ip, &mp);
1506         if (ret)
1507                 goto out_metapath;
1508
1509         /* issue read-ahead on metadata */
1510         for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
1511                 metapointer_range(&mp, mp_h, start_list, start_aligned,
1512                                   end_list, end_aligned, &start, &end);
1513                 gfs2_metapath_ra(ip->i_gl, start, end);
1514         }
1515
1516         if (mp.mp_aheight == ip->i_height)
1517                 state = DEALLOC_MP_FULL; /* We have a complete metapath */
1518         else
1519                 state = DEALLOC_FILL_MP; /* deal with partial metapath */
1520
1521         ret = gfs2_rindex_update(sdp);
1522         if (ret)
1523                 goto out_metapath;
1524
1525         ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1526         if (ret)
1527                 goto out_metapath;
1528         gfs2_holder_mark_uninitialized(&rd_gh);
1529
1530         mp_h = strip_h;
1531
1532         while (state != DEALLOC_DONE) {
1533                 switch (state) {
1534                 /* Truncate a full metapath at the given strip height.
1535                  * Note that strip_h == mp_h in order to be in this state. */
1536                 case DEALLOC_MP_FULL:
1537                         bh = mp.mp_bh[mp_h];
1538                         gfs2_assert_withdraw(sdp, bh);
1539                         if (gfs2_assert_withdraw(sdp,
1540                                                  prev_bnr != bh->b_blocknr)) {
1541                                 printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
1542                                        "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
1543                                        sdp->sd_fsname,
1544                                        (unsigned long long)ip->i_no_addr,
1545                                        prev_bnr, ip->i_height, strip_h, mp_h);
1546                         }
1547                         prev_bnr = bh->b_blocknr;
1548
1549                         if (gfs2_metatype_check(sdp, bh,
1550                                                 (mp_h ? GFS2_METATYPE_IN :
1551                                                         GFS2_METATYPE_DI))) {
1552                                 ret = -EIO;
1553                                 goto out;
1554                         }
1555
1556                         /*
1557                          * Below, passing end_aligned as 0 gives us the
1558                          * metapointer range excluding the end point: the end
1559                          * point is the first metapath we must not deallocate!
1560                          */
1561
1562                         metapointer_range(&mp, mp_h, start_list, start_aligned,
1563                                           end_list, 0 /* end_aligned */,
1564                                           &start, &end);
1565                         ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
1566                                                  start, end,
1567                                                  mp_h != ip->i_height - 1,
1568                                                  &btotal);
1569
1570                         /* If we hit an error or just swept dinode buffer,
1571                            just exit. */
1572                         if (ret || !mp_h) {
1573                                 state = DEALLOC_DONE;
1574                                 break;
1575                         }
1576                         state = DEALLOC_MP_LOWER;
1577                         break;
1578
1579                 /* lower the metapath strip height */
1580                 case DEALLOC_MP_LOWER:
1581                         /* We're done with the current buffer, so release it,
1582                            unless it's the dinode buffer. Then back up to the
1583                            previous pointer. */
1584                         if (mp_h) {
1585                                 brelse(mp.mp_bh[mp_h]);
1586                                 mp.mp_bh[mp_h] = NULL;
1587                         }
1588                         /* If we can't get any lower in height, we've stripped
1589                            off all we can. Next step is to back up and start
1590                            stripping the previous level of metadata. */
1591                         if (mp_h == 0) {
1592                                 strip_h--;
1593                                 memcpy(mp.mp_list, start_list, sizeof(start_list));
1594                                 mp_h = strip_h;
1595                                 state = DEALLOC_FILL_MP;
1596                                 break;
1597                         }
1598                         mp.mp_list[mp_h] = 0;
1599                         mp_h--; /* search one metadata height down */
1600                         mp.mp_list[mp_h]++;
1601                         if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
1602                                 break;
1603                         /* Here we've found a part of the metapath that is not
1604                          * allocated. We need to search at that height for the
1605                          * next non-null pointer. */
1606                         if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
1607                                 state = DEALLOC_FILL_MP;
1608                                 mp_h++;
1609                         }
1610                         /* No more non-null pointers at this height. Back up
1611                            to the previous height and try again. */
1612                         break; /* loop around in the same state */
1613
1614                 /* Fill the metapath with buffers to the given height. */
1615                 case DEALLOC_FILL_MP:
1616                         /* Fill the buffers out to the current height. */
1617                         ret = fillup_metapath(ip, &mp, mp_h);
1618                         if (ret < 0)
1619                                 goto out;
1620
1621                         /* issue read-ahead on metadata */
1622                         if (mp.mp_aheight > 1) {
1623                                 for (; ret > 1; ret--) {
1624                                         metapointer_range(&mp, mp.mp_aheight - ret,
1625                                                           start_list, start_aligned,
1626                                                           end_list, end_aligned,
1627                                                           &start, &end);
1628                                         gfs2_metapath_ra(ip->i_gl, start, end);
1629                                 }
1630                         }
1631
1632                         /* If buffers found for the entire strip height */
1633                         if (mp.mp_aheight - 1 == strip_h) {
1634                                 state = DEALLOC_MP_FULL;
1635                                 break;
1636                         }
1637                         if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1638                                 mp_h = mp.mp_aheight - 1;
1639
1640                         /* If we find a non-null block pointer, crawl a bit
1641                            higher up in the metapath and try again, otherwise
1642                            we need to look lower for a new starting point. */
1643                         if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
1644                                 mp_h++;
1645                         else
1646                                 state = DEALLOC_MP_LOWER;
1647                         break;
1648                 }
1649         }
1650
1651         if (btotal) {
1652                 if (current->journal_info == NULL) {
1653                         ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1654                                                RES_QUOTA, 0);
1655                         if (ret)
1656                                 goto out;
1657                         down_write(&ip->i_rw_mutex);
1658                 }
1659                 gfs2_statfs_change(sdp, 0, +btotal, 0);
1660                 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
1661                                   ip->i_inode.i_gid);
1662                 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1663                 gfs2_trans_add_meta(ip->i_gl, dibh);
1664                 gfs2_dinode_out(ip, dibh->b_data);
1665                 up_write(&ip->i_rw_mutex);
1666                 gfs2_trans_end(sdp);
1667         }
1668
1669 out:
1670         if (gfs2_holder_initialized(&rd_gh))
1671                 gfs2_glock_dq_uninit(&rd_gh);
1672         if (current->journal_info) {
1673                 up_write(&ip->i_rw_mutex);
1674                 gfs2_trans_end(sdp);
1675                 cond_resched();
1676         }
1677         gfs2_quota_unhold(ip);
1678 out_metapath:
1679         release_metapath(&mp);
1680         return ret;
1681 }
1682
1683 static int trunc_end(struct gfs2_inode *ip)
1684 {
1685         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1686         struct buffer_head *dibh;
1687         int error;
1688
1689         error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1690         if (error)
1691                 return error;
1692
1693         down_write(&ip->i_rw_mutex);
1694
1695         error = gfs2_meta_inode_buffer(ip, &dibh);
1696         if (error)
1697                 goto out;
1698
1699         if (!i_size_read(&ip->i_inode)) {
1700                 ip->i_height = 0;
1701                 ip->i_goal = ip->i_no_addr;
1702                 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1703                 gfs2_ordered_del_inode(ip);
1704         }
1705         ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1706         ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
1707
1708         gfs2_trans_add_meta(ip->i_gl, dibh);
1709         gfs2_dinode_out(ip, dibh->b_data);
1710         brelse(dibh);
1711
1712 out:
1713         up_write(&ip->i_rw_mutex);
1714         gfs2_trans_end(sdp);
1715         return error;
1716 }
1717
1718 /**
1719  * do_shrink - make a file smaller
1720  * @inode: the inode
1721  * @newsize: the size to make the file
1722  *
1723  * Called with an exclusive lock on @inode. The @size must
1724  * be equal to or smaller than the current inode size.
1725  *
1726  * Returns: errno
1727  */
1728
1729 static int do_shrink(struct inode *inode, u64 newsize)
1730 {
1731         struct gfs2_inode *ip = GFS2_I(inode);
1732         int error;
1733
1734         error = trunc_start(inode, newsize);
1735         if (error < 0)
1736                 return error;
1737         if (gfs2_is_stuffed(ip))
1738                 return 0;
1739
1740         error = punch_hole(ip, newsize, 0);
1741         if (error == 0)
1742                 error = trunc_end(ip);
1743
1744         return error;
1745 }
1746
1747 void gfs2_trim_blocks(struct inode *inode)
1748 {
1749         int ret;
1750
1751         ret = do_shrink(inode, inode->i_size);
1752         WARN_ON(ret != 0);
1753 }
1754
1755 /**
1756  * do_grow - Touch and update inode size
1757  * @inode: The inode
1758  * @size: The new size
1759  *
1760  * This function updates the timestamps on the inode and
1761  * may also increase the size of the inode. This function
1762  * must not be called with @size any smaller than the current
1763  * inode size.
1764  *
1765  * Although it is not strictly required to unstuff files here,
1766  * earlier versions of GFS2 have a bug in the stuffed file reading
1767  * code which will result in a buffer overrun if the size is larger
1768  * than the max stuffed file size. In order to prevent this from
1769  * occurring, such files are unstuffed, but in other cases we can
1770  * just update the inode size directly.
1771  *
1772  * Returns: 0 on success, or -ve on error
1773  */
1774
1775 static int do_grow(struct inode *inode, u64 size)
1776 {
1777         struct gfs2_inode *ip = GFS2_I(inode);
1778         struct gfs2_sbd *sdp = GFS2_SB(inode);
1779         struct gfs2_alloc_parms ap = { .target = 1, };
1780         struct buffer_head *dibh;
1781         int error;
1782         int unstuff = 0;
1783
1784         if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
1785                 error = gfs2_quota_lock_check(ip, &ap);
1786                 if (error)
1787                         return error;
1788
1789                 error = gfs2_inplace_reserve(ip, &ap);
1790                 if (error)
1791                         goto do_grow_qunlock;
1792                 unstuff = 1;
1793         }
1794
1795         error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
1796                                  (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
1797                                   0 : RES_QUOTA), 0);
1798         if (error)
1799                 goto do_grow_release;
1800
1801         if (unstuff) {
1802                 error = gfs2_unstuff_dinode(ip, NULL);
1803                 if (error)
1804                         goto do_end_trans;
1805         }
1806
1807         error = gfs2_meta_inode_buffer(ip, &dibh);
1808         if (error)
1809                 goto do_end_trans;
1810
1811         i_size_write(inode, size);
1812         ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1813         gfs2_trans_add_meta(ip->i_gl, dibh);
1814         gfs2_dinode_out(ip, dibh->b_data);
1815         brelse(dibh);
1816
1817 do_end_trans:
1818         gfs2_trans_end(sdp);
1819 do_grow_release:
1820         if (unstuff) {
1821                 gfs2_inplace_release(ip);
1822 do_grow_qunlock:
1823                 gfs2_quota_unlock(ip);
1824         }
1825         return error;
1826 }
1827
1828 /**
1829  * gfs2_setattr_size - make a file a given size
1830  * @inode: the inode
1831  * @newsize: the size to make the file
1832  *
1833  * The file size can grow, shrink, or stay the same size. This
1834  * is called holding i_rwsem and an exclusive glock on the inode
1835  * in question.
1836  *
1837  * Returns: errno
1838  */
1839
1840 int gfs2_setattr_size(struct inode *inode, u64 newsize)
1841 {
1842         struct gfs2_inode *ip = GFS2_I(inode);
1843         int ret;
1844
1845         BUG_ON(!S_ISREG(inode->i_mode));
1846
1847         ret = inode_newsize_ok(inode, newsize);
1848         if (ret)
1849                 return ret;
1850
1851         inode_dio_wait(inode);
1852
1853         ret = gfs2_rsqa_alloc(ip);
1854         if (ret)
1855                 goto out;
1856
1857         if (newsize >= inode->i_size) {
1858                 ret = do_grow(inode, newsize);
1859                 goto out;
1860         }
1861
1862         ret = do_shrink(inode, newsize);
1863 out:
1864         gfs2_rsqa_delete(ip, NULL);
1865         return ret;
1866 }
1867
1868 int gfs2_truncatei_resume(struct gfs2_inode *ip)
1869 {
1870         int error;
1871         error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
1872         if (!error)
1873                 error = trunc_end(ip);
1874         return error;
1875 }
1876
1877 int gfs2_file_dealloc(struct gfs2_inode *ip)
1878 {
1879         return punch_hole(ip, 0, 0);
1880 }
1881
1882 /**
1883  * gfs2_free_journal_extents - Free cached journal bmap info
1884  * @jd: The journal
1885  *
1886  */
1887
1888 void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
1889 {
1890         struct gfs2_journal_extent *jext;
1891
1892         while(!list_empty(&jd->extent_list)) {
1893                 jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
1894                 list_del(&jext->list);
1895                 kfree(jext);
1896         }
1897 }
1898
1899 /**
1900  * gfs2_add_jextent - Add or merge a new extent to extent cache
1901  * @jd: The journal descriptor
1902  * @lblock: The logical block at start of new extent
1903  * @dblock: The physical block at start of new extent
1904  * @blocks: Size of extent in fs blocks
1905  *
1906  * Returns: 0 on success or -ENOMEM
1907  */
1908
1909 static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
1910 {
1911         struct gfs2_journal_extent *jext;
1912
1913         if (!list_empty(&jd->extent_list)) {
1914                 jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
1915                 if ((jext->dblock + jext->blocks) == dblock) {
1916                         jext->blocks += blocks;
1917                         return 0;
1918                 }
1919         }
1920
1921         jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
1922         if (jext == NULL)
1923                 return -ENOMEM;
1924         jext->dblock = dblock;
1925         jext->lblock = lblock;
1926         jext->blocks = blocks;
1927         list_add_tail(&jext->list, &jd->extent_list);
1928         jd->nr_extents++;
1929         return 0;
1930 }
1931
1932 /**
1933  * gfs2_map_journal_extents - Cache journal bmap info
1934  * @sdp: The super block
1935  * @jd: The journal to map
1936  *
1937  * Create a reusable "extent" mapping from all logical
1938  * blocks to all physical blocks for the given journal.  This will save
1939  * us time when writing journal blocks.  Most journals will have only one
1940  * extent that maps all their logical blocks.  That's because gfs2.mkfs
1941  * arranges the journal blocks sequentially to maximize performance.
1942  * So the extent would map the first block for the entire file length.
1943  * However, gfs2_jadd can happen while file activity is happening, so
1944  * those journals may not be sequential.  Less likely is the case where
1945  * the users created their own journals by mounting the metafs and
1946  * laying it out.  But it's still possible.  These journals might have
1947  * several extents.
1948  *
1949  * Returns: 0 on success, or error on failure
1950  */
1951
1952 int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
1953 {
1954         u64 lblock = 0;
1955         u64 lblock_stop;
1956         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
1957         struct buffer_head bh;
1958         unsigned int shift = sdp->sd_sb.sb_bsize_shift;
1959         u64 size;
1960         int rc;
1961
1962         lblock_stop = i_size_read(jd->jd_inode) >> shift;
1963         size = (lblock_stop - lblock) << shift;
1964         jd->nr_extents = 0;
1965         WARN_ON(!list_empty(&jd->extent_list));
1966
1967         do {
1968                 bh.b_state = 0;
1969                 bh.b_blocknr = 0;
1970                 bh.b_size = size;
1971                 rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
1972                 if (rc || !buffer_mapped(&bh))
1973                         goto fail;
1974                 rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
1975                 if (rc)
1976                         goto fail;
1977                 size -= bh.b_size;
1978                 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
1979         } while(size > 0);
1980
1981         fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
1982                 jd->nr_extents);
1983         return 0;
1984
1985 fail:
1986         fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
1987                 rc, jd->jd_jid,
1988                 (unsigned long long)(i_size_read(jd->jd_inode) - size),
1989                 jd->nr_extents);
1990         fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
1991                 rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
1992                 bh.b_state, (unsigned long long)bh.b_size);
1993         gfs2_free_journal_extents(jd);
1994         return rc;
1995 }
1996
1997 /**
1998  * gfs2_write_alloc_required - figure out if a write will require an allocation
1999  * @ip: the file being written to
2000  * @offset: the offset to write to
2001  * @len: the number of bytes being written
2002  *
2003  * Returns: 1 if an alloc is required, 0 otherwise
2004  */
2005
2006 int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
2007                               unsigned int len)
2008 {
2009         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2010         struct buffer_head bh;
2011         unsigned int shift;
2012         u64 lblock, lblock_stop, size;
2013         u64 end_of_file;
2014
2015         if (!len)
2016                 return 0;
2017
2018         if (gfs2_is_stuffed(ip)) {
2019                 if (offset + len > gfs2_max_stuffed_size(ip))
2020                         return 1;
2021                 return 0;
2022         }
2023
2024         shift = sdp->sd_sb.sb_bsize_shift;
2025         BUG_ON(gfs2_is_dir(ip));
2026         end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
2027         lblock = offset >> shift;
2028         lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
2029         if (lblock_stop > end_of_file)
2030                 return 1;
2031
2032         size = (lblock_stop - lblock) << shift;
2033         do {
2034                 bh.b_state = 0;
2035                 bh.b_size = size;
2036                 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
2037                 if (!buffer_mapped(&bh))
2038                         return 1;
2039                 size -= bh.b_size;
2040                 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2041         } while(size > 0);
2042
2043         return 0;
2044 }
2045
2046 static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
2047 {
2048         struct gfs2_inode *ip = GFS2_I(inode);
2049         struct buffer_head *dibh;
2050         int error;
2051
2052         if (offset >= inode->i_size)
2053                 return 0;
2054         if (offset + length > inode->i_size)
2055                 length = inode->i_size - offset;
2056
2057         error = gfs2_meta_inode_buffer(ip, &dibh);
2058         if (error)
2059                 return error;
2060         gfs2_trans_add_meta(ip->i_gl, dibh);
2061         memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
2062                length);
2063         brelse(dibh);
2064         return 0;
2065 }
2066
2067 static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
2068                                          loff_t length)
2069 {
2070         struct gfs2_sbd *sdp = GFS2_SB(inode);
2071         loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
2072         int error;
2073
2074         while (length) {
2075                 struct gfs2_trans *tr;
2076                 loff_t chunk;
2077                 unsigned int offs;
2078
2079                 chunk = length;
2080                 if (chunk > max_chunk)
2081                         chunk = max_chunk;
2082
2083                 offs = offset & ~PAGE_MASK;
2084                 if (offs && chunk > PAGE_SIZE)
2085                         chunk = offs + ((chunk - offs) & PAGE_MASK);
2086
2087                 truncate_pagecache_range(inode, offset, chunk);
2088                 offset += chunk;
2089                 length -= chunk;
2090
2091                 tr = current->journal_info;
2092                 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2093                         continue;
2094
2095                 gfs2_trans_end(sdp);
2096                 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2097                 if (error)
2098                         return error;
2099         }
2100         return 0;
2101 }
2102
2103 int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
2104 {
2105         struct inode *inode = file_inode(file);
2106         struct gfs2_inode *ip = GFS2_I(inode);
2107         struct gfs2_sbd *sdp = GFS2_SB(inode);
2108         int error;
2109
2110         if (gfs2_is_jdata(ip))
2111                 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
2112                                          GFS2_JTRUNC_REVOKES);
2113         else
2114                 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2115         if (error)
2116                 return error;
2117
2118         if (gfs2_is_stuffed(ip)) {
2119                 error = stuffed_zero_range(inode, offset, length);
2120                 if (error)
2121                         goto out;
2122         } else {
2123                 unsigned int start_off, end_off, blocksize;
2124
2125                 blocksize = i_blocksize(inode);
2126                 start_off = offset & (blocksize - 1);
2127                 end_off = (offset + length) & (blocksize - 1);
2128                 if (start_off) {
2129                         unsigned int len = length;
2130                         if (length > blocksize - start_off)
2131                                 len = blocksize - start_off;
2132                         error = gfs2_block_zero_range(inode, offset, len);
2133                         if (error)
2134                                 goto out;
2135                         if (start_off + length < blocksize)
2136                                 end_off = 0;
2137                 }
2138                 if (end_off) {
2139                         error = gfs2_block_zero_range(inode,
2140                                 offset + length - end_off, end_off);
2141                         if (error)
2142                                 goto out;
2143                 }
2144         }
2145
2146         if (gfs2_is_jdata(ip)) {
2147                 BUG_ON(!current->journal_info);
2148                 gfs2_journaled_truncate_range(inode, offset, length);
2149         } else
2150                 truncate_pagecache_range(inode, offset, offset + length - 1);
2151
2152         file_update_time(file);
2153         mark_inode_dirty(inode);
2154
2155         if (current->journal_info)
2156                 gfs2_trans_end(sdp);
2157
2158         if (!gfs2_is_stuffed(ip))
2159                 error = punch_hole(ip, offset, length);
2160
2161 out:
2162         if (current->journal_info)
2163                 gfs2_trans_end(sdp);
2164         return error;
2165 }