fs/ext4/fast_commit.c

   1 // SPDX-License-Identifier: GPL-2.0
   2
   3 /*
   4  * fs/ext4/fast_commit.c
   5  *
   6  * Written by Harshad Shirwadkar <[email protected]>
   7  *
   8  * Ext4 fast commits routines.
   9  */
  10 #include "ext4.h"
  11 #include "ext4_jbd2.h"
  12 #include "ext4_extents.h"
  13 #include "mballoc.h"
  14
  15 /*
  16  * Ext4 Fast Commits
  17  * -----------------
  18  *
  19  * Ext4 fast commits implement fine grained journalling for Ext4.
  20  *
  21  * Fast commits are organized as a log of tag-length-value (TLV) structs. (See
  22  * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
  23  * TLV during the recovery phase. For the scenarios for which we currently
  24  * don't have replay code, fast commit falls back to full commits.
  25  * Fast commits record delta in one of the following three categories.
  26  *
  27  * (A) Directory entry updates:
  28  *
  29  * - EXT4_FC_TAG_UNLINK         - records directory entry unlink
  30  * - EXT4_FC_TAG_LINK           - records directory entry link
  31  * - EXT4_FC_TAG_CREAT          - records inode and directory entry creation
  32  *
  33  * (B) File specific data range updates:
  34  *
  35  * - EXT4_FC_TAG_ADD_RANGE      - records addition of new blocks to an inode
  36  * - EXT4_FC_TAG_DEL_RANGE      - records deletion of blocks from an inode
  37  *
  38  * (C) Inode metadata (mtime / ctime etc):
  39  *
  40  * - EXT4_FC_TAG_INODE          - record the inode that should be replayed
  41  *                                during recovery. Note that iblocks field is
  42  *                                not replayed and instead derived during
  43  *                                replay.
  44  * Commit Operation
  45  * ----------------
  46  * With fast commits, we maintain all the directory entry operations in the
  47  * order in which they are issued in an in-memory queue. This queue is flushed
  48  * to disk during the commit operation. We also maintain a list of inodes
  49  * that need to be committed during a fast commit in another in memory queue of
  50  * inodes. During the commit operation, we commit in the following order:
  51  *
  52  * [1] Lock inodes for any further data updates by setting COMMITTING state
  53  * [2] Submit data buffers of all the inodes
  54  * [3] Wait for [2] to complete
  55  * [4] Commit all the directory entry updates in the fast commit space
  56  * [5] Commit all the changed inode structures
  57  * [6] Write tail tag (this tag ensures the atomicity, please read the following
  58  *     section for more details).
  59  * [7] Wait for [4], [5] and [6] to complete.
  60  *
  61  * All the inode updates must call ext4_fc_start_update() before starting an
  62  * update. If such an ongoing update is present, fast commit waits for it to
  63  * complete. The completion of such an update is marked by
  64  * ext4_fc_stop_update().
  65  *
  66  * Fast Commit Ineligibility
  67  * -------------------------
  68  *
  69  * Not all operations are supported by fast commits today (e.g extended
  70  * attributes). Fast commit ineligibility is marked by calling
  71  * ext4_fc_mark_ineligible(): This makes the next fast commit operation fall back
  72  * to full commit.
  73  *
  74  * Atomicity of commits
  75  * --------------------
  76  * In order to guarantee atomicity during the commit operation, fast commit
  77  * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
  78  * tag contains CRC of the contents and TID of the transaction after which
  79  * this fast commit should be applied. Recovery code replays fast commit
  80  * logs only if there's at least 1 valid tail present. For every fast commit
  81  * operation, there is 1 tail. This means, we may end up with multiple tails
  82  * in the fast commit space. Here's an example:
  83  *
  84  * - Create a new file A and remove existing file B
  85  * - fsync()
  86  * - Append contents to file A
  87  * - Truncate file A
  88  * - fsync()
  89  *
  90  * The fast commit space at the end of above operations would look like this:
  91  *      [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
  92  *             |<---  Fast Commit 1   --->|<---      Fast Commit 2     ---->|
  93  *
  94  * Replay code should thus check for all the valid tails in the FC area.
  95  *
  96  * Fast Commit Replay Idempotence
  97  * ------------------------------
  98  *
  99  * Fast commit tags are idempotent in nature provided the recovery code follows
 100  * certain rules. The guiding principle that the commit path follows while
 101  * committing is that it stores the result of a particular operation instead of
 102  * storing the procedure.
 103  *
 104  * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
 105  * was associated with inode 10. During fast commit, instead of storing this
 106  * operation as a procedure "rename a to b", we store the resulting file system
 107  * state as a "series" of outcomes:
 108  *
 109  * - Link dirent b to inode 10
 110  * - Unlink dirent a
 111  * - Inode <10> with valid refcount
 112  *
 113  * Now when recovery code runs, it needs to "enforce" this state on the file
 114  * system. This is what guarantees idempotence of fast commit replay.
 115  *
 116  * Let's take an example of a procedure that is not idempotent and see how fast
 117  * commits make it idempotent. Consider following sequence of operations:
 118  *
 119  *     rm A;    mv B A;    read A
 120  *  (x)     (y)        (z)
 121  *
 122  * (x), (y) and (z) are the points at which we can crash. If we store this
 123  * sequence of operations as is then the replay is not idempotent. Let's say
 124  * while in replay, we crash at (z). During the second replay, file A (which was
 125  * actually created as a result of "mv B A" operation) would get deleted. Thus,
 126  * file named A would be absent when we try to read A. So, this sequence of
 127  * operations is not idempotent. However, as mentioned above, instead of storing
 128  * the procedure fast commits store the outcome of each procedure. Thus the fast
 129  * commit log for above procedure would be as follows:
 130  *
 131  * (Let's assume dirent A was linked to inode 10 and dirent B was linked to
 132  * inode 11 before the replay)
 133  *
 134  *    [Unlink A]   [Link A to inode 11]   [Unlink B]   [Inode 11]
 135  * (w)          (x)                    (y)          (z)
 136  *
 137  * If we crash at (z), we will have file A linked to inode 11. During the second
 138  * replay, we will remove file A (inode 11). But we will create it back and make
 139  * it point to inode 11. We won't find B, so we'll just skip that step. At this
 140  * point, the refcount for inode 11 is not reliable, but that gets fixed by the
 141  * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled
 142  * similarly. Thus, by converting a non-idempotent procedure into a series of
 143  * idempotent outcomes, fast commits ensured idempotence during the replay.
 144  *
 145  * TODOs
 146  * -----
 147  *
 148  * 0) Fast commit replay path hardening: Fast commit replay code should use
 149  *    journal handles to make sure all the updates it does during the replay
 150  *    path are atomic. With that if we crash during fast commit replay, after
 151  *    trying to do recovery again, we will find a file system where fast commit
 152  *    area is invalid (because new full commit would be found). In order to deal
 153  *    with that, fast commit replay code should ensure that the "FC_REPLAY"
 154  *    superblock state is persisted before starting the replay, so that after
 155  *    the crash, fast commit recovery code can look at that flag and perform
 156  *    fast commit recovery even if that area is invalidated by later full
 157  *    commits.
 158  *
 159  * 1) Fast commit's commit path locks the entire file system during fast
 160  *    commit. This has significant performance penalty. Instead of that, we
 161  *    should use ext4_fc_start/stop_update functions to start inode level
 162  *    updates from ext4_journal_start/stop. Once we do that we can drop file
 163  *    system locking during commit path.
 164  *
 165  * 2) Handle more ineligible cases.
 166  */
 167
 168 #include <trace/events/ext4.h>
/*
 * Cache from which struct ext4_fc_dentry_update nodes are allocated (see
 * __track_dentry_update()) and to which they are returned (see ext4_fc_del()).
 */
static struct kmem_cache *ext4_fc_dentry_cachep;
 170
 171 static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 172 {
 173         BUFFER_TRACE(bh, "");
 174         if (uptodate) {
 175                 ext4_debug("%s: Block %lld up-to-date",
 176                            __func__, bh->b_blocknr);
 177                 set_buffer_uptodate(bh);
 178         } else {
 179                 ext4_debug("%s: Block %lld not up-to-date",
 180                            __func__, bh->b_blocknr);
 181                 clear_buffer_uptodate(bh);
 182         }
 183
 184         unlock_buffer(bh);
 185 }
 186
 187 static inline void ext4_fc_reset_inode(struct inode *inode)
 188 {
 189         struct ext4_inode_info *ei = EXT4_I(inode);
 190
 191         ei->i_fc_lblk_start = 0;
 192         ei->i_fc_lblk_len = 0;
 193 }
 194
 195 void ext4_fc_init_inode(struct inode *inode)
 196 {
 197         struct ext4_inode_info *ei = EXT4_I(inode);
 198
 199         ext4_fc_reset_inode(inode);
 200         ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
 201         INIT_LIST_HEAD(&ei->i_fc_list);
 202         INIT_LIST_HEAD(&ei->i_fc_dilist);
 203         init_waitqueue_head(&ei->i_fc_wait);
 204         atomic_set(&ei->i_fc_updates, 0);
 205 }
 206
/*
 * Sleep on the bit waitqueue associated with EXT4_STATE_FC_COMMITTING of
 * @inode.
 *
 * This function must be called with sbi->s_fc_lock held. The lock is dropped
 * before sleeping and is NOT taken again before returning, so callers have to
 * re-acquire it and re-check the inode state themselves (the callers in this
 * file do so by jumping back to their "restart" label).
 */
static void ext4_fc_wait_committing_inode(struct inode *inode)
__releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
{
	wait_queue_head_t *wq;
	struct ext4_inode_info *ei = EXT4_I(inode);

	/*
	 * The word holding the state bit differs by word size: i_state_flags
	 * when BITS_PER_LONG < 64, i_flags otherwise.
	 */
#if (BITS_PER_LONG < 64)
	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
			EXT4_STATE_FC_COMMITTING);
	wq = bit_waitqueue(&ei->i_state_flags,
				EXT4_STATE_FC_COMMITTING);
#else
	DEFINE_WAIT_BIT(wait, &ei->i_flags,
			EXT4_STATE_FC_COMMITTING);
	wq = bit_waitqueue(&ei->i_flags,
				EXT4_STATE_FC_COMMITTING);
#endif
	lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock);
	/*
	 * Queue ourselves on the waitqueue while still holding s_fc_lock, and
	 * only then release the lock and sleep.
	 */
	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
	schedule();
	finish_wait(wq, &wait.wq_entry);
}
 231
 232 static bool ext4_fc_disabled(struct super_block *sb)
 233 {
 234         return (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
 235                 (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY));
 236 }
 237
 238 /*
 239  * Inform Ext4's fast about start of an inode update
 240  *
 241  * This function is called by the high level call VFS callbacks before
 242  * performing any inode update. This function blocks if there's an ongoing
 243  * fast commit on the inode in question.
 244  */
 245 void ext4_fc_start_update(struct inode *inode)
 246 {
 247         struct ext4_inode_info *ei = EXT4_I(inode);
 248
 249         if (ext4_fc_disabled(inode->i_sb))
 250                 return;
 251
 252 restart:
 253         spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
 254         if (list_empty(&ei->i_fc_list))
 255                 goto out;
 256
 257         if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
 258                 ext4_fc_wait_committing_inode(inode);
 259                 goto restart;
 260         }
 261 out:
 262         atomic_inc(&ei->i_fc_updates);
 263         spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
 264 }
 265
 266 /*
 267  * Stop inode update and wake up waiting fast commits if any.
 268  */
 269 void ext4_fc_stop_update(struct inode *inode)
 270 {
 271         struct ext4_inode_info *ei = EXT4_I(inode);
 272
 273         if (ext4_fc_disabled(inode->i_sb))
 274                 return;
 275
 276         if (atomic_dec_and_test(&ei->i_fc_updates))
 277                 wake_up_all(&ei->i_fc_wait);
 278 }
 279
 280 /*
 281  * Remove inode from fast commit list. If the inode is being committed
 282  * we wait until inode commit is done.
 283  */
 284 void ext4_fc_del(struct inode *inode)
 285 {
 286         struct ext4_inode_info *ei = EXT4_I(inode);
 287         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 288         struct ext4_fc_dentry_update *fc_dentry;
 289
 290         if (ext4_fc_disabled(inode->i_sb))
 291                 return;
 292
 293 restart:
 294         spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
 295         if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) {
 296                 spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
 297                 return;
 298         }
 299
 300         if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
 301                 ext4_fc_wait_committing_inode(inode);
 302                 goto restart;
 303         }
 304
 305         if (!list_empty(&ei->i_fc_list))
 306                 list_del_init(&ei->i_fc_list);
 307
 308         /*
 309          * Since this inode is getting removed, let's also remove all FC
 310          * dentry create references, since it is not needed to log it anyways.
 311          */
 312         if (list_empty(&ei->i_fc_dilist)) {
 313                 spin_unlock(&sbi->s_fc_lock);
 314                 return;
 315         }
 316
 317         fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist);
 318         WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT);
 319         list_del_init(&fc_dentry->fcd_list);
 320         list_del_init(&fc_dentry->fcd_dilist);
 321
 322         WARN_ON(!list_empty(&ei->i_fc_dilist));
 323         spin_unlock(&sbi->s_fc_lock);
 324
 325         if (fc_dentry->fcd_name.name &&
 326                 fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
 327                 kfree(fc_dentry->fcd_name.name);
 328         kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
 329
 330         return;
 331 }
 332
 333 /*
 334  * Mark file system as fast commit ineligible, and record latest
 335  * ineligible transaction tid. This means until the recorded
 336  * transaction, commit operation would result in a full jbd2 commit.
 337  */
 338 void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
 339 {
 340         struct ext4_sb_info *sbi = EXT4_SB(sb);
 341         tid_t tid;
 342
 343         if (ext4_fc_disabled(sb))
 344                 return;
 345
 346         ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
 347         if (handle && !IS_ERR(handle))
 348                 tid = handle->h_transaction->t_tid;
 349         else {
 350                 read_lock(&sbi->s_journal->j_state_lock);
 351                 tid = sbi->s_journal->j_running_transaction ?
 352                                 sbi->s_journal->j_running_transaction->t_tid : 0;
 353                 read_unlock(&sbi->s_journal->j_state_lock);
 354         }
 355         spin_lock(&sbi->s_fc_lock);
 356         if (sbi->s_fc_ineligible_tid < tid)
 357                 sbi->s_fc_ineligible_tid = tid;
 358         spin_unlock(&sbi->s_fc_lock);
 359         WARN_ON(reason >= EXT4_FC_REASON_MAX);
 360         sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
 361 }
 362
 363 /*
 364  * Generic fast commit tracking function. If this is the first time this we are
 365  * called after a full commit, we initialize fast commit fields and then call
 366  * __fc_track_fn() with update = 0. If we have already been called after a full
 367  * commit, we pass update = 1. Based on that, the track function can determine
 368  * if it needs to track a field for the first time or if it needs to just
 369  * update the previously tracked value.
 370  *
 371  * If enqueue is set, this function enqueues the inode in fast commit list.
 372  */
 373 static int ext4_fc_track_template(
 374         handle_t *handle, struct inode *inode,
 375         int (*__fc_track_fn)(struct inode *, void *, bool),
 376         void *args, int enqueue)
 377 {
 378         bool update = false;
 379         struct ext4_inode_info *ei = EXT4_I(inode);
 380         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 381         tid_t tid = 0;
 382         int ret;
 383
 384         tid = handle->h_transaction->t_tid;
 385         mutex_lock(&ei->i_fc_lock);
 386         if (tid == ei->i_sync_tid) {
 387                 update = true;
 388         } else {
 389                 ext4_fc_reset_inode(inode);
 390                 ei->i_sync_tid = tid;
 391         }
 392         ret = __fc_track_fn(inode, args, update);
 393         mutex_unlock(&ei->i_fc_lock);
 394
 395         if (!enqueue)
 396                 return ret;
 397
 398         spin_lock(&sbi->s_fc_lock);
 399         if (list_empty(&EXT4_I(inode)->i_fc_list))
 400                 list_add_tail(&EXT4_I(inode)->i_fc_list,
 401                                 (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
 402                                  sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
 403                                 &sbi->s_fc_q[FC_Q_STAGING] :
 404                                 &sbi->s_fc_q[FC_Q_MAIN]);
 405         spin_unlock(&sbi->s_fc_lock);
 406
 407         return ret;
 408 }
 409
/* Argument bundle handed to __track_dentry_update() via the void *arg slot. */
struct __track_dentry_update_args {
	/* Directory entry whose name and parent inode number get recorded. */
	struct dentry *dentry;
	/* Tag stored in the update's fcd_op (EXT4_FC_TAG_UNLINK/LINK/CREAT). */
	int op;
};
 414
 415 /* __track_fn for directory entry updates. Called with ei->i_fc_lock. */
 416 static int __track_dentry_update(struct inode *inode, void *arg, bool update)
 417 {
 418         struct ext4_fc_dentry_update *node;
 419         struct ext4_inode_info *ei = EXT4_I(inode);
 420         struct __track_dentry_update_args *dentry_update =
 421                 (struct __track_dentry_update_args *)arg;
 422         struct dentry *dentry = dentry_update->dentry;
 423         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 424
 425         mutex_unlock(&ei->i_fc_lock);
 426         node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
 427         if (!node) {
 428                 ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
 429                 mutex_lock(&ei->i_fc_lock);
 430                 return -ENOMEM;
 431         }
 432
 433         node->fcd_op = dentry_update->op;
 434         node->fcd_parent = dentry->d_parent->d_inode->i_ino;
 435         node->fcd_ino = inode->i_ino;
 436         if (dentry->d_name.len > DNAME_INLINE_LEN) {
 437                 node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
 438                 if (!node->fcd_name.name) {
 439                         kmem_cache_free(ext4_fc_dentry_cachep, node);
 440                         ext4_fc_mark_ineligible(inode->i_sb,
 441                                 EXT4_FC_REASON_NOMEM, NULL);
 442                         mutex_lock(&ei->i_fc_lock);
 443                         return -ENOMEM;
 444                 }
 445                 memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
 446                         dentry->d_name.len);
 447         } else {
 448                 memcpy(node->fcd_iname, dentry->d_name.name,
 449                         dentry->d_name.len);
 450                 node->fcd_name.name = node->fcd_iname;
 451         }
 452         node->fcd_name.len = dentry->d_name.len;
 453         INIT_LIST_HEAD(&node->fcd_dilist);
 454         spin_lock(&sbi->s_fc_lock);
 455         if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
 456                 sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
 457                 list_add_tail(&node->fcd_list,
 458                                 &sbi->s_fc_dentry_q[FC_Q_STAGING]);
 459         else
 460                 list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
 461
 462         /*
 463          * This helps us keep a track of all fc_dentry updates which is part of
 464          * this ext4 inode. So in case the inode is getting unlinked, before
 465          * even we get a chance to fsync, we could remove all fc_dentry
 466          * references while evicting the inode in ext4_fc_del().
 467          * Also with this, we don't need to loop over all the inodes in
 468          * sbi->s_fc_q to get the corresponding inode in
 469          * ext4_fc_commit_dentry_updates().
 470          */
 471         if (dentry_update->op == EXT4_FC_TAG_CREAT) {
 472                 WARN_ON(!list_empty(&ei->i_fc_dilist));
 473                 list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist);
 474         }
 475         spin_unlock(&sbi->s_fc_lock);
 476         mutex_lock(&ei->i_fc_lock);
 477
 478         return 0;
 479 }
 480
 481 void __ext4_fc_track_unlink(handle_t *handle,
 482                 struct inode *inode, struct dentry *dentry)
 483 {
 484         struct __track_dentry_update_args args;
 485         int ret;
 486
 487         args.dentry = dentry;
 488         args.op = EXT4_FC_TAG_UNLINK;
 489
 490         ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
 491                                         (void *)&args, 0);
 492         trace_ext4_fc_track_unlink(handle, inode, dentry, ret);
 493 }
 494
 495 void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
 496 {
 497         struct inode *inode = d_inode(dentry);
 498
 499         if (ext4_fc_disabled(inode->i_sb))
 500                 return;
 501
 502         if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
 503                 return;
 504
 505         __ext4_fc_track_unlink(handle, inode, dentry);
 506 }
 507
 508 void __ext4_fc_track_link(handle_t *handle,
 509         struct inode *inode, struct dentry *dentry)
 510 {
 511         struct __track_dentry_update_args args;
 512         int ret;
 513
 514         args.dentry = dentry;
 515         args.op = EXT4_FC_TAG_LINK;
 516
 517         ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
 518                                         (void *)&args, 0);
 519         trace_ext4_fc_track_link(handle, inode, dentry, ret);
 520 }
 521
 522 void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
 523 {
 524         struct inode *inode = d_inode(dentry);
 525
 526         if (ext4_fc_disabled(inode->i_sb))
 527                 return;
 528
 529         if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
 530                 return;
 531
 532         __ext4_fc_track_link(handle, inode, dentry);
 533 }
 534
 535 void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
 536                           struct dentry *dentry)
 537 {
 538         struct __track_dentry_update_args args;
 539         int ret;
 540
 541         args.dentry = dentry;
 542         args.op = EXT4_FC_TAG_CREAT;
 543
 544         ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
 545                                         (void *)&args, 0);
 546         trace_ext4_fc_track_create(handle, inode, dentry, ret);
 547 }
 548
 549 void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
 550 {
 551         struct inode *inode = d_inode(dentry);
 552
 553         if (ext4_fc_disabled(inode->i_sb))
 554                 return;
 555
 556         if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
 557                 return;
 558
 559         __ext4_fc_track_create(handle, inode, dentry);
 560 }
 561
 562 /* __track_fn for inode tracking */
 563 static int __track_inode(struct inode *inode, void *arg, bool update)
 564 {
 565         if (update)
 566                 return -EEXIST;
 567
 568         EXT4_I(inode)->i_fc_lblk_len = 0;
 569
 570         return 0;
 571 }
 572
 573 void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
 574 {
 575         int ret;
 576
 577         if (S_ISDIR(inode->i_mode))
 578                 return;
 579
 580         if (ext4_fc_disabled(inode->i_sb))
 581                 return;
 582
 583         if (ext4_should_journal_data(inode)) {
 584                 ext4_fc_mark_ineligible(inode->i_sb,
 585                                         EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
 586                 return;
 587         }
 588
 589         if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
 590                 return;
 591
 592         ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
 593         trace_ext4_fc_track_inode(handle, inode, ret);
 594 }
 595
/* Argument bundle handed to __track_range() via the void *arg slot. */
struct __track_range_args {
	/* First and last logical block of the range; both ends are inclusive. */
	ext4_lblk_t start, end;
};
 599
 600 /* __track_fn for tracking data updates */
 601 static int __track_range(struct inode *inode, void *arg, bool update)
 602 {
 603         struct ext4_inode_info *ei = EXT4_I(inode);
 604         ext4_lblk_t oldstart;
 605         struct __track_range_args *__arg =
 606                 (struct __track_range_args *)arg;
 607
 608         if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
 609                 ext4_debug("Special inode %ld being modified\n", inode->i_ino);
 610                 return -ECANCELED;
 611         }
 612
 613         oldstart = ei->i_fc_lblk_start;
 614
 615         if (update && ei->i_fc_lblk_len > 0) {
 616                 ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
 617                 ei->i_fc_lblk_len =
 618                         max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
 619                                 ei->i_fc_lblk_start + 1;
 620         } else {
 621                 ei->i_fc_lblk_start = __arg->start;
 622                 ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
 623         }
 624
 625         return 0;
 626 }
 627
 628 void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
 629                          ext4_lblk_t end)
 630 {
 631         struct __track_range_args args;
 632         int ret;
 633
 634         if (S_ISDIR(inode->i_mode))
 635                 return;
 636
 637         if (ext4_fc_disabled(inode->i_sb))
 638                 return;
 639
 640         if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
 641                 return;
 642
 643         args.start = start;
 644         args.end = end;
 645
 646         ret = ext4_fc_track_template(handle, inode,  __track_range, &args, 1);
 647
 648         trace_ext4_fc_track_range(handle, inode, start, end, ret);
 649 }
 650
 651 static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
 652 {
 653         blk_opf_t write_flags = REQ_SYNC;
 654         struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
 655
 656         /* Add REQ_FUA | REQ_PREFLUSH only its tail */
 657         if (test_opt(sb, BARRIER) && is_tail)
 658                 write_flags |= REQ_FUA | REQ_PREFLUSH;
 659         lock_buffer(bh);
 660         set_buffer_dirty(bh);
 661         set_buffer_uptodate(bh);
 662         bh->b_end_io = ext4_end_buffer_io_sync;
 663         submit_bh(REQ_OP_WRITE | write_flags, bh);
 664         EXT4_SB(sb)->s_fc_bh = NULL;
 665 }
 666
 667 /* Ext4 commit path routines */
 668
 669 /* memzero and update CRC */
 670 static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
 671                                 u32 *crc)
 672 {
 673         void *ret;
 674
 675         ret = memset(dst, 0, len);
 676         if (crc)
 677                 *crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len);
 678         return ret;
 679 }
 680
/*
 * Allocate len bytes on a fast commit buffer.
 *
 * During the commit time this function is used to manage fast commit
 * block space. We don't split a fast commit log onto different
 * blocks. So this function makes sure that if there's not enough space
 * on the current block, the remaining space in the current block is
 * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case, the
 * current block is submitted, a new block is obtained from jbd2 and the
 * CRC is updated to reflect the padding we added.
 *
 * @sb:  super block of the file system
 * @len: number of bytes to reserve
 * @crc: running checksum to fold the padding into; may be NULL
 *
 * Returns a pointer to the reserved bytes inside the fast commit buffer, or
 * NULL when @len can never fit in a block or when jbd2_fc_get_buf() fails.
 */
static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
{
	struct ext4_fc_tl *tl;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct buffer_head *bh;
	int bsize = sbi->s_journal->j_blocksize;
	/* off: write position inside the current block. */
	int ret, off = sbi->s_fc_bytes % bsize;
	int pad_len;

	/*
	 * After allocating len, we should have space at least for a 0 byte
	 * padding.
	 */
	if (len + EXT4_FC_TAG_BASE_LEN > bsize)
		return NULL;

	if (bsize - off - 1 > len + EXT4_FC_TAG_BASE_LEN) {
		/*
		 * Only allocate from current buffer if we have enough space for
		 * this request AND we have space to add a zero byte padding.
		 */
		if (!sbi->s_fc_bh) {
			ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
			if (ret)
				return NULL;
			sbi->s_fc_bh = bh;
		}
		sbi->s_fc_bytes += len;
		return sbi->s_fc_bh->b_data + off;
	}
	/*
	 * Need to add PAD tag.
	 * NOTE(review): this path dereferences sbi->s_fc_bh without the NULL
	 * check done above; it appears to rely on a buffer already being held
	 * whenever the request does not fit - confirm.
	 */
	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
	pad_len = bsize - off - 1 - EXT4_FC_TAG_BASE_LEN;
	tl->fc_len = cpu_to_le16(pad_len);
	/* Both the PAD header and its zeroed payload are part of the CRC. */
	if (crc)
		*crc = ext4_chksum(sbi, *crc, tl, EXT4_FC_TAG_BASE_LEN);
	if (pad_len > 0)
		ext4_fc_memzero(sb, tl + 1, pad_len, crc);
	/* Submits the padded block and resets sbi->s_fc_bh to NULL. */
	ext4_fc_submit_bh(sb, false);

	/*
	 * If this fails, sbi->s_fc_bh stays NULL and s_fc_bytes is left
	 * pointing into the block that was just submitted.
	 */
	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
	if (ret)
		return NULL;
	sbi->s_fc_bh = bh;
	/* Skip to the start of the next block, then account for len. */
	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
	return sbi->s_fc_bh->b_data;
}
 740
 741 /* memcpy to fc reserved space and update CRC */
 742 static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
 743                                 int len, u32 *crc)
 744 {
 745         if (crc)
 746                 *crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
 747         return memcpy(dst, src, len);
 748 }
 749
 750 /*
 751  * Complete a fast commit by writing tail tag.
 752  *
 753  * Writing tail tag marks the end of a fast commit. In order to guarantee
 754  * atomicity, after writing tail tag, even if there's space remaining
 755  * in the block, next commit shouldn't use it. That's why tail tag
 756  * has the length as that of the remaining space on the block.
 757  */
 758 static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
 759 {
 760         struct ext4_sb_info *sbi = EXT4_SB(sb);
 761         struct ext4_fc_tl tl;
 762         struct ext4_fc_tail tail;
 763         int off, bsize = sbi->s_journal->j_blocksize;
 764         u8 *dst;
 765
 766         /*
 767          * ext4_fc_reserve_space takes care of allocating an extra block if
 768          * there's no enough space on this block for accommodating this tail.
 769          */
 770         dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + sizeof(tail), &crc);
 771         if (!dst)
 772                 return -ENOSPC;
 773
 774         off = sbi->s_fc_bytes % bsize;
 775
 776         tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
 777         tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
 778         sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
 779
 780         ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, &crc);
 781         dst += EXT4_FC_TAG_BASE_LEN;
 782         tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
 783         ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
 784         dst += sizeof(tail.fc_tid);
 785         tail.fc_crc = cpu_to_le32(crc);
 786         ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);
 787
 788         ext4_fc_submit_bh(sb, true);
 789
 790         return 0;
 791 }
 792
 793 /*
 794  * Adds tag, length, value and updates CRC. Returns true if tlv was added.
 795  * Returns false if there's not enough space.
 796  */
 797 static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
 798                            u32 *crc)
 799 {
 800         struct ext4_fc_tl tl;
 801         u8 *dst;
 802
 803         dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + len, crc);
 804         if (!dst)
 805                 return false;
 806
 807         tl.fc_tag = cpu_to_le16(tag);
 808         tl.fc_len = cpu_to_le16(len);
 809
 810         ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
 811         ext4_fc_memcpy(sb, dst + EXT4_FC_TAG_BASE_LEN, val, len, crc);
 812
 813         return true;
 814 }
 815
 816 /* Same as above, but adds dentry tlv. */
 817 static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
 818                                    struct ext4_fc_dentry_update *fc_dentry)
 819 {
 820         struct ext4_fc_dentry_info fcd;
 821         struct ext4_fc_tl tl;
 822         int dlen = fc_dentry->fcd_name.len;
 823         u8 *dst = ext4_fc_reserve_space(sb,
 824                         EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc);
 825
 826         if (!dst)
 827                 return false;
 828
 829         fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent);
 830         fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino);
 831         tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op);
 832         tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
 833         ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
 834         dst += EXT4_FC_TAG_BASE_LEN;
 835         ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
 836         dst += sizeof(fcd);
 837         ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc);
 838
 839         return true;
 840 }
 841
 842 /*
 843  * Writes inode in the fast commit space under TLV with tag @tag.
 844  * Returns 0 on success, error on failure.
 845  */
 846 static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
 847 {
 848         struct ext4_inode_info *ei = EXT4_I(inode);
 849         int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
 850         int ret;
 851         struct ext4_iloc iloc;
 852         struct ext4_fc_inode fc_inode;
 853         struct ext4_fc_tl tl;
 854         u8 *dst;
 855
 856         ret = ext4_get_inode_loc(inode, &iloc);
 857         if (ret)
 858                 return ret;
 859
 860         if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
 861                 inode_len = EXT4_INODE_SIZE(inode->i_sb);
 862         else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
 863                 inode_len += ei->i_extra_isize;
 864
 865         fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
 866         tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
 867         tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
 868
 869         ret = -ECANCELED;
 870         dst = ext4_fc_reserve_space(inode->i_sb,
 871                 EXT4_FC_TAG_BASE_LEN + inode_len + sizeof(fc_inode.fc_ino), crc);
 872         if (!dst)
 873                 goto err;
 874
 875         if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc))
 876                 goto err;
 877         dst += EXT4_FC_TAG_BASE_LEN;
 878         if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
 879                 goto err;
 880         dst += sizeof(fc_inode);
 881         if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
 882                                         inode_len, crc))
 883                 goto err;
 884         ret = 0;
 885 err:
 886         brelse(iloc.bh);
 887         return ret;
 888 }
 889
 890 /*
 891  * Writes updated data ranges for the inode in question. Updates CRC.
 892  * Returns 0 on success, error otherwise.
 893  */
 894 static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
 895 {
 896         ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
 897         struct ext4_inode_info *ei = EXT4_I(inode);
 898         struct ext4_map_blocks map;
 899         struct ext4_fc_add_range fc_ext;
 900         struct ext4_fc_del_range lrange;
 901         struct ext4_extent *ex;
 902         int ret;
 903
 904         mutex_lock(&ei->i_fc_lock);
 905         if (ei->i_fc_lblk_len == 0) {
 906                 mutex_unlock(&ei->i_fc_lock);
 907                 return 0;
 908         }
 909         old_blk_size = ei->i_fc_lblk_start;
 910         new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
 911         ei->i_fc_lblk_len = 0;
 912         mutex_unlock(&ei->i_fc_lock);
 913
 914         cur_lblk_off = old_blk_size;
 915         ext4_debug("will try writing %d to %d for inode %ld\n",
 916                    cur_lblk_off, new_blk_size, inode->i_ino);
 917
 918         while (cur_lblk_off <= new_blk_size) {
 919                 map.m_lblk = cur_lblk_off;
 920                 map.m_len = new_blk_size - cur_lblk_off + 1;
 921                 ret = ext4_map_blocks(NULL, inode, &map, 0);
 922                 if (ret < 0)
 923                         return -ECANCELED;
 924
 925                 if (map.m_len == 0) {
 926                         cur_lblk_off++;
 927                         continue;
 928                 }
 929
 930                 if (ret == 0) {
 931                         lrange.fc_ino = cpu_to_le32(inode->i_ino);
 932                         lrange.fc_lblk = cpu_to_le32(map.m_lblk);
 933                         lrange.fc_len = cpu_to_le32(map.m_len);
 934                         if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
 935                                             sizeof(lrange), (u8 *)&lrange, crc))
 936                                 return -ENOSPC;
 937                 } else {
 938                         unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ?
 939                                 EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN;
 940
 941                         /* Limit the number of blocks in one extent */
 942                         map.m_len = min(max, map.m_len);
 943
 944                         fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
 945                         ex = (struct ext4_extent *)&fc_ext.fc_ex;
 946                         ex->ee_block = cpu_to_le32(map.m_lblk);
 947                         ex->ee_len = cpu_to_le16(map.m_len);
 948                         ext4_ext_store_pblock(ex, map.m_pblk);
 949                         if (map.m_flags & EXT4_MAP_UNWRITTEN)
 950                                 ext4_ext_mark_unwritten(ex);
 951                         else
 952                                 ext4_ext_mark_initialized(ex);
 953                         if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
 954                                             sizeof(fc_ext), (u8 *)&fc_ext, crc))
 955                                 return -ENOSPC;
 956                 }
 957
 958                 cur_lblk_off += map.m_len;
 959         }
 960
 961         return 0;
 962 }
 963
 964
/*
 * Submit data for all the fast commit inodes.
 *
 * Walks the FC_Q_MAIN inode queue under s_fc_lock. Every inode is flagged
 * EXT4_STATE_FC_COMMITTING, then we wait until its i_fc_updates counter
 * drops to zero and finally hand its jinode to jbd2_submit_inode_data().
 *
 * Returns 0 on success or the first error from jbd2_submit_inode_data().
 * s_fc_lock is not held on return.
 */
static int ext4_fc_submit_inode_data_all(journal_t *journal)
{
        struct super_block *sb = journal->j_private;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_inode_info *ei;
        int ret = 0;

        spin_lock(&sbi->s_fc_lock);
        list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
                ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
                /* Wait for i_fc_updates on this inode to reach zero. */
                while (atomic_read(&ei->i_fc_updates)) {
                        DEFINE_WAIT(wait);

                        prepare_to_wait(&ei->i_fc_wait, &wait,
                                                TASK_UNINTERRUPTIBLE);
                        /*
                         * Re-check after queueing ourselves on i_fc_wait and
                         * drop the spinlock while sleeping.
                         */
                        if (atomic_read(&ei->i_fc_updates)) {
                                spin_unlock(&sbi->s_fc_lock);
                                schedule();
                                spin_lock(&sbi->s_fc_lock);
                        }
                        finish_wait(&ei->i_fc_wait, &wait);
                }
                /* Data submission is done without s_fc_lock held. */
                spin_unlock(&sbi->s_fc_lock);
                ret = jbd2_submit_inode_data(ei->jinode);
                if (ret)
                        return ret;
                spin_lock(&sbi->s_fc_lock);
        }
        spin_unlock(&sbi->s_fc_lock);

        return ret;
}
 998
/*
 * Wait for completion of data for all the fast commit inodes.
 *
 * Walks the FC_Q_MAIN inode queue under s_fc_lock and calls
 * jbd2_wait_inode_data() for every inode that has
 * EXT4_STATE_FC_COMMITTING set; other inodes are skipped.
 *
 * Returns 0 on success or the first error from jbd2_wait_inode_data().
 * s_fc_lock is not held on return.
 */
static int ext4_fc_wait_inode_data_all(journal_t *journal)
{
        struct super_block *sb = journal->j_private;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_inode_info *pos, *n;
        int ret = 0;

        spin_lock(&sbi->s_fc_lock);
        list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
                if (!ext4_test_inode_state(&pos->vfs_inode,
                                           EXT4_STATE_FC_COMMITTING))
                        continue;
                /* The wait itself is done without s_fc_lock held. */
                spin_unlock(&sbi->s_fc_lock);

                ret = jbd2_wait_inode_data(journal, pos->jinode);
                if (ret)
                        return ret;
                spin_lock(&sbi->s_fc_lock);
        }
        spin_unlock(&sbi->s_fc_lock);

        return 0;
}
1023
/*
 * Commit all the directory entry updates.
 *
 * Walks the FC_Q_MAIN dentry update queue and writes one dentry TLV per
 * entry, updating @crc. For EXT4_FC_TAG_CREAT entries the inode and its data
 * ranges are written before the dentry TLV.
 *
 * Must be called with sbi->s_fc_lock held. The lock is dropped while TLVs
 * are being written and is held again on every return path.
 *
 * Returns 0 on success, -ENOSPC if a dentry TLV could not be added, or the
 * error from ext4_fc_write_inode() / ext4_fc_write_inode_data().
 */
static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
__acquires(&sbi->s_fc_lock)
__releases(&sbi->s_fc_lock)
{
        struct super_block *sb = journal->j_private;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n;
        struct inode *inode;
        struct ext4_inode_info *ei;
        int ret;

        if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
                return 0;
        list_for_each_entry_safe(fc_dentry, fc_dentry_n,
                                 &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
                /* Anything but a create only needs the dentry TLV. */
                if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
                        spin_unlock(&sbi->s_fc_lock);
                        if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
                                ret = -ENOSPC;
                                goto lock_and_exit;
                        }
                        spin_lock(&sbi->s_fc_lock);
                        continue;
                }
                /*
                 * With fcd_dilist we need not loop in sbi->s_fc_q to get the
                 * corresponding inode pointer
                 */
                WARN_ON(list_empty(&fc_dentry->fcd_dilist));
                ei = list_first_entry(&fc_dentry->fcd_dilist,
                                struct ext4_inode_info, i_fc_dilist);
                inode = &ei->vfs_inode;
                WARN_ON(inode->i_ino != fc_dentry->fcd_ino);

                spin_unlock(&sbi->s_fc_lock);

                /*
                 * We first write the inode and then the create dirent. This
                 * allows the recovery code to create an unnamed inode first
                 * and then link it to a directory entry. This allows us
                 * to use namei.c routines almost as is and simplifies
                 * the recovery code.
                 */
                ret = ext4_fc_write_inode(inode, crc);
                if (ret)
                        goto lock_and_exit;

                ret = ext4_fc_write_inode_data(inode, crc);
                if (ret)
                        goto lock_and_exit;

                if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
                        ret = -ENOSPC;
                        goto lock_and_exit;
                }

                spin_lock(&sbi->s_fc_lock);
        }
        return 0;
lock_and_exit:
        /* Error paths arrive here unlocked; retake the lock for the caller. */
        spin_lock(&sbi->s_fc_lock);
        return ret;
}
1088
/*
 * Write out one fast commit.
 *
 * Steps, in order:
 *  - submit and wait for the data of all inodes on the FC_Q_MAIN queue;
 *  - flush the file system device if it differs from the journal device;
 *  - write a head TLV if nothing has been written in this TID yet
 *    (s_fc_bytes == 0);
 *  - write all queued dentry updates;
 *  - for every queued inode marked EXT4_STATE_FC_COMMITTING, write its data
 *    ranges followed by the inode itself;
 *  - write the tail TLV carrying the accumulated checksum.
 *
 * Returns 0 on success, a negative error otherwise.
 */
static int ext4_fc_perform_commit(journal_t *journal)
{
        struct super_block *sb = journal->j_private;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_inode_info *iter;
        struct ext4_fc_head head;
        struct inode *inode;
        struct blk_plug plug;
        int ret = 0;
        u32 crc = 0;

        ret = ext4_fc_submit_inode_data_all(journal);
        if (ret)
                return ret;

        ret = ext4_fc_wait_inode_data_all(journal);
        if (ret)
                return ret;

        /*
         * If file system device is different from journal device, issue a cache
         * flush before we start writing fast commit blocks.
         */
        if (journal->j_fs_dev != journal->j_dev)
                blkdev_issue_flush(journal->j_fs_dev);

        blk_start_plug(&plug);
        if (sbi->s_fc_bytes == 0) {
                /*
                 * Add a head tag only if this is the first fast commit
                 * in this TID.
                 */
                head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES);
                head.fc_tid = cpu_to_le32(
                        sbi->s_journal->j_running_transaction->t_tid);
                if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
                        (u8 *)&head, &crc)) {
                        ret = -ENOSPC;
                        goto out;
                }
        }

        /* ext4_fc_commit_dentry_updates() returns with s_fc_lock held. */
        spin_lock(&sbi->s_fc_lock);
        ret = ext4_fc_commit_dentry_updates(journal, &crc);
        if (ret) {
                spin_unlock(&sbi->s_fc_lock);
                goto out;
        }

        list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
                inode = &iter->vfs_inode;
                if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
                        continue;

                /*
                 * The lock is dropped while writing, so the error exits
                 * below reach "out" without s_fc_lock held.
                 */
                spin_unlock(&sbi->s_fc_lock);
                ret = ext4_fc_write_inode_data(inode, &crc);
                if (ret)
                        goto out;
                ret = ext4_fc_write_inode(inode, &crc);
                if (ret)
                        goto out;
                spin_lock(&sbi->s_fc_lock);
        }
        spin_unlock(&sbi->s_fc_lock);

        ret = ext4_fc_write_tail(sb, crc);

out:
        blk_finish_plug(&plug);
        return ret;
}
1160
1161 static void ext4_fc_update_stats(struct super_block *sb, int status,
1162                                  u64 commit_time, int nblks, tid_t commit_tid)
1163 {
1164         struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats;
1165
1166         ext4_debug("Fast commit ended with status = %d for tid %u",
1167                         status, commit_tid);
1168         if (status == EXT4_FC_STATUS_OK) {
1169                 stats->fc_num_commits++;
1170                 stats->fc_numblks += nblks;
1171                 if (likely(stats->s_fc_avg_commit_time))
1172                         stats->s_fc_avg_commit_time =
1173                                 (commit_time +
1174                                  stats->s_fc_avg_commit_time * 3) / 4;
1175                 else
1176                         stats->s_fc_avg_commit_time = commit_time;
1177         } else if (status == EXT4_FC_STATUS_FAILED ||
1178                    status == EXT4_FC_STATUS_INELIGIBLE) {
1179                 if (status == EXT4_FC_STATUS_FAILED)
1180                         stats->fc_failed_commits++;
1181                 stats->fc_ineligible_commits++;
1182         } else {
1183                 stats->fc_skipped_commits++;
1184         }
1185         trace_ext4_fc_commit_stop(sb, nblks, status, commit_tid);
1186 }
1187
1188 /*
1189  * The main commit entry point. Performs a fast commit for transaction
1190  * commit_tid if needed. If it's not possible to perform a fast commit
1191  * due to various reasons, we fall back to full commit. Returns 0
1192  * on success, error otherwise.
1193  */
1194 int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
1195 {
1196         struct super_block *sb = journal->j_private;
1197         struct ext4_sb_info *sbi = EXT4_SB(sb);
1198         int nblks = 0, ret, bsize = journal->j_blocksize;
1199         int subtid = atomic_read(&sbi->s_fc_subtid);
1200         int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0;
1201         ktime_t start_time, commit_time;
1202
1203         if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
1204                 return jbd2_complete_transaction(journal, commit_tid);
1205
1206         trace_ext4_fc_commit_start(sb, commit_tid);
1207
1208         start_time = ktime_get();
1209
1210 restart_fc:
1211         ret = jbd2_fc_begin_commit(journal, commit_tid);
1212         if (ret == -EALREADY) {
1213                 /* There was an ongoing commit, check if we need to restart */
1214                 if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
1215                         commit_tid > journal->j_commit_sequence)
1216                         goto restart_fc;
1217                 ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0,
1218                                 commit_tid);
1219                 return 0;
1220         } else if (ret) {
1221                 /*
1222                  * Commit couldn't start. Just update stats and perform a
1223                  * full commit.
1224                  */
1225                 ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0,
1226                                 commit_tid);
1227                 return jbd2_complete_transaction(journal, commit_tid);
1228         }
1229
1230         /*
1231          * After establishing journal barrier via jbd2_fc_begin_commit(), check
1232          * if we are fast commit ineligible.
1233          */
1234         if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) {
1235                 status = EXT4_FC_STATUS_INELIGIBLE;
1236                 goto fallback;
1237         }
1238
1239         fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
1240         ret = ext4_fc_perform_commit(journal);
1241         if (ret < 0) {
1242                 status = EXT4_FC_STATUS_FAILED;
1243                 goto fallback;
1244         }
1245         nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
1246         ret = jbd2_fc_wait_bufs(journal, nblks);
1247         if (ret < 0) {
1248                 status = EXT4_FC_STATUS_FAILED;
1249                 goto fallback;
1250         }
1251         atomic_inc(&sbi->s_fc_subtid);
1252         ret = jbd2_fc_end_commit(journal);
1253         /*
1254          * weight the commit time higher than the average time so we
1255          * don't react too strongly to vast changes in the commit time
1256          */
1257         commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
1258         ext4_fc_update_stats(sb, status, commit_time, nblks, commit_tid);
1259         return ret;
1260
1261 fallback:
1262         ret = jbd2_fc_end_commit_fallback(journal);
1263         ext4_fc_update_stats(sb, status, 0, 0, commit_tid);
1264         return ret;
1265 }
1266
1267 /*
1268  * Fast commit cleanup routine. This is called after every fast commit and
1269  * full commit. full is true if we are called after a full commit.
1270  */
1271 static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
1272 {
1273         struct super_block *sb = journal->j_private;
1274         struct ext4_sb_info *sbi = EXT4_SB(sb);
1275         struct ext4_inode_info *iter, *iter_n;
1276         struct ext4_fc_dentry_update *fc_dentry;
1277
1278         if (full && sbi->s_fc_bh)
1279                 sbi->s_fc_bh = NULL;
1280
1281         trace_ext4_fc_cleanup(journal, full, tid);
1282         jbd2_fc_release_bufs(journal);
1283
1284         spin_lock(&sbi->s_fc_lock);
1285         list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN],
1286                                  i_fc_list) {
1287                 list_del_init(&iter->i_fc_list);
1288                 ext4_clear_inode_state(&iter->vfs_inode,
1289                                        EXT4_STATE_FC_COMMITTING);
1290                 if (iter->i_sync_tid <= tid)
1291                         ext4_fc_reset_inode(&iter->vfs_inode);
1292                 /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
1293                 smp_mb();
1294 #if (BITS_PER_LONG < 64)
1295                 wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
1296 #else
1297                 wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
1298 #endif
1299         }
1300
1301         while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
1302                 fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
1303                                              struct ext4_fc_dentry_update,
1304                                              fcd_list);
1305                 list_del_init(&fc_dentry->fcd_list);
1306                 list_del_init(&fc_dentry->fcd_dilist);
1307                 spin_unlock(&sbi->s_fc_lock);
1308
1309                 if (fc_dentry->fcd_name.name &&
1310                         fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
1311                         kfree(fc_dentry->fcd_name.name);
1312                 kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
1313                 spin_lock(&sbi->s_fc_lock);
1314         }
1315
1316         list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
1317                                 &sbi->s_fc_dentry_q[FC_Q_MAIN]);
1318         list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
1319                                 &sbi->s_fc_q[FC_Q_MAIN]);
1320
1321         if (tid >= sbi->s_fc_ineligible_tid) {
1322                 sbi->s_fc_ineligible_tid = 0;
1323                 ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
1324         }
1325
1326         if (full)
1327                 sbi->s_fc_bytes = 0;
1328         spin_unlock(&sbi->s_fc_lock);
1329         trace_ext4_fc_stats(sb);
1330 }
1331
1332 /* Ext4 Replay Path Routines */
1333
/* Decoded dentry TLV, passed around by the dentry replay routines */
struct dentry_info_args {
        int parent_ino;         /* inode number of the parent directory */
        int dname_len;          /* length of the name at @dname, in bytes */
        int ino;                /* inode number the entry refers to */
        int inode_len;          /* NOTE(review): not set by tl_to_darg() */
        char *dname;            /* entry name; points into the TLV payload */
};
1339
1340 static inline void tl_to_darg(struct dentry_info_args *darg,
1341                               struct ext4_fc_tl *tl, u8 *val)
1342 {
1343         struct ext4_fc_dentry_info fcd;
1344
1345         memcpy(&fcd, val, sizeof(fcd));
1346
1347         darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino);
1348         darg->ino = le32_to_cpu(fcd.fc_ino);
1349         darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname);
1350         darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info);
1351 }
1352
1353 static inline void ext4_fc_get_tl(struct ext4_fc_tl *tl, u8 *val)
1354 {
1355         memcpy(tl, val, EXT4_FC_TAG_BASE_LEN);
1356         tl->fc_len = le16_to_cpu(tl->fc_len);
1357         tl->fc_tag = le16_to_cpu(tl->fc_tag);
1358 }
1359
1360 /* Unlink replay function */
1361 static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
1362                                  u8 *val)
1363 {
1364         struct inode *inode, *old_parent;
1365         struct qstr entry;
1366         struct dentry_info_args darg;
1367         int ret = 0;
1368
1369         tl_to_darg(&darg, tl, val);
1370
1371         trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino,
1372                         darg.parent_ino, darg.dname_len);
1373
1374         entry.name = darg.dname;
1375         entry.len = darg.dname_len;
1376         inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
1377
1378         if (IS_ERR(inode)) {
1379                 ext4_debug("Inode %d not found", darg.ino);
1380                 return 0;
1381         }
1382
1383         old_parent = ext4_iget(sb, darg.parent_ino,
1384                                 EXT4_IGET_NORMAL);
1385         if (IS_ERR(old_parent)) {
1386                 ext4_debug("Dir with inode %d not found", darg.parent_ino);
1387                 iput(inode);
1388                 return 0;
1389         }
1390
1391         ret = __ext4_unlink(NULL, old_parent, &entry, inode);
1392         /* -ENOENT ok coz it might not exist anymore. */
1393         if (ret == -ENOENT)
1394                 ret = 0;
1395         iput(old_parent);
1396         iput(inode);
1397         return ret;
1398 }
1399
1400 static int ext4_fc_replay_link_internal(struct super_block *sb,
1401                                 struct dentry_info_args *darg,
1402                                 struct inode *inode)
1403 {
1404         struct inode *dir = NULL;
1405         struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
1406         struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
1407         int ret = 0;
1408
1409         dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
1410         if (IS_ERR(dir)) {
1411                 ext4_debug("Dir with inode %d not found.", darg->parent_ino);
1412                 dir = NULL;
1413                 goto out;
1414         }
1415
1416         dentry_dir = d_obtain_alias(dir);
1417         if (IS_ERR(dentry_dir)) {
1418                 ext4_debug("Failed to obtain dentry");
1419                 dentry_dir = NULL;
1420                 goto out;
1421         }
1422
1423         dentry_inode = d_alloc(dentry_dir, &qstr_dname);
1424         if (!dentry_inode) {
1425                 ext4_debug("Inode dentry not created.");
1426                 ret = -ENOMEM;
1427                 goto out;
1428         }
1429
1430         ret = __ext4_link(dir, inode, dentry_inode);
1431         /*
1432          * It's possible that link already existed since data blocks
1433          * for the dir in question got persisted before we crashed OR
1434          * we replayed this tag and crashed before the entire replay
1435          * could complete.
1436          */
1437         if (ret && ret != -EEXIST) {
1438                 ext4_debug("Failed to link\n");
1439                 goto out;
1440         }
1441
1442         ret = 0;
1443 out:
1444         if (dentry_dir) {
1445                 d_drop(dentry_dir);
1446                 dput(dentry_dir);
1447         } else if (dir) {
1448                 iput(dir);
1449         }
1450         if (dentry_inode) {
1451                 d_drop(dentry_inode);
1452                 dput(dentry_inode);
1453         }
1454
1455         return ret;
1456 }
1457
1458 /* Link replay function */
1459 static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl,
1460                                u8 *val)
1461 {
1462         struct inode *inode;
1463         struct dentry_info_args darg;
1464         int ret = 0;
1465
1466         tl_to_darg(&darg, tl, val);
1467         trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino,
1468                         darg.parent_ino, darg.dname_len);
1469
1470         inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
1471         if (IS_ERR(inode)) {
1472                 ext4_debug("Inode not found.");
1473                 return 0;
1474         }
1475
1476         ret = ext4_fc_replay_link_internal(sb, &darg, inode);
1477         iput(inode);
1478         return ret;
1479 }
1480
1481 /*
1482  * Record all the modified inodes during replay. We use this later to setup
1483  * block bitmaps correctly.
1484  */
1485 static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
1486 {
1487         struct ext4_fc_replay_state *state;
1488         int i;
1489
1490         state = &EXT4_SB(sb)->s_fc_replay_state;
1491         for (i = 0; i < state->fc_modified_inodes_used; i++)
1492                 if (state->fc_modified_inodes[i] == ino)
1493                         return 0;
1494         if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
1495                 int *fc_modified_inodes;
1496
1497                 fc_modified_inodes = krealloc(state->fc_modified_inodes,
1498                                 sizeof(int) * (state->fc_modified_inodes_size +
1499                                 EXT4_FC_REPLAY_REALLOC_INCREMENT),
1500                                 GFP_KERNEL);
1501                 if (!fc_modified_inodes)
1502                         return -ENOMEM;
1503                 state->fc_modified_inodes = fc_modified_inodes;
1504                 state->fc_modified_inodes_size +=
1505                         EXT4_FC_REPLAY_REALLOC_INCREMENT;
1506         }
1507         state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
1508         return 0;
1509 }
1510
1511 /*
1512  * Inode replay function
1513  */
1514 static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
1515                                 u8 *val)
1516 {
1517         struct ext4_fc_inode fc_inode;
1518         struct ext4_inode *raw_inode;
1519         struct ext4_inode *raw_fc_inode;
1520         struct inode *inode = NULL;
1521         struct ext4_iloc iloc;
1522         int inode_len, ino, ret, tag = tl->fc_tag;
1523         struct ext4_extent_header *eh;
1524
1525         memcpy(&fc_inode, val, sizeof(fc_inode));
1526
1527         ino = le32_to_cpu(fc_inode.fc_ino);
1528         trace_ext4_fc_replay(sb, tag, ino, 0, 0);
1529
1530         inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
1531         if (!IS_ERR(inode)) {
1532                 ext4_ext_clear_bb(inode);
1533                 iput(inode);
1534         }
1535         inode = NULL;
1536
1537         ret = ext4_fc_record_modified_inode(sb, ino);
1538         if (ret)
1539                 goto out;
1540
1541         raw_fc_inode = (struct ext4_inode *)
1542                 (val + offsetof(struct ext4_fc_inode, fc_raw_inode));
1543         ret = ext4_get_fc_inode_loc(sb, ino, &iloc);
1544         if (ret)
1545                 goto out;
1546
1547         inode_len = tl->fc_len - sizeof(struct ext4_fc_inode);
1548         raw_inode = ext4_raw_inode(&iloc);
1549
1550         memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
1551         memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation,
1552                 inode_len - offsetof(struct ext4_inode, i_generation));
1553         if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
1554                 eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
1555                 if (eh->eh_magic != EXT4_EXT_MAGIC) {
1556                         memset(eh, 0, sizeof(*eh));
1557                         eh->eh_magic = EXT4_EXT_MAGIC;
1558                         eh->eh_max = cpu_to_le16(
1559                                 (sizeof(raw_inode->i_block) -
1560                                  sizeof(struct ext4_extent_header))
1561                                  / sizeof(struct ext4_extent));
1562                 }
1563         } else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) {
1564                 memcpy(raw_inode->i_block, raw_fc_inode->i_block,
1565                         sizeof(raw_inode->i_block));
1566         }
1567
1568         /* Immediately update the inode on disk. */
1569         ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
1570         if (ret)
1571                 goto out;
1572         ret = sync_dirty_buffer(iloc.bh);
1573         if (ret)
1574                 goto out;
1575         ret = ext4_mark_inode_used(sb, ino);
1576         if (ret)
1577                 goto out;
1578
1579         /* Given that we just wrote the inode on disk, this SHOULD succeed. */
1580         inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
1581         if (IS_ERR(inode)) {
1582                 ext4_debug("Inode not found.");
1583                 return -EFSCORRUPTED;
1584         }
1585
1586         /*
1587          * Our allocator could have made different decisions than before
1588          * crashing. This should be fixed but until then, we calculate
1589          * the number of blocks the inode.
1590          */
1591         if (!ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
1592                 ext4_ext_replay_set_iblocks(inode);
1593
1594         inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
1595         ext4_reset_inode_seed(inode);
1596
1597         ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
1598         ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
1599         sync_dirty_buffer(iloc.bh);
1600         brelse(iloc.bh);
1601 out:
1602         iput(inode);
1603         if (!ret)
1604                 blkdev_issue_flush(sb->s_bdev);
1605
1606         return 0;
1607 }
1608
1609 /*
1610  * Dentry create replay function.
1611  *
1612  * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the
1613  * inode for which we are trying to create a dentry here, should already have
1614  * been replayed before we start here.
1615  */
1616 static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
1617                                  u8 *val)
1618 {
1619         int ret = 0;
1620         struct inode *inode = NULL;
1621         struct inode *dir = NULL;
1622         struct dentry_info_args darg;
1623
1624         tl_to_darg(&darg, tl, val);
1625
1626         trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
1627                         darg.parent_ino, darg.dname_len);
1628
1629         /* This takes care of update group descriptor and other metadata */
1630         ret = ext4_mark_inode_used(sb, darg.ino);
1631         if (ret)
1632                 goto out;
1633
1634         inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
1635         if (IS_ERR(inode)) {
1636                 ext4_debug("inode %d not found.", darg.ino);
1637                 inode = NULL;
1638                 ret = -EINVAL;
1639                 goto out;
1640         }
1641
1642         if (S_ISDIR(inode->i_mode)) {
1643                 /*
1644                  * If we are creating a directory, we need to make sure that the
1645                  * dot and dot dot dirents are setup properly.
1646                  */
1647                 dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
1648                 if (IS_ERR(dir)) {
1649                         ext4_debug("Dir %d not found.", darg.ino);
1650                         goto out;
1651                 }
1652                 ret = ext4_init_new_dir(NULL, dir, inode);
1653                 iput(dir);
1654                 if (ret) {
1655                         ret = 0;
1656                         goto out;
1657                 }
1658         }
1659         ret = ext4_fc_replay_link_internal(sb, &darg, inode);
1660         if (ret)
1661                 goto out;
1662         set_nlink(inode, 1);
1663         ext4_mark_inode_dirty(NULL, inode);
1664 out:
1665         iput(inode);
1666         return ret;
1667 }
1668
1669 /*
1670  * Record physical disk regions which are in use as per fast commit area,
1671  * and used by inodes during replay phase. Our simple replay phase
1672  * allocator excludes these regions from allocation.
1673  */
1674 int ext4_fc_record_regions(struct super_block *sb, int ino,
1675                 ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
1676 {
1677         struct ext4_fc_replay_state *state;
1678         struct ext4_fc_alloc_region *region;
1679
1680         state = &EXT4_SB(sb)->s_fc_replay_state;
1681         /*
1682          * during replay phase, the fc_regions_valid may not same as
1683          * fc_regions_used, update it when do new additions.
1684          */
1685         if (replay && state->fc_regions_used != state->fc_regions_valid)
1686                 state->fc_regions_used = state->fc_regions_valid;
1687         if (state->fc_regions_used == state->fc_regions_size) {
1688                 struct ext4_fc_alloc_region *fc_regions;
1689
1690                 fc_regions = krealloc(state->fc_regions,
1691                                       sizeof(struct ext4_fc_alloc_region) *
1692                                       (state->fc_regions_size +
1693                                        EXT4_FC_REPLAY_REALLOC_INCREMENT),
1694                                       GFP_KERNEL);
1695                 if (!fc_regions)
1696                         return -ENOMEM;
1697                 state->fc_regions_size +=
1698                         EXT4_FC_REPLAY_REALLOC_INCREMENT;
1699                 state->fc_regions = fc_regions;
1700         }
1701         region = &state->fc_regions[state->fc_regions_used++];
1702         region->ino = ino;
1703         region->lblk = lblk;
1704         region->pblk = pblk;
1705         region->len = len;
1706
1707         if (replay)
1708                 state->fc_regions_valid++;
1709
1710         return 0;
1711 }
1712
1713 /* Replay add range tag */
1714 static int ext4_fc_replay_add_range(struct super_block *sb,
1715                                     struct ext4_fc_tl *tl, u8 *val)
1716 {
1717         struct ext4_fc_add_range fc_add_ex;
1718         struct ext4_extent newex, *ex;
1719         struct inode *inode;
1720         ext4_lblk_t start, cur;
1721         int remaining, len;
1722         ext4_fsblk_t start_pblk;
1723         struct ext4_map_blocks map;
1724         struct ext4_ext_path *path = NULL;
1725         int ret;
1726
1727         memcpy(&fc_add_ex, val, sizeof(fc_add_ex));
1728         ex = (struct ext4_extent *)&fc_add_ex.fc_ex;
1729
1730         trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
1731                 le32_to_cpu(fc_add_ex.fc_ino), le32_to_cpu(ex->ee_block),
1732                 ext4_ext_get_actual_len(ex));
1733
1734         inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL);
1735         if (IS_ERR(inode)) {
1736                 ext4_debug("Inode not found.");
1737                 return 0;
1738         }
1739
1740         ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
1741         if (ret)
1742                 goto out;
1743
1744         start = le32_to_cpu(ex->ee_block);
1745         start_pblk = ext4_ext_pblock(ex);
1746         len = ext4_ext_get_actual_len(ex);
1747
1748         cur = start;
1749         remaining = len;
1750         ext4_debug("ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
1751                   start, start_pblk, len, ext4_ext_is_unwritten(ex),
1752                   inode->i_ino);
1753
1754         while (remaining > 0) {
1755                 map.m_lblk = cur;
1756                 map.m_len = remaining;
1757                 map.m_pblk = 0;
1758                 ret = ext4_map_blocks(NULL, inode, &map, 0);
1759
1760                 if (ret < 0)
1761                         goto out;
1762
1763                 if (ret == 0) {
1764                         /* Range is not mapped */
1765                         path = ext4_find_extent(inode, cur, NULL, 0);
1766                         if (IS_ERR(path))
1767                                 goto out;
1768                         memset(&newex, 0, sizeof(newex));
1769                         newex.ee_block = cpu_to_le32(cur);
1770                         ext4_ext_store_pblock(
1771                                 &newex, start_pblk + cur - start);
1772                         newex.ee_len = cpu_to_le16(map.m_len);
1773                         if (ext4_ext_is_unwritten(ex))
1774                                 ext4_ext_mark_unwritten(&newex);
1775                         down_write(&EXT4_I(inode)->i_data_sem);
1776                         ret = ext4_ext_insert_extent(
1777                                 NULL, inode, &path, &newex, 0);
1778                         up_write((&EXT4_I(inode)->i_data_sem));
1779                         ext4_free_ext_path(path);
1780                         if (ret)
1781                                 goto out;
1782                         goto next;
1783                 }
1784
1785                 if (start_pblk + cur - start != map.m_pblk) {
1786                         /*
1787                          * Logical to physical mapping changed. This can happen
1788                          * if this range was removed and then reallocated to
1789                          * map to new physical blocks during a fast commit.
1790                          */
1791                         ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
1792                                         ext4_ext_is_unwritten(ex),
1793                                         start_pblk + cur - start);
1794                         if (ret)
1795                                 goto out;
1796                         /*
1797                          * Mark the old blocks as free since they aren't used
1798                          * anymore. We maintain an array of all the modified
1799                          * inodes. In case these blocks are still used at either
1800                          * a different logical range in the same inode or in
1801                          * some different inode, we will mark them as allocated
1802                          * at the end of the FC replay using our array of
1803                          * modified inodes.
1804                          */
1805                         ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
1806                         goto next;
1807                 }
1808
1809                 /* Range is mapped and needs a state change */
1810                 ext4_debug("Converting from %ld to %d %lld",
1811                                 map.m_flags & EXT4_MAP_UNWRITTEN,
1812                         ext4_ext_is_unwritten(ex), map.m_pblk);
1813                 ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
1814                                         ext4_ext_is_unwritten(ex), map.m_pblk);
1815                 if (ret)
1816                         goto out;
1817                 /*
1818                  * We may have split the extent tree while toggling the state.
1819                  * Try to shrink the extent tree now.
1820                  */
1821                 ext4_ext_replay_shrink_inode(inode, start + len);
1822 next:
1823                 cur += map.m_len;
1824                 remaining -= map.m_len;
1825         }
1826         ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
1827                                         sb->s_blocksize_bits);
1828 out:
1829         iput(inode);
1830         return 0;
1831 }
1832
1833 /* Replay DEL_RANGE tag */
1834 static int
1835 ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
1836                          u8 *val)
1837 {
1838         struct inode *inode;
1839         struct ext4_fc_del_range lrange;
1840         struct ext4_map_blocks map;
1841         ext4_lblk_t cur, remaining;
1842         int ret;
1843
1844         memcpy(&lrange, val, sizeof(lrange));
1845         cur = le32_to_cpu(lrange.fc_lblk);
1846         remaining = le32_to_cpu(lrange.fc_len);
1847
1848         trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE,
1849                 le32_to_cpu(lrange.fc_ino), cur, remaining);
1850
1851         inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL);
1852         if (IS_ERR(inode)) {
1853                 ext4_debug("Inode %d not found", le32_to_cpu(lrange.fc_ino));
1854                 return 0;
1855         }
1856
1857         ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
1858         if (ret)
1859                 goto out;
1860
1861         ext4_debug("DEL_RANGE, inode %ld, lblk %d, len %d\n",
1862                         inode->i_ino, le32_to_cpu(lrange.fc_lblk),
1863                         le32_to_cpu(lrange.fc_len));
1864         while (remaining > 0) {
1865                 map.m_lblk = cur;
1866                 map.m_len = remaining;
1867
1868                 ret = ext4_map_blocks(NULL, inode, &map, 0);
1869                 if (ret < 0)
1870                         goto out;
1871                 if (ret > 0) {
1872                         remaining -= ret;
1873                         cur += ret;
1874                         ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
1875                 } else {
1876                         remaining -= map.m_len;
1877                         cur += map.m_len;
1878                 }
1879         }
1880
1881         down_write(&EXT4_I(inode)->i_data_sem);
1882         ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk),
1883                                 le32_to_cpu(lrange.fc_lblk) +
1884                                 le32_to_cpu(lrange.fc_len) - 1);
1885         up_write(&EXT4_I(inode)->i_data_sem);
1886         if (ret)
1887                 goto out;
1888         ext4_ext_replay_shrink_inode(inode,
1889                 i_size_read(inode) >> sb->s_blocksize_bits);
1890         ext4_mark_inode_dirty(NULL, inode);
1891 out:
1892         iput(inode);
1893         return 0;
1894 }
1895
1896 static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
1897 {
1898         struct ext4_fc_replay_state *state;
1899         struct inode *inode;
1900         struct ext4_ext_path *path = NULL;
1901         struct ext4_map_blocks map;
1902         int i, ret, j;
1903         ext4_lblk_t cur, end;
1904
1905         state = &EXT4_SB(sb)->s_fc_replay_state;
1906         for (i = 0; i < state->fc_modified_inodes_used; i++) {
1907                 inode = ext4_iget(sb, state->fc_modified_inodes[i],
1908                         EXT4_IGET_NORMAL);
1909                 if (IS_ERR(inode)) {
1910                         ext4_debug("Inode %d not found.",
1911                                 state->fc_modified_inodes[i]);
1912                         continue;
1913                 }
1914                 cur = 0;
1915                 end = EXT_MAX_BLOCKS;
1916                 if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) {
1917                         iput(inode);
1918                         continue;
1919                 }
1920                 while (cur < end) {
1921                         map.m_lblk = cur;
1922                         map.m_len = end - cur;
1923
1924                         ret = ext4_map_blocks(NULL, inode, &map, 0);
1925                         if (ret < 0)
1926                                 break;
1927
1928                         if (ret > 0) {
1929                                 path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
1930                                 if (!IS_ERR(path)) {
1931                                         for (j = 0; j < path->p_depth; j++)
1932                                                 ext4_mb_mark_bb(inode->i_sb,
1933                                                         path[j].p_block, 1, 1);
1934                                         ext4_free_ext_path(path);
1935                                 }
1936                                 cur += ret;
1937                                 ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
1938                                                         map.m_len, 1);
1939                         } else {
1940                                 cur = cur + (map.m_len ? map.m_len : 1);
1941                         }
1942                 }
1943                 iput(inode);
1944         }
1945 }
1946
1947 /*
1948  * Check if block is in excluded regions for block allocation. The simple
1949  * allocator that runs during replay phase is calls this function to see
1950  * if it is okay to use a block.
1951  */
1952 bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
1953 {
1954         int i;
1955         struct ext4_fc_replay_state *state;
1956
1957         state = &EXT4_SB(sb)->s_fc_replay_state;
1958         for (i = 0; i < state->fc_regions_valid; i++) {
1959                 if (state->fc_regions[i].ino == 0 ||
1960                         state->fc_regions[i].len == 0)
1961                         continue;
1962                 if (in_range(blk, state->fc_regions[i].pblk,
1963                                         state->fc_regions[i].len))
1964                         return true;
1965         }
1966         return false;
1967 }
1968
1969 /* Cleanup function called after replay */
1970 void ext4_fc_replay_cleanup(struct super_block *sb)
1971 {
1972         struct ext4_sb_info *sbi = EXT4_SB(sb);
1973
1974         sbi->s_mount_state &= ~EXT4_FC_REPLAY;
1975         kfree(sbi->s_fc_replay_state.fc_regions);
1976         kfree(sbi->s_fc_replay_state.fc_modified_inodes);
1977 }
1978
1979 static inline bool ext4_fc_tag_len_isvalid(struct ext4_fc_tl *tl,
1980                                            u8 *val, u8 *end)
1981 {
1982         if (val + tl->fc_len > end)
1983                 return false;
1984
1985         /* Here only check ADD_RANGE/TAIL/HEAD which will read data when do
1986          * journal rescan before do CRC check. Other tags length check will
1987          * rely on CRC check.
1988          */
1989         switch (tl->fc_tag) {
1990         case EXT4_FC_TAG_ADD_RANGE:
1991                 return (sizeof(struct ext4_fc_add_range) == tl->fc_len);
1992         case EXT4_FC_TAG_TAIL:
1993                 return (sizeof(struct ext4_fc_tail) <= tl->fc_len);
1994         case EXT4_FC_TAG_HEAD:
1995                 return (sizeof(struct ext4_fc_head) == tl->fc_len);
1996         case EXT4_FC_TAG_DEL_RANGE:
1997         case EXT4_FC_TAG_LINK:
1998         case EXT4_FC_TAG_UNLINK:
1999         case EXT4_FC_TAG_CREAT:
2000         case EXT4_FC_TAG_INODE:
2001         case EXT4_FC_TAG_PAD:
2002         default:
2003                 return true;
2004         }
2005 }
2006
2007 /*
2008  * Recovery Scan phase handler
2009  *
2010  * This function is called during the scan phase and is responsible
2011  * for doing following things:
2012  * - Make sure the fast commit area has valid tags for replay
2013  * - Count number of tags that need to be replayed by the replay handler
2014  * - Verify CRC
2015  * - Create a list of excluded blocks for allocation during replay phase
2016  *
2017  * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is
2018  * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP
2019  * to indicate that scan has finished and JBD2 can now start replay phase.
2020  * It returns a negative error to indicate that there was an error. At the end
2021  * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set
2022  * to indicate the number of tags that need to replayed during the replay phase.
2023  */
2024 static int ext4_fc_replay_scan(journal_t *journal,
2025                                 struct buffer_head *bh, int off,
2026                                 tid_t expected_tid)
2027 {
2028         struct super_block *sb = journal->j_private;
2029         struct ext4_sb_info *sbi = EXT4_SB(sb);
2030         struct ext4_fc_replay_state *state;
2031         int ret = JBD2_FC_REPLAY_CONTINUE;
2032         struct ext4_fc_add_range ext;
2033         struct ext4_fc_tl tl;
2034         struct ext4_fc_tail tail;
2035         __u8 *start, *end, *cur, *val;
2036         struct ext4_fc_head head;
2037         struct ext4_extent *ex;
2038
2039         state = &sbi->s_fc_replay_state;
2040
2041         start = (u8 *)bh->b_data;
2042         end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
2043
2044         if (state->fc_replay_expected_off == 0) {
2045                 state->fc_cur_tag = 0;
2046                 state->fc_replay_num_tags = 0;
2047                 state->fc_crc = 0;
2048                 state->fc_regions = NULL;
2049                 state->fc_regions_valid = state->fc_regions_used =
2050                         state->fc_regions_size = 0;
2051                 /* Check if we can stop early */
2052                 if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
2053                         != EXT4_FC_TAG_HEAD)
2054                         return 0;
2055         }
2056
2057         if (off != state->fc_replay_expected_off) {
2058                 ret = -EFSCORRUPTED;
2059                 goto out_err;
2060         }
2061
2062         state->fc_replay_expected_off++;
2063         for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
2064              cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
2065                 ext4_fc_get_tl(&tl, cur);
2066                 val = cur + EXT4_FC_TAG_BASE_LEN;
2067                 if (!ext4_fc_tag_len_isvalid(&tl, val, end)) {
2068                         ret = state->fc_replay_num_tags ?
2069                                 JBD2_FC_REPLAY_STOP : -ECANCELED;
2070                         goto out_err;
2071                 }
2072                 ext4_debug("Scan phase, tag:%s, blk %lld\n",
2073                            tag2str(tl.fc_tag), bh->b_blocknr);
2074                 switch (tl.fc_tag) {
2075                 case EXT4_FC_TAG_ADD_RANGE:
2076                         memcpy(&ext, val, sizeof(ext));
2077                         ex = (struct ext4_extent *)&ext.fc_ex;
2078                         ret = ext4_fc_record_regions(sb,
2079                                 le32_to_cpu(ext.fc_ino),
2080                                 le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
2081                                 ext4_ext_get_actual_len(ex), 0);
2082                         if (ret < 0)
2083                                 break;
2084                         ret = JBD2_FC_REPLAY_CONTINUE;
2085                         fallthrough;
2086                 case EXT4_FC_TAG_DEL_RANGE:
2087                 case EXT4_FC_TAG_LINK:
2088                 case EXT4_FC_TAG_UNLINK:
2089                 case EXT4_FC_TAG_CREAT:
2090                 case EXT4_FC_TAG_INODE:
2091                 case EXT4_FC_TAG_PAD:
2092                         state->fc_cur_tag++;
2093                         state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
2094                                 EXT4_FC_TAG_BASE_LEN + tl.fc_len);
2095                         break;
2096                 case EXT4_FC_TAG_TAIL:
2097                         state->fc_cur_tag++;
2098                         memcpy(&tail, val, sizeof(tail));
2099                         state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
2100                                                 EXT4_FC_TAG_BASE_LEN +
2101                                                 offsetof(struct ext4_fc_tail,
2102                                                 fc_crc));
2103                         if (le32_to_cpu(tail.fc_tid) == expected_tid &&
2104                                 le32_to_cpu(tail.fc_crc) == state->fc_crc) {
2105                                 state->fc_replay_num_tags = state->fc_cur_tag;
2106                                 state->fc_regions_valid =
2107                                         state->fc_regions_used;
2108                         } else {
2109                                 ret = state->fc_replay_num_tags ?
2110                                         JBD2_FC_REPLAY_STOP : -EFSBADCRC;
2111                         }
2112                         state->fc_crc = 0;
2113                         break;
2114                 case EXT4_FC_TAG_HEAD:
2115                         memcpy(&head, val, sizeof(head));
2116                         if (le32_to_cpu(head.fc_features) &
2117                                 ~EXT4_FC_SUPPORTED_FEATURES) {
2118                                 ret = -EOPNOTSUPP;
2119                                 break;
2120                         }
2121                         if (le32_to_cpu(head.fc_tid) != expected_tid) {
2122                                 ret = JBD2_FC_REPLAY_STOP;
2123                                 break;
2124                         }
2125                         state->fc_cur_tag++;
2126                         state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
2127                                 EXT4_FC_TAG_BASE_LEN + tl.fc_len);
2128                         break;
2129                 default:
2130                         ret = state->fc_replay_num_tags ?
2131                                 JBD2_FC_REPLAY_STOP : -ECANCELED;
2132                 }
2133                 if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
2134                         break;
2135         }
2136
2137 out_err:
2138         trace_ext4_fc_replay_scan(sb, ret, off);
2139         return ret;
2140 }
2141
2142 /*
2143  * Main recovery path entry point.
2144  * The meaning of return codes is similar as above.
2145  */
2146 static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
2147                                 enum passtype pass, int off, tid_t expected_tid)
2148 {
2149         struct super_block *sb = journal->j_private;
2150         struct ext4_sb_info *sbi = EXT4_SB(sb);
2151         struct ext4_fc_tl tl;
2152         __u8 *start, *end, *cur, *val;
2153         int ret = JBD2_FC_REPLAY_CONTINUE;
2154         struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
2155         struct ext4_fc_tail tail;
2156
2157         if (pass == PASS_SCAN) {
2158                 state->fc_current_pass = PASS_SCAN;
2159                 return ext4_fc_replay_scan(journal, bh, off, expected_tid);
2160         }
2161
2162         if (state->fc_current_pass != pass) {
2163                 state->fc_current_pass = pass;
2164                 sbi->s_mount_state |= EXT4_FC_REPLAY;
2165         }
2166         if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
2167                 ext4_debug("Replay stops\n");
2168                 ext4_fc_set_bitmaps_and_counters(sb);
2169                 return 0;
2170         }
2171
2172 #ifdef CONFIG_EXT4_DEBUG
2173         if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) {
2174                 pr_warn("Dropping fc block %d because max_replay set\n", off);
2175                 return JBD2_FC_REPLAY_STOP;
2176         }
2177 #endif
2178
2179         start = (u8 *)bh->b_data;
2180         end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
2181
2182         for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
2183              cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
2184                 ext4_fc_get_tl(&tl, cur);
2185                 val = cur + EXT4_FC_TAG_BASE_LEN;
2186
2187                 if (state->fc_replay_num_tags == 0) {
2188                         ret = JBD2_FC_REPLAY_STOP;
2189                         ext4_fc_set_bitmaps_and_counters(sb);
2190                         break;
2191                 }
2192
2193                 ext4_debug("Replay phase, tag:%s\n", tag2str(tl.fc_tag));
2194                 state->fc_replay_num_tags--;
2195                 switch (tl.fc_tag) {
2196                 case EXT4_FC_TAG_LINK:
2197                         ret = ext4_fc_replay_link(sb, &tl, val);
2198                         break;
2199                 case EXT4_FC_TAG_UNLINK:
2200                         ret = ext4_fc_replay_unlink(sb, &tl, val);
2201                         break;
2202                 case EXT4_FC_TAG_ADD_RANGE:
2203                         ret = ext4_fc_replay_add_range(sb, &tl, val);
2204                         break;
2205                 case EXT4_FC_TAG_CREAT:
2206                         ret = ext4_fc_replay_create(sb, &tl, val);
2207                         break;
2208                 case EXT4_FC_TAG_DEL_RANGE:
2209                         ret = ext4_fc_replay_del_range(sb, &tl, val);
2210                         break;
2211                 case EXT4_FC_TAG_INODE:
2212                         ret = ext4_fc_replay_inode(sb, &tl, val);
2213                         break;
2214                 case EXT4_FC_TAG_PAD:
2215                         trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
2216                                              tl.fc_len, 0);
2217                         break;
2218                 case EXT4_FC_TAG_TAIL:
2219                         trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL,
2220                                              0, tl.fc_len, 0);
2221                         memcpy(&tail, val, sizeof(tail));
2222                         WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid);
2223                         break;
2224                 case EXT4_FC_TAG_HEAD:
2225                         break;
2226                 default:
2227                         trace_ext4_fc_replay(sb, tl.fc_tag, 0, tl.fc_len, 0);
2228                         ret = -ECANCELED;
2229                         break;
2230                 }
2231                 if (ret < 0)
2232                         break;
2233                 ret = JBD2_FC_REPLAY_CONTINUE;
2234         }
2235         return ret;
2236 }
2237
2238 void ext4_fc_init(struct super_block *sb, journal_t *journal)
2239 {
2240         /*
2241          * We set replay callback even if fast commit disabled because we may
2242          * could still have fast commit blocks that need to be replayed even if
2243          * fast commit has now been turned off.
2244          */
2245         journal->j_fc_replay_callback = ext4_fc_replay;
2246         if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
2247                 return;
2248         journal->j_fc_cleanup_callback = ext4_fc_cleanup;
2249 }
2250
/*
 * Human readable names of the reasons a fast commit was ineligible. The
 * table is indexed by reason number when the statistics are printed, so the
 * order of the entries matters. Both the strings and the pointer slots are
 * const: nothing may modify the table at run time.
 */
static const char * const fc_ineligible_reasons[] = {
	"Extended attributes changed",
	"Cross rename",
	"Journal flag changed",
	"Insufficient memory",
	"Swap boot",
	"Resize",
	"Dir renamed",
	"Falloc range op",
	"Data journalling",
	"FC Commit Failed"
};
2263
2264 int ext4_fc_info_show(struct seq_file *seq, void *v)
2265 {
2266         struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private);
2267         struct ext4_fc_stats *stats = &sbi->s_fc_stats;
2268         int i;
2269
2270         if (v != SEQ_START_TOKEN)
2271                 return 0;
2272
2273         seq_printf(seq,
2274                 "fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
2275                    stats->fc_num_commits, stats->fc_ineligible_commits,
2276                    stats->fc_numblks,
2277                    div_u64(stats->s_fc_avg_commit_time, 1000));
2278         seq_puts(seq, "Ineligible reasons:\n");
2279         for (i = 0; i < EXT4_FC_REASON_MAX; i++)
2280                 seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
2281                         stats->fc_ineligible_reason_count[i]);
2282
2283         return 0;
2284 }
2285
2286 int __init ext4_fc_init_dentry_cache(void)
2287 {
2288         ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
2289                                            SLAB_RECLAIM_ACCOUNT);
2290
2291         if (ext4_fc_dentry_cachep == NULL)
2292                 return -ENOMEM;
2293
2294         return 0;
2295 }
2296
2297 void ext4_fc_destroy_dentry_cache(void)
2298 {
2299         kmem_cache_destroy(ext4_fc_dentry_cachep);
2300 }