* to the work-to-be schedule is freed.
*
* Thus we need to keep the io structure still valid here after
- * convertion finished. The io structure has a flag to
+ * conversion finished. The io structure has a flag to
* avoid double converting from both fsync and background work
* queue work.
*/
* the parent directory's parent as well, and so on recursively, if
* they are also freshly created.
*/
- static void ext4_sync_parent(struct inode *inode)
+ static int ext4_sync_parent(struct inode *inode)
{
+ struct writeback_control wbc;
struct dentry *dentry = NULL;
+ int ret = 0;
while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
break;
inode = dentry->d_parent->d_inode;
- sync_mapping_buffers(inode->i_mapping);
+ ret = sync_mapping_buffers(inode->i_mapping);
+ if (ret)
+ break;
+ memset(&wbc, 0, sizeof(wbc));
+ wbc.sync_mode = WB_SYNC_ALL;
+ wbc.nr_to_write = 0; /* only write out the inode */
+ ret = sync_inode(inode, &wbc);
+ if (ret)
+ break;
}
+ return ret;
}
/*
if (!journal) {
ret = generic_file_fsync(file, datasync);
if (!ret && !list_empty(&inode->i_dentry))
- ext4_sync_parent(inode);
+ ret = ext4_sync_parent(inode);
goto out;
}
* for partial write.
*/
set_buffer_new(bh);
+ set_buffer_mapped(bh);
}
return 0;
}
* because we should have holes filled from ext4_page_mkwrite(). We don't even
* need to file the inode to the transaction's list in ordered mode because if
* we are writing back data added by write(), the inode is already there and if
- * we are writing back data modified via mmap(), noone guarantees in which
+ * we are writing back data modified via mmap(), no one guarantees in which
* transaction the data will hit the disk. In case we are journaling data, we
* cannot start transaction directly because transaction start ranks above page
* lock so we have to do some magic.
/*
* This is called via ext4_da_writepages() to
- * calulate the total number of credits to reserve to fit
+ * calculate the total number of credits to reserve to fit
* a single extent allocation into a single transaction,
* ext4_da_writepages() will loop calling this before
* the block allocation.
* the pages by calling redirty_page_for_writepage() but that
* would be ugly in the extreme. So instead we would need to
* replicate parts of the code in the above functions,
- * simplifying them becuase we wouldn't actually intend to
+ * simplifying them because we wouldn't actually intend to
* write out the pages, but rather only collect contiguous
* logical block extents, call the multi-block allocator, and
* then update the buffer heads with the block allocations.
*
* The unwritten extents will be converted to written when DIO is completed.
* For async direct IO, since the IO may still be pending when we return, we
- * set up an end_io call back function, which will do the convertion
+ * set up an end_io call back function, which will do the conversion
* when async direct IO completed.
*
* If the O_DIRECT write will extend the file then add this inode to the
* We could direct write to holes and fallocate.
*
* Allocated blocks to fill the hole are marked as uninitialized
- * to prevent paralel buffered read to expose the stale data
+ * to prevent a parallel buffered read from exposing the stale data
* before DIO completes the data IO.
*
* As to previously fallocated extents, ext4 get_block
int err;
/*
* for non AIO case, since the IO is already
- * completed, we could do the convertion right here
+ * completed, we could do the conversion right here
*/
err = ext4_convert_unwritten_extents(inode,
offset, ret);
*
* When we do truncate() we may have to clean the ends of several
* indirect blocks but leave the blocks themselves alive. Block is
- * partially truncated if some data below the new i_size is refered
+ * partially truncated if some data below the new i_size is referred
* from it (and it is on the path to the first completely truncated
* data block, indeed). We have to free the top of that path along
* with everything to the right of the path. Since no allocation
* @first: array of block numbers
* @last: points immediately past the end of array
*
- * We are freeing all blocks refered from that array (numbers are stored as
+ * We are freeing all blocks referred from that array (numbers are stored as
* little-endian 32-bit) and updating @inode->i_blocks appropriately.
*
* We accumulate contiguous runs of blocks to free. Conveniently, if these
* @last: pointer immediately past the end of array
* @depth: depth of the branches to free
*
- * We are freeing all blocks refered from these branches (numbers are
+ * We are freeing all blocks referred from these branches (numbers are
* stored as little-endian 32-bit) and updating @inode->i_blocks
* appropriately.
*/
Indirect chain[4];
Indirect *partial;
__le32 nr = 0;
- int n;
- ext4_lblk_t last_block;
+ int n = 0;
+ ext4_lblk_t last_block, max_block;
unsigned blocksize = inode->i_sb->s_blocksize;
trace_ext4_truncate_enter(inode);
last_block = (inode->i_size + blocksize-1)
>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
+ max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
+ >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
if (inode->i_size & (blocksize - 1))
if (ext4_block_truncate_page(handle, mapping, inode->i_size))
goto out_stop;
- n = ext4_block_to_path(inode, last_block, offsets, NULL);
- if (n == 0)
- goto out_stop; /* error */
+ if (last_block != max_block) {
+ n = ext4_block_to_path(inode, last_block, offsets, NULL);
+ if (n == 0)
+ goto out_stop; /* error */
+ }
/*
* OK. This truncate is going to happen. We add the inode to the
*/
ei->i_disksize = inode->i_size;
- if (n == 1) { /* direct blocks */
+ if (last_block == max_block) {
+ /*
+ * It is unnecessary to free any data blocks if last_block is
+ * equal to the indirect block limit.
+ */
+ goto out_unlock;
+ } else if (n == 1) { /* direct blocks */
ext4_free_data(handle, inode, NULL, i_data+offsets[0],
i_data + EXT4_NDIR_BLOCKS);
goto do_indirects;
;
}
+ out_unlock:
up_write(&ei->i_data_sem);
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
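A standalone sketch (not part of the patch) of the arithmetic behind the new check, assuming a 4096-byte block size and an arbitrary placeholder for s_bitmap_maxbytes: when the truncate target sits exactly at the indirect-format limit, last_block and max_block round up to the same index, so there are no data blocks past i_size left to walk and ext4_block_to_path() can safely be skipped.

#include <stdio.h>

int main(void)
{
	unsigned long long blocksize = 4096;			/* assumed block size */
	unsigned blkbits = 12;					/* log2(blocksize) */
	unsigned long long bitmap_maxbytes = 1ULL << 36;	/* placeholder limit, not ext4's real value */
	unsigned long long i_size = bitmap_maxbytes;		/* truncating a file at the limit */

	unsigned long long last_block = (i_size + blocksize - 1) >> blkbits;
	unsigned long long max_block  = (bitmap_maxbytes + blocksize - 1) >> blkbits;

	if (last_block == max_block)
		printf("last_block == max_block (%llu): nothing past i_size to free\n",
		       last_block);
	return 0;
}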
/* if nrblocks are contiguous */
if (chunk) {
/*
- * With N contiguous data blocks, it need at most
- * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks
- * 2 dindirect blocks
- * 1 tindirect block
+ * With N contiguous data blocks, we need at most
+ * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
+ * 2 dindirect blocks, and 1 tindirect block
*/
- indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb);
- return indirects + 3;
+ return DIV_ROUND_UP(nrblocks,
+ EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
}
/*
* if nrblocks are not contiguous, worst case, each block touches
}
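A quick standalone check of the bound above (not part of the patch), assuming 4096-byte blocks so that one indirect block holds 1024 block numbers:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned addr_per_block = 1024;		/* 4096-byte block / 4-byte block number */
	unsigned nrblocks = 2048;		/* one contiguous chunk of data blocks */

	/*
	 * ceil(2048/1024) = 2 indirect blocks, plus 1 more in case the run
	 * straddles an indirect-block boundary, plus 2 dindirect and
	 * 1 tindirect: 2 + 4 = 6 metadata blocks in the worst case.
	 */
	unsigned meta_blocks = DIV_ROUND_UP(nrblocks, addr_per_block) + 4;

	printf("worst-case metadata blocks for %u contiguous data blocks: %u\n",
	       nrblocks, meta_blocks);
	return 0;
}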
/*
- * Calulate the total number of credits to reserve to fit
+ * Calculate the total number of credits to reserve to fit
* the modification of a single page into a single transaction,
* which may include multiple chunks of block allocations.
*
* journal_end calls result in the superblock being marked dirty, so
* that sync() will call the filesystem's write_super callback if
* appropriate.
+ *
+ * To avoid holding j_barrier across a userspace-initiated freeze(),
+ * ext4 instead prevents new handles from being started via s_frozen,
+ * which is maintained in an upper layer.
*/
handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
{
journal_t *journal;
+ handle_t *handle;
if (sb->s_flags & MS_RDONLY)
return ERR_PTR(-EROFS);
- vfs_check_frozen(sb, SB_FREEZE_TRANS);
- /* Special case here: if the journal has aborted behind our
- * backs (eg. EIO in the commit thread), then we still need to
- * take the FS itself readonly cleanly. */
journal = EXT4_SB(sb)->s_journal;
- if (journal) {
- if (is_journal_aborted(journal)) {
- ext4_abort(sb, "Detected aborted journal");
- return ERR_PTR(-EROFS);
- }
- return jbd2_journal_start(journal, nblocks);
+ handle = ext4_journal_current_handle();
+
+ /*
+ * If a handle has been started, it should be allowed to
+ * finish; otherwise a deadlock could occur between freeze
+ * and other operations (e.g. truncate) due to the restart of
+ * the journal handle if the filesystem is frozen and active
+ * handles are not stopped.
+ */
+ if (!handle)
+ vfs_check_frozen(sb, SB_FREEZE_TRANS);
+
+ if (!journal)
+ return ext4_get_nojournal();
+ /*
+ * Special case here: if the journal has aborted behind our
+ * backs (e.g. EIO in the commit thread), then we still need to
+ * take the FS itself readonly cleanly.
+ */
+ if (is_journal_aborted(journal)) {
+ ext4_abort(sb, "Detected aborted journal");
+ return ERR_PTR(-EROFS);
}
- return ext4_get_nojournal();
+ return jbd2_journal_start(journal, nblocks);
}
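A toy userspace model (invented names, not ext4 code) of the rule in the comment above: only a task with no handle currently open should block on a frozen filesystem; a nested start must go through, because the freezer is itself waiting for every open handle to finish.

#include <stdbool.h>
#include <stdio.h>

struct task {
	bool has_open_handle;	/* ext4_journal_current_handle() != NULL */
};

/* Should this journal start block until the filesystem is unfrozen? */
static bool must_wait_for_unfreeze(const struct task *t, bool fs_frozen)
{
	/* A nested start must proceed: the freezer waits for all open
	 * handles to finish, so blocking here would deadlock. */
	return fs_frozen && !t->has_open_handle;
}

int main(void)
{
	struct task nested = { .has_open_handle = true };
	struct task fresh  = { .has_open_handle = false };

	printf("nested start waits: %d\n", must_wait_for_unfreeze(&nested, true)); /* 0 */
	printf("fresh start waits:  %d\n", must_wait_for_unfreeze(&fresh, true));  /* 1 */
	return 0;
}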
/*
* filesystem will have already been marked read/only and the
* journal has been aborted. We return 1 as a hint to callers
* who might want to use the return value from
- * ext4_grp_locked_error() to distinguish beween the
+ * ext4_grp_locked_error() to distinguish between the
* ERRORS_CONT and ERRORS_RO case, and perhaps return more
* aggressively from the ext4 function in question, with a
* more appropriate error code.
mutex_unlock(&ext4_li_info->li_list_mtx);
sbi->s_li_request = elr;
+ /*
+ * Set elr to NULL here since it has been inserted into
+ * the request list; from now on its removal and freeing
+ * are handled by ext4_clear_request_list.
+ */
+ elr = NULL;
if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
ret = ext4_run_lazyinit_thread();
get_random_bytes(&sbi->s_next_generation, sizeof(u32));
spin_lock_init(&sbi->s_next_gen_lock);
+ init_timer(&sbi->s_err_report);
+ sbi->s_err_report.function = print_daily_error_info;
+ sbi->s_err_report.data = (unsigned long) sb;
+
err = percpu_counter_init(&sbi->s_freeblocks_counter,
ext4_count_free_blocks(sb));
if (!err) {
"Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
*sbi->s_es->s_mount_opts ? "; " : "", orig_data);
- init_timer(&sbi->s_err_report);
- sbi->s_err_report.function = print_daily_error_info;
- sbi->s_err_report.data = (unsigned long) sb;
if (es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
sbi->s_journal = NULL;
}
failed_mount3:
+ del_timer(&sbi->s_err_report);
if (sbi->s_flex_groups) {
if (is_vmalloc_addr(sbi->s_flex_groups))
vfree(sbi->s_flex_groups);
/*
* LVM calls this function before a (read-only) snapshot is created. This
* gives us a chance to flush the journal completely and mark the fs clean.
+ *
+ * Note that this function cannot by itself bring the filesystem into
+ * a clean state, because ext4 relies on @sb->s_frozen, which is
+ * maintained in an upper layer, to prevent new handles from being
+ * started. It thus needs help from that upper layer.
*/
static int ext4_freeze(struct super_block *sb)
{
static int ext4_quota_off(struct super_block *sb, int type)
{
+ struct inode *inode = sb_dqopt(sb)->files[type];
+ handle_t *handle;
+
/* Force all delayed allocation blocks to be allocated.
* Caller already holds s_umount sem */
if (test_opt(sb, DELALLOC))
sync_filesystem(sb);
+ /* Update modification times of quota files when userspace can
+ * start looking at them */
+ handle = ext4_journal_start(inode, 1);
+ if (IS_ERR(handle))
+ goto out;
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ ext4_mark_inode_dirty(handle, inode);
+ ext4_journal_stop(handle);
+
+ out:
return dquot_quota_off(sb, type);
}
/* Read data from quotafile - avoid pagecache and such because we cannot afford
* acquiring the locks... As quota files are never truncated and quota code
- * itself serializes the operations (and noone else should touch the files)
+ * itself serializes the operations (and no one else should touch the files)
* we don't have to be afraid of races */
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off)
if (inode->i_size < off + len) {
i_size_write(inode, off + len);
EXT4_I(inode)->i_disksize = inode->i_size;
+ ext4_mark_inode_dirty(handle, inode);
}
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- ext4_mark_inode_dirty(handle, inode);
mutex_unlock(&inode->i_mutex);
return len;
}
int ret;
struct timespec now = current_kernel_time();
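+ /*
+ * Clear *cbh first: we may return before submitting a commit record
+ * (e.g. aborted journal), and the caller only waits on the commit
+ * record when *cbh is non-NULL.
+ */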
+ *cbh = NULL;
+
if (is_journal_aborted(journal))
return 0;
* we do not require it to remember exactly which old buffers it
* has reserved. This is consistent with the existing behaviour
* that multiple jbd2_journal_get_write_access() calls to the same
- * buffer are perfectly permissable.
+ * buffer are perfectly permissible.
*/
while (commit_transaction->t_reserved_list) {
jh = commit_transaction->t_reserved_list;
if (err)
__jbd2_journal_abort_hard(journal);
}
- if (!err && !is_journal_aborted(journal))
+ if (cbh)
err = journal_wait_on_commit_record(journal, cbh);
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
journal->j_wbufsize = n;
journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
if (!journal->j_wbuf) {
- printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
+ printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
__func__);
goto out_err;
}
journal->j_wbufsize = n;
journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
if (!journal->j_wbuf) {
- printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
+ printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
__func__);
goto out_err;
}
new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
if (!new_dev)
return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
+ bd = bdget(device);
spin_lock(&devname_cache_lock);
if (devcache[i]) {
if (devcache[i]->device == device) {
kfree(new_dev);
+ bdput(bd);
ret = devcache[i]->devname;
spin_unlock(&devname_cache_lock);
return ret;
}
devcache[i] = new_dev;
devcache[i]->device = device;
- bd = bdget(device);
if (bd) {
bdevname(bd, devcache[i]->devname);
bdput(bd);