Git Repo - linux.git/commitdiff
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
author Linus Torvalds <[email protected]>
Sun, 5 Jul 2015 02:36:06 +0000 (19:36 -0700)
committer Linus Torvalds <[email protected]>
Sun, 5 Jul 2015 02:36:06 +0000 (19:36 -0700)
Pull more vfs updates from Al Viro:
 "Assorted VFS fixes and related cleanups (IMO the most interesting in
  that part are f_path-related things and Eric's descriptor-related
  stuff).  UFS regression fixes (it got broken last cycle).  9P fixes.
  fs-cache series, DAX patches, Jan's file_remove_suid() work"

[ I'd say this is much more than "fixes and related cleanups".  The
  file_table locking rule change by Eric Dumazet is a rather big and
  fundamental update even if the patch isn't huge.   - Linus ]

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (49 commits)
  9p: cope with bogus responses from server in p9_client_{read,write}
  p9_client_write(): avoid double p9_free_req()
  9p: forgetting to cancel request on interrupted zero-copy RPC
  dax: bdev_direct_access() may sleep
  block: Add support for DAX reads/writes to block devices
  dax: Use copy_from_iter_nocache
  dax: Add block size note to documentation
  fs/file.c: __fget() and dup2() atomicity rules
  fs/file.c: don't acquire files->file_lock in fd_install()
  fs:super:get_anon_bdev: fix race condition could cause dev exceed its upper limitation
  vfs: avoid creation of inode number 0 in get_next_ino
  namei: make set_root_rcu() return void
  make simple_positive() public
  ufs: use dir_pages instead of ufs_dir_pages()
  pagemap.h: move dir_pages() over there
  remove the pointless include of lglock.h
  fs: cleanup slight list_entry abuse
  xfs: Correctly lock inode when removing suid and file capabilities
  fs: Call security_ops->inode_killpriv on truncate
  fs: Provide function telling whether file_remove_privs() will do anything
  ...

32 files changed:
Documentation/filesystems/porting
arch/arc/kernel/troubleshoot.c
arch/s390/hypfs/inode.c
arch/tile/kernel/stack.c
drivers/block/loop.c
drivers/md/bitmap.c
drivers/md/md.c
fs/binfmt_elf.c
fs/block_dev.c
fs/btrfs/file.c
fs/ceph/file.c
fs/coredump.c
fs/dax.c
fs/dcache.c
fs/debugfs/inode.c
fs/ext4/super.c
fs/fuse/file.c
fs/inode.c
fs/libfs.c
fs/nfs/dir.c
fs/ntfs/file.c
fs/overlayfs/super.c
fs/seq_file.c
fs/tracefs/inode.c
fs/ufs/super.c
fs/xfs/xfs_file.c
include/linux/fs.h
include/linux/pagemap.h
kernel/events/core.c
mm/filemap.c
mm/memory.c
security/inode.c

diff --combined Documentation/filesystems/porting
index 68f1c9106573f40df371e01d14946c52df98405d,ec5456113072ab93240033a4737953bd753afd76..f24d1b8339576e96c46045f5da8f275ee9250056
@@@ -379,10 -379,10 +379,10 @@@ may now be called in rcu-walk mode (nd-
  returned if the filesystem cannot handle rcu-walk. See
  Documentation/filesystems/vfs.txt for more details.
  
 -      permission and check_acl are inode permission checks that are called
 -on many or all directory inodes on the way down a path walk (to check for
 -exec permission). These must now be rcu-walk aware (flags & IPERM_FLAG_RCU).
 -See Documentation/filesystems/vfs.txt for more details.
 +      permission is an inode permission check that is called on many or all
 +directory inodes on the way down a path walk (to check for exec permission). It
 +must now be rcu-walk aware (mask & MAY_NOT_BLOCK).  See
 +Documentation/filesystems/vfs.txt for more details.
   
  --
  [mandatory]
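
A minimal sketch of the rcu-walk permission rule described in the hunk above (examplefs_permission is a hypothetical name, not code from this series): when MAY_NOT_BLOCK is set in the mask, the method must not sleep, and returning -ECHILD tells the VFS to retry the lookup in ref-walk mode, where sleeping is allowed.

	#include <linux/fs.h>

	/* Sketch: an rcu-walk aware ->permission method.  MAY_NOT_BLOCK in
	 * the mask means the VFS is in rcu-walk mode, so sleeping locks and
	 * I/O are forbidden. */
	static int examplefs_permission(struct inode *inode, int mask)
	{
		if (mask & MAY_NOT_BLOCK)
			return -ECHILD;	/* would need to sleep: punt to ref-walk */
		/* ref-walk mode: sleeping locks and I/O are fine here */
		return generic_permission(inode, mask);
	}

Returning -ECHILD is cheap; the VFS simply restarts the affected part of the path walk with references held.
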
@@@ -500,3 -500,7 +500,7 @@@ in your dentry operations instead
        dentry,  it does not get nameidata at all and it gets called only when cookie
        is non-NULL.  Note that link body isn't available anymore, so if you need it,
        store it as cookie.
+ --
+ [mandatory]
+       __fd_install() & fd_install() can now sleep. Callers should not
+       hold a spinlock or other resources that do not allow a schedule.
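
A minimal sketch of the new fd_install() rule just added to the porting notes (example_install_file is a made-up helper, not from this series): reserve the descriptor first, then install the file from a context that may schedule, with no spinlocks held.

	#include <linux/fcntl.h>
	#include <linux/file.h>
	#include <linux/fs.h>

	/* Sketch: fd_install() may now sleep, so keep it outside any
	 * spinlocked region. */
	static int example_install_file(struct file *filp)
	{
		int fd = get_unused_fd_flags(O_CLOEXEC);

		if (fd < 0)
			return fd;
		/* no locks held here: fd_install() is free to schedule */
		fd_install(fd, filp);
		return fd;
	}
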
diff --combined arch/arc/kernel/troubleshoot.c
index e0cf998932123fae4df27f8c98712f57551d8849,9f80c5adcb689aa03295bf716585a2ba6eb5a99d..807f7d61d7a7cf867bca011251729d1164bd3f33
@@@ -14,7 -14,6 +14,7 @@@
  #include <linux/proc_fs.h>
  #include <linux/file.h>
  #include <asm/arcregs.h>
 +#include <asm/irqflags.h>
  
  /*
   * Common routine to print scratch regs (r0-r12) or callee regs (r13-r25)
@@@ -35,10 -34,7 +35,10 @@@ static noinline void print_reg_file(lon
                        n += scnprintf(buf + n, len - n, "\n");
  
                /* because pt_regs has regs reversed: r12..r0, r25..r13 */
 -              reg_rev--;
 +              if (is_isa_arcv2() && start_num == 0)
 +                      reg_rev++;
 +              else
 +                      reg_rev--;
        }
  
        if (start_num != 0)
@@@ -71,15 -67,12 +71,12 @@@ static void print_task_path_n_nm(struc
        mmput(mm);
  
        if (exe_file) {
-               path = exe_file->f_path;
-               path_get(&exe_file->f_path);
+               path_nm = file_path(exe_file, buf, 255);
                fput(exe_file);
-               path_nm = d_path(&path, buf, 255);
-               path_put(&path);
        }
  
  done:
-       pr_info("Path: %s\n", path_nm);
+       pr_info("Path: %s\n", !IS_ERR(path_nm) ? path_nm : "?");
  }
  
  static void show_faulting_vma(unsigned long address, char *buf)
        if (vma && (vma->vm_start <= address)) {
                struct file *file = vma->vm_file;
                if (file) {
-                       struct path *path = &file->f_path;
-                       nm = d_path(path, buf, PAGE_SIZE - 1);
+                       nm = file_path(file, buf, PAGE_SIZE - 1);
                        inode = file_inode(vma->vm_file);
                        dev = inode->i_sb->s_dev;
                        ino = inode->i_ino;
@@@ -156,15 -148,6 +152,15 @@@ static void show_ecr_verbose(struct pt_
                                ((cause_code == 0x02) ? "Write" : "EX"));
        } else if (vec == ECR_V_INSN_ERR) {
                pr_cont("Illegal Insn\n");
 +#ifdef CONFIG_ISA_ARCV2
 +      } else if (vec == ECR_V_MEM_ERR) {
 +              if (cause_code == 0x00)
 +                      pr_cont("Bus Error from Insn Mem\n");
 +              else if (cause_code == 0x10)
 +                      pr_cont("Bus Error from Data Mem\n");
 +              else
 +                      pr_cont("Bus Error, check PRM\n");
 +#endif
        } else {
                pr_cont("Check Programmer's Manual\n");
        }
@@@ -198,20 -181,12 +194,20 @@@ void show_regs(struct pt_regs *regs
  
        pr_info("[STAT32]: 0x%08lx", regs->status32);
  
 -#define STS_BIT(r, bit)       r->status32 & STATUS_##bit##_MASK ? #bit : ""
 -      if (!user_mode(regs))
 -              pr_cont(" : %2s %2s %2s %2s %2s\n",
 -                      STS_BIT(regs, AE), STS_BIT(regs, A2), STS_BIT(regs, A1),
 -                      STS_BIT(regs, E2), STS_BIT(regs, E1));
 +#define STS_BIT(r, bit)       r->status32 & STATUS_##bit##_MASK ? #bit" " : ""
  
 +#ifdef CONFIG_ISA_ARCOMPACT
 +      pr_cont(" : %2s%2s%2s%2s%2s%2s%2s\n",
 +                      (regs->status32 & STATUS_U_MASK) ? "U " : "K ",
 +                      STS_BIT(regs, DE), STS_BIT(regs, AE),
 +                      STS_BIT(regs, A2), STS_BIT(regs, A1),
 +                      STS_BIT(regs, E2), STS_BIT(regs, E1));
 +#else
 +      pr_cont(" : %2s%2s%2s%2s\n",
 +                      STS_BIT(regs, IE),
 +                      (regs->status32 & STATUS_U_MASK) ? "U " : "K ",
 +                      STS_BIT(regs, DE), STS_BIT(regs, AE));
 +#endif
        pr_info("BTA: 0x%08lx\t SP: 0x%08lx\t FP: 0x%08lx\n",
                regs->bta, regs->sp, regs->fp);
        pr_info("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n",
diff --combined arch/s390/hypfs/inode.c
index 2eeb0a0f506d5d54a90b0f98da581468962c0399,8ffad54372321c156e5066b6e5fd92625d166bd5..b2e5902bd8f4d8e5f4f53cc3e6fad1cb183db7be
@@@ -62,18 -62,13 +62,13 @@@ static void hypfs_add_dentry(struct den
        hypfs_last_dentry = dentry;
  }
  
- static inline int hypfs_positive(struct dentry *dentry)
- {
-       return d_really_is_positive(dentry) && !d_unhashed(dentry);
- }
  static void hypfs_remove(struct dentry *dentry)
  {
        struct dentry *parent;
  
        parent = dentry->d_parent;
        mutex_lock(&d_inode(parent)->i_mutex);
-       if (hypfs_positive(dentry)) {
+       if (simple_positive(dentry)) {
                if (d_is_dir(dentry))
                        simple_rmdir(d_inode(parent), dentry);
                else
@@@ -456,6 -451,8 +451,6 @@@ static const struct super_operations hy
        .show_options   = hypfs_show_options,
  };
  
 -static struct kobject *s390_kobj;
 -
  static int __init hypfs_init(void)
  {
        int rc;
                rc = -ENODATA;
                goto fail_hypfs_sprp_exit;
        }
 -      s390_kobj = kobject_create_and_add("s390", hypervisor_kobj);
 -      if (!s390_kobj) {
 -              rc = -ENOMEM;
 +      rc = sysfs_create_mount_point(hypervisor_kobj, "s390");
 +      if (rc)
                goto fail_hypfs_diag0c_exit;
 -      }
        rc = register_filesystem(&hypfs_type);
        if (rc)
                goto fail_filesystem;
        return 0;
  
  fail_filesystem:
 -      kobject_put(s390_kobj);
 +      sysfs_remove_mount_point(hypervisor_kobj, "s390");
  fail_hypfs_diag0c_exit:
        hypfs_diag0c_exit();
  fail_hypfs_sprp_exit:
@@@ -506,7 -505,7 +501,7 @@@ fail_dbfs_exit
  static void __exit hypfs_exit(void)
  {
        unregister_filesystem(&hypfs_type);
 -      kobject_put(s390_kobj);
 +      sysfs_remove_mount_point(hypervisor_kobj, "s390");
        hypfs_diag0c_exit();
        hypfs_sprp_exit();
        hypfs_vm_exit();
diff --combined arch/tile/kernel/stack.c
index 35d34635e4f1305473f5cf1990d185216657c59d,8d62cf12c2c027b95e5e67cee9bd72228dfd69f5..402b9c85a894dc4b10982462178dd08529bca49d
@@@ -23,7 -23,6 +23,7 @@@
  #include <linux/mmzone.h>
  #include <linux/dcache.h>
  #include <linux/fs.h>
 +#include <linux/hardirq.h>
  #include <linux/string.h>
  #include <asm/backtrace.h>
  #include <asm/page.h>
@@@ -110,7 -109,7 +110,7 @@@ static struct pt_regs *valid_fault_hand
                if (kbt->verbose)
                        pr_err("  <%s while in user mode>\n", fault);
        } else {
 -              if (kbt->verbose)
 +              if (kbt->verbose && (p->pc != 0 || p->sp != 0 || p->ex1 != 0))
                        pr_err("  (odd fault: pc %#lx, sp %#lx, ex1 %#lx?)\n",
                               p->pc, p->sp, p->ex1);
                return NULL;
        return p;
  }
  
 -/* Is the pc pointing to a sigreturn trampoline? */
 -static int is_sigreturn(unsigned long pc)
 +/* Is the iterator pointing to a sigreturn trampoline? */
 +static int is_sigreturn(struct KBacktraceIterator *kbt)
  {
 -      return current->mm && (pc == VDSO_SYM(&__vdso_rt_sigreturn));
 +      return kbt->task->mm &&
 +              (kbt->it.pc == ((ulong)kbt->task->mm->context.vdso_base +
 +                              (ulong)&__vdso_rt_sigreturn));
  }
  
  /* Return a pt_regs pointer for a valid signal handler frame */
@@@ -134,7 -131,7 +134,7 @@@ static struct pt_regs *valid_sigframe(s
  {
        BacktraceIterator *b = &kbt->it;
  
 -      if (is_sigreturn(b->pc) && b->sp < PAGE_OFFSET &&
 +      if (is_sigreturn(kbt) && b->sp < PAGE_OFFSET &&
            b->sp % sizeof(long) == 0) {
                int retval;
                pagefault_disable();
        return NULL;
  }
  
 -static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt)
 -{
 -      return is_sigreturn(kbt->it.pc);
 -}
 -
  static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt)
  {
        struct pt_regs *p;
@@@ -176,7 -178,7 +176,7 @@@ static int KBacktraceIterator_next_item
  {
        for (;;) {
                do {
 -                      if (!KBacktraceIterator_is_sigreturn(kbt))
 +                      if (!is_sigreturn(kbt))
                                return KBT_ONGOING;
                } while (backtrace_next(&kbt->it));
  
@@@ -332,7 -334,7 +332,7 @@@ static void describe_addr(struct KBackt
        }
  
        if (vma->vm_file) {
-               p = d_path(&vma->vm_file->f_path, buf, bufsize);
+               p = file_path(vma->vm_file, buf, bufsize);
                if (IS_ERR(p))
                        p = "?";
                name = kbasename(p);
   */
  static bool start_backtrace(void)
  {
 -      if (current->thread.in_backtrace) {
 +      if (current_thread_info()->in_backtrace) {
                pr_err("Backtrace requested while in backtrace!\n");
                return false;
        }
 -      current->thread.in_backtrace = true;
 +      current_thread_info()->in_backtrace = true;
        return true;
  }
  
  static void end_backtrace(void)
  {
 -      current->thread.in_backtrace = false;
 +      current_thread_info()->in_backtrace = false;
  }
  
  /*
   * This method wraps the backtracer's more generic support.
   * It is only invoked from the architecture-specific code; show_stack()
 - * and dump_stack() (in entry.S) are architecture-independent entry points.
 + * and dump_stack() are architecture-independent entry points.
   */
 -void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
 +void tile_show_stack(struct KBacktraceIterator *kbt)
  {
        int i;
        int have_mmap_sem = 0;
  
        if (!start_backtrace())
                return;
 -      if (headers) {
 -              /*
 -               * Add a blank line since if we are called from panic(),
 -               * then bust_spinlocks() spit out a space in front of us
 -               * and it will mess up our KERN_ERR.
 -               */
 -              pr_err("Starting stack dump of tid %d, pid %d (%s) on cpu %d at cycle %lld\n",
 -                     kbt->task->pid, kbt->task->tgid, kbt->task->comm,
 -                     raw_smp_processor_id(), get_cycles());
 -      }
        kbt->verbose = 1;
        i = 0;
        for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) {
                char namebuf[KSYM_NAME_LEN+100];
                unsigned long address = kbt->it.pc;
  
 -              /* Try to acquire the mmap_sem as we pass into userspace. */
 -              if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm)
 +              /*
 +               * Try to acquire the mmap_sem as we pass into userspace.
 +               * If we're in an interrupt context, don't even try, since
 +               * it's not safe to call e.g. d_path() from an interrupt,
 +               * since it uses spin locks without disabling interrupts.
 +               * Note we test "kbt->task == current", not "kbt->is_current",
 +               * since we're checking that "current" will work in d_path().
 +               */
 +              if (kbt->task == current && address < PAGE_OFFSET &&
 +                  !have_mmap_sem && kbt->task->mm && !in_interrupt()) {
                        have_mmap_sem =
                                down_read_trylock(&kbt->task->mm->mmap_sem);
 +              }
  
                describe_addr(kbt, address, have_mmap_sem,
                              namebuf, sizeof(namebuf));
        }
        if (kbt->end == KBT_LOOP)
                pr_err("Stack dump stopped; next frame identical to this one\n");
 -      if (headers)
 -              pr_err("Stack dump complete\n");
        if (have_mmap_sem)
                up_read(&kbt->task->mm->mmap_sem);
        end_backtrace();
  }
  EXPORT_SYMBOL(tile_show_stack);
  
 -
 -/* This is called from show_regs() and _dump_stack() */
 -void dump_stack_regs(struct pt_regs *regs)
 -{
 -      struct KBacktraceIterator kbt;
 -      KBacktraceIterator_init(&kbt, NULL, regs);
 -      tile_show_stack(&kbt, 1);
 -}
 -EXPORT_SYMBOL(dump_stack_regs);
 -
  static struct pt_regs *regs_to_pt_regs(struct pt_regs *regs,
                                       ulong pc, ulong lr, ulong sp, ulong r52)
  {
        return regs;
  }
  
 -/* This is called from dump_stack() and just converts to pt_regs */
 +/* Deprecated function currently only used by kernel_double_fault(). */
  void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
  {
 +      struct KBacktraceIterator kbt;
        struct pt_regs regs;
 -      dump_stack_regs(regs_to_pt_regs(&regs, pc, lr, sp, r52));
 +
 +      regs_to_pt_regs(&regs, pc, lr, sp, r52);
 +      KBacktraceIterator_init(&kbt, NULL, &regs);
 +      tile_show_stack(&kbt);
  }
  
  /* This is called from KBacktraceIterator_init_current() */
@@@ -450,30 -461,22 +450,30 @@@ void _KBacktraceIterator_init_current(s
                                regs_to_pt_regs(&regs, pc, lr, sp, r52));
  }
  
 -/* This is called only from kernel/sched/core.c, with esp == NULL */
 +/*
 + * Called from sched_show_task() with task != NULL, or dump_stack()
 + * with task == NULL.  The esp argument is always NULL.
 + */
  void show_stack(struct task_struct *task, unsigned long *esp)
  {
        struct KBacktraceIterator kbt;
 -      if (task == NULL || task == current)
 +      if (task == NULL || task == current) {
                KBacktraceIterator_init_current(&kbt);
 -      else
 +              KBacktraceIterator_next(&kbt);  /* don't show first frame */
 +      } else {
                KBacktraceIterator_init(&kbt, task, NULL);
 -      tile_show_stack(&kbt, 0);
 +      }
 +      tile_show_stack(&kbt);
  }
  
  #ifdef CONFIG_STACKTRACE
  
  /* Support generic Linux stack API too */
  
 -void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
 +static void save_stack_trace_common(struct task_struct *task,
 +                                  struct pt_regs *regs,
 +                                  bool user,
 +                                  struct stack_trace *trace)
  {
        struct KBacktraceIterator kbt;
        int skip = trace->skip;
  
        if (!start_backtrace())
                goto done;
 -      if (task == NULL || task == current)
 +      if (regs != NULL) {
 +              KBacktraceIterator_init(&kbt, NULL, regs);
 +      } else if (task == NULL || task == current) {
                KBacktraceIterator_init_current(&kbt);
 -      else
 +              skip++;  /* don't show KBacktraceIterator_init_current */
 +      } else {
                KBacktraceIterator_init(&kbt, task, NULL);
 +      }
        for (; !KBacktraceIterator_end(&kbt); KBacktraceIterator_next(&kbt)) {
                if (skip) {
                        --skip;
                        continue;
                }
 -              if (i >= trace->max_entries || kbt.it.pc < PAGE_OFFSET)
 +              if (i >= trace->max_entries ||
 +                  (!user && kbt.it.pc < PAGE_OFFSET))
                        break;
                trace->entries[i++] = kbt.it.pc;
        }
        end_backtrace();
  done:
 +      if (i < trace->max_entries)
 +              trace->entries[i++] = ULONG_MAX;
        trace->nr_entries = i;
  }
 +
 +void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
 +{
 +      save_stack_trace_common(task, NULL, false, trace);
 +}
  EXPORT_SYMBOL(save_stack_trace_tsk);
  
  void save_stack_trace(struct stack_trace *trace)
  {
 -      save_stack_trace_tsk(NULL, trace);
 +      save_stack_trace_common(NULL, NULL, false, trace);
  }
  EXPORT_SYMBOL_GPL(save_stack_trace);
  
 +void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
 +{
 +      save_stack_trace_common(NULL, regs, false, trace);
 +}
 +
 +void save_stack_trace_user(struct stack_trace *trace)
 +{
 +      /* Trace user stack if we are not a kernel thread. */
 +      if (current->mm)
 +              save_stack_trace_common(NULL, task_pt_regs(current),
 +                                      true, trace);
 +      else if (trace->nr_entries < trace->max_entries)
 +              trace->entries[trace->nr_entries++] = ULONG_MAX;
 +}
  #endif
  
  /* In entry.S */
diff --combined drivers/block/loop.c
index 40580dc7f41cacef42eedafeebe725e96943c91e,0d8ad59413cd88119dd4ee49517f979203a839da..f7a4c9d7f721816666a76e2d667adf9153e162e9
@@@ -86,6 -86,8 +86,6 @@@ static DEFINE_MUTEX(loop_index_mutex)
  static int max_part;
  static int part_shift;
  
 -static struct workqueue_struct *loop_wq;
 -
  static int transfer_xor(struct loop_device *lo, int cmd,
                        struct page *raw_page, unsigned raw_off,
                        struct page *loop_page, unsigned loop_off,
@@@ -474,28 -476,6 +474,28 @@@ static int loop_flush(struct loop_devic
        return loop_switch(lo, NULL);
  }
  
 +static void loop_reread_partitions(struct loop_device *lo,
 +                                 struct block_device *bdev)
 +{
 +      int rc;
 +
 +      /*
 +       * bd_mutex has been held already in release path, so don't
 +       * acquire it if this function is called in such case.
 +       *
 +       * If the reread partition isn't from release path, lo_refcnt
 +       * must be at least one and it can only become zero when the
 +       * current holder is released.
 +       */
 +      if (!atomic_read(&lo->lo_refcnt))
 +              rc = __blkdev_reread_part(bdev);
 +      else
 +              rc = blkdev_reread_part(bdev);
 +      if (rc)
 +              pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
 +                      __func__, lo->lo_number, lo->lo_file_name, rc);
 +}
 +
  /*
   * loop_change_fd switched the backing store of a loopback device to
   * a new file. This is useful for operating system installers to free up
@@@ -544,7 -524,7 +544,7 @@@ static int loop_change_fd(struct loop_d
  
        fput(old_file);
        if (lo->lo_flags & LO_FLAGS_PARTSCAN)
 -              ioctl_by_bdev(bdev, BLKRRPART, 0);
 +              loop_reread_partitions(lo, bdev);
        return 0;
  
   out_putf:
@@@ -588,7 -568,7 +588,7 @@@ static ssize_t loop_attr_backing_file_s
  
        spin_lock_irq(&lo->lo_lock);
        if (lo->lo_backing_file)
-               p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
+               p = file_path(lo->lo_backing_file, buf, PAGE_SIZE - 1);
        spin_unlock_irq(&lo->lo_lock);
  
        if (IS_ERR_OR_NULL(p))
@@@ -745,12 -725,6 +745,12 @@@ static int loop_set_fd(struct loop_devi
        size = get_loop_size(lo, file);
        if ((loff_t)(sector_t)size != size)
                goto out_putf;
 +      error = -ENOMEM;
 +      lo->wq = alloc_workqueue("kloopd%d",
 +                      WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 16,
 +                      lo->lo_number);
 +      if (!lo->wq)
 +              goto out_putf;
  
        error = 0;
  
        if (part_shift)
                lo->lo_flags |= LO_FLAGS_PARTSCAN;
        if (lo->lo_flags & LO_FLAGS_PARTSCAN)
 -              ioctl_by_bdev(bdev, BLKRRPART, 0);
 +              loop_reread_partitions(lo, bdev);
  
        /* Grab the block_device to prevent its destruction after we
         * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev).
@@@ -853,7 -827,7 +853,7 @@@ static int loop_clr_fd(struct loop_devi
         * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
         * command to fail with EBUSY.
         */
 -      if (lo->lo_refcnt > 1) {
 +      if (atomic_read(&lo->lo_refcnt) > 1) {
                lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
                mutex_unlock(&lo->lo_ctl_mutex);
                return 0;
        if (filp == NULL)
                return -EINVAL;
  
 +      /* freeze request queue during the transition */
 +      blk_mq_freeze_queue(lo->lo_queue);
 +
        spin_lock_irq(&lo->lo_lock);
        lo->lo_state = Lo_rundown;
        lo->lo_backing_file = NULL;
        lo->lo_state = Lo_unbound;
        /* This is safe: open() is still holding a reference. */
        module_put(THIS_MODULE);
 +      blk_mq_unfreeze_queue(lo->lo_queue);
 +
        if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
 -              ioctl_by_bdev(bdev, BLKRRPART, 0);
 +              loop_reread_partitions(lo, bdev);
        lo->lo_flags = 0;
        if (!part_shift)
                lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
 +      destroy_workqueue(lo->wq);
 +      lo->wq = NULL;
        mutex_unlock(&lo->lo_ctl_mutex);
        /*
         * Need not hold lo_ctl_mutex to fput backing file.
@@@ -976,7 -943,7 +976,7 @@@ loop_set_status(struct loop_device *lo
             !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
                lo->lo_flags |= LO_FLAGS_PARTSCAN;
                lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
 -              ioctl_by_bdev(lo->lo_device, BLKRRPART, 0);
 +              loop_reread_partitions(lo, lo->lo_device);
        }
  
        lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
@@@ -1357,7 -1324,9 +1357,7 @@@ static int lo_open(struct block_device 
                goto out;
        }
  
 -      mutex_lock(&lo->lo_ctl_mutex);
 -      lo->lo_refcnt++;
 -      mutex_unlock(&lo->lo_ctl_mutex);
 +      atomic_inc(&lo->lo_refcnt);
  out:
        mutex_unlock(&loop_index_mutex);
        return err;
@@@ -1368,10 -1337,11 +1368,10 @@@ static void lo_release(struct gendisk *
        struct loop_device *lo = disk->private_data;
        int err;
  
 -      mutex_lock(&lo->lo_ctl_mutex);
 -
 -      if (--lo->lo_refcnt)
 -              goto out;
 +      if (atomic_dec_return(&lo->lo_refcnt))
 +              return;
  
 +      mutex_lock(&lo->lo_ctl_mutex);
        if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
                /*
                 * In autoclear mode, stop the loop thread
                loop_flush(lo);
        }
  
 -out:
        mutex_unlock(&lo->lo_ctl_mutex);
  }
  
@@@ -1454,13 -1425,9 +1454,13 @@@ static int loop_queue_rq(struct blk_mq_
                const struct blk_mq_queue_data *bd)
  {
        struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
 +      struct loop_device *lo = cmd->rq->q->queuedata;
  
        blk_mq_start_request(bd->rq);
  
 +      if (lo->lo_state != Lo_bound)
 +              return -EIO;
 +
        if (cmd->rq->cmd_flags & REQ_WRITE) {
                struct loop_device *lo = cmd->rq->q->queuedata;
                bool need_sched = true;
                spin_unlock_irq(&lo->lo_lock);
  
                if (need_sched)
 -                      queue_work(loop_wq, &lo->write_work);
 +                      queue_work(lo->wq, &lo->write_work);
        } else {
 -              queue_work(loop_wq, &cmd->read_work);
 +              queue_work(lo->wq, &cmd->read_work);
        }
  
        return BLK_MQ_RQ_QUEUE_OK;
@@@ -1488,6 -1455,9 +1488,6 @@@ static void loop_handle_cmd(struct loop
        struct loop_device *lo = cmd->rq->q->queuedata;
        int ret = -EIO;
  
 -      if (lo->lo_state != Lo_bound)
 -              goto failed;
 -
        if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY))
                goto failed;
  
@@@ -1624,7 -1594,6 +1624,7 @@@ static int loop_add(struct loop_device 
                disk->flags |= GENHD_FL_NO_PART_SCAN;
        disk->flags |= GENHD_FL_EXT_DEVT;
        mutex_init(&lo->lo_ctl_mutex);
 +      atomic_set(&lo->lo_refcnt, 0);
        lo->lo_number           = i;
        spin_lock_init(&lo->lo_lock);
        disk->major             = LOOP_MAJOR;
@@@ -1742,7 -1711,7 +1742,7 @@@ static long loop_control_ioctl(struct f
                        mutex_unlock(&lo->lo_ctl_mutex);
                        break;
                }
 -              if (lo->lo_refcnt > 0) {
 +              if (atomic_read(&lo->lo_refcnt) > 0) {
                        ret = -EBUSY;
                        mutex_unlock(&lo->lo_ctl_mutex);
                        break;
@@@ -1837,6 -1806,13 +1837,6 @@@ static int __init loop_init(void
                goto misc_out;
        }
  
 -      loop_wq = alloc_workqueue("kloopd",
 -                      WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0);
 -      if (!loop_wq) {
 -              err = -ENOMEM;
 -              goto misc_out;
 -      }
 -
        blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
                                  THIS_MODULE, loop_probe, NULL, NULL);
  
@@@ -1874,6 -1850,8 +1874,6 @@@ static void __exit loop_exit(void
        blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
        unregister_blkdev(LOOP_MAJOR, "loop");
  
 -      destroy_workqueue(loop_wq);
 -
        misc_deregister(&loop_misc);
  }
  
diff --combined drivers/md/bitmap.c
index 135a0907e9de413d140e9fb9b793a91b638a1606,3813fdfee4beed18466b96732f6e9a5f2489c7dc..ed2346ddf4c9fb54dafeb92ae9c795a0584444e8
@@@ -177,16 -177,11 +177,16 @@@ static struct md_rdev *next_active_rdev
         * nr_pending is 0 and In_sync is clear, the entries we return will
         * still be in the same position on the list when we re-enter
         * list_for_each_entry_continue_rcu.
 +       *
 +       * Note that if entered with 'rdev == NULL' to start at the
 +       * beginning, we temporarily assign 'rdev' to an address which
 +       * isn't really an rdev, but which can be used by
 +       * list_for_each_entry_continue_rcu() to find the first entry.
         */
        rcu_read_lock();
        if (rdev == NULL)
                /* start at the beginning */
 -              rdev = list_entry_rcu(&mddev->disks, struct md_rdev, same_set);
 +              rdev = list_entry(&mddev->disks, struct md_rdev, same_set);
        else {
                /* release the previous rdev and start from there. */
                rdev_dec_pending(rdev, mddev);
@@@ -839,7 -834,7 +839,7 @@@ static void bitmap_file_kick(struct bit
                if (bitmap->storage.file) {
                        path = kmalloc(PAGE_SIZE, GFP_KERNEL);
                        if (path)
-                               ptr = d_path(&bitmap->storage.file->f_path,
+                               ptr = file_path(bitmap->storage.file,
                                             path, PAGE_SIZE);
  
                        printk(KERN_ALERT
@@@ -1927,7 -1922,7 +1927,7 @@@ void bitmap_status(struct seq_file *seq
                   chunk_kb ? "KB" : "B");
        if (bitmap->storage.file) {
                seq_printf(seq, ", file: ");
-               seq_path(seq, &bitmap->storage.file->f_path, " \t\n");
+               seq_file_path(seq, bitmap->storage.file, " \t\n");
        }
  
        seq_printf(seq, "\n");
diff --combined drivers/md/md.c
index df92d30ca054c68a2af9cc3ee299525d1635a0eb,e67f3ac137bf2d5c88ef315293539b06b4d14169..d429c30cd51471c26cb1c07cb3e6a413106133d4
@@@ -2024,6 -2024,7 +2024,6 @@@ static int bind_rdev_to_array(struct md
  {
        char b[BDEVNAME_SIZE];
        struct kobject *ko;
 -      char *s;
        int err;
  
        /* prevent duplicates */
                return -EBUSY;
        }
        bdevname(rdev->bdev,b);
 -      while ( (s=strchr(b, '/')) != NULL)
 -              *s = '!';
 +      strreplace(b, '/', '!');
  
        rdev->mddev = mddev;
        printk(KERN_INFO "md: bind<%s>\n", b);
@@@ -2628,14 -2630,13 +2628,14 @@@ errors_show(struct md_rdev *rdev, char 
  static ssize_t
  errors_store(struct md_rdev *rdev, const char *buf, size_t len)
  {
 -      char *e;
 -      unsigned long n = simple_strtoul(buf, &e, 10);
 -      if (*buf && (*e == 0 || *e == '\n')) {
 -              atomic_set(&rdev->corrected_errors, n);
 -              return len;
 -      }
 -      return -EINVAL;
 +      unsigned int n;
 +      int rv;
 +
 +      rv = kstrtouint(buf, 10, &n);
 +      if (rv < 0)
 +              return rv;
 +      atomic_set(&rdev->corrected_errors, n);
 +      return len;
  }
  static struct rdev_sysfs_entry rdev_errors =
  __ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
@@@ -2652,16 -2653,13 +2652,16 @@@ slot_show(struct md_rdev *rdev, char *p
  static ssize_t
  slot_store(struct md_rdev *rdev, const char *buf, size_t len)
  {
 -      char *e;
 +      int slot;
        int err;
 -      int slot = simple_strtoul(buf, &e, 10);
 +
        if (strncmp(buf, "none", 4)==0)
                slot = -1;
 -      else if (e==buf || (*e && *e!= '\n'))
 -              return -EINVAL;
 +      else {
 +              err = kstrtouint(buf, 10, (unsigned int *)&slot);
 +              if (err < 0)
 +                      return err;
 +      }
        if (rdev->mddev->pers && slot == -1) {
                /* Setting 'slot' on an active array requires also
                 * updating the 'rd%d' link, and communicating
@@@ -3546,12 -3544,12 +3546,12 @@@ layout_show(struct mddev *mddev, char *
  static ssize_t
  layout_store(struct mddev *mddev, const char *buf, size_t len)
  {
 -      char *e;
 -      unsigned long n = simple_strtoul(buf, &e, 10);
 +      unsigned int n;
        int err;
  
 -      if (!*buf || (*e && *e != '\n'))
 -              return -EINVAL;
 +      err = kstrtouint(buf, 10, &n);
 +      if (err < 0)
 +              return err;
        err = mddev_lock(mddev);
        if (err)
                return err;
@@@ -3595,12 -3593,12 +3595,12 @@@ static int update_raid_disks(struct mdd
  static ssize_t
  raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
  {
 -      char *e;
 +      unsigned int n;
        int err;
 -      unsigned long n = simple_strtoul(buf, &e, 10);
  
 -      if (!*buf || (*e && *e != '\n'))
 -              return -EINVAL;
 +      err = kstrtouint(buf, 10, &n);
 +      if (err < 0)
 +              return err;
  
        err = mddev_lock(mddev);
        if (err)
@@@ -3647,12 -3645,12 +3647,12 @@@ chunk_size_show(struct mddev *mddev, ch
  static ssize_t
  chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
  {
 +      unsigned long n;
        int err;
 -      char *e;
 -      unsigned long n = simple_strtoul(buf, &e, 10);
  
 -      if (!*buf || (*e && *e != '\n'))
 -              return -EINVAL;
 +      err = kstrtoul(buf, 10, &n);
 +      if (err < 0)
 +              return err;
  
        err = mddev_lock(mddev);
        if (err)
@@@ -3690,24 -3688,19 +3690,24 @@@ resync_start_show(struct mddev *mddev, 
  static ssize_t
  resync_start_store(struct mddev *mddev, const char *buf, size_t len)
  {
 +      unsigned long long n;
        int err;
 -      char *e;
 -      unsigned long long n = simple_strtoull(buf, &e, 10);
 +
 +      if (cmd_match(buf, "none"))
 +              n = MaxSector;
 +      else {
 +              err = kstrtoull(buf, 10, &n);
 +              if (err < 0)
 +                      return err;
 +              if (n != (sector_t)n)
 +                      return -EINVAL;
 +      }
  
        err = mddev_lock(mddev);
        if (err)
                return err;
        if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
                err = -EBUSY;
 -      else if (cmd_match(buf, "none"))
 -              n = MaxSector;
 -      else if (!*buf || (*e && *e != '\n'))
 -              err = -EINVAL;
  
        if (!err) {
                mddev->recovery_cp = n;
@@@ -3841,7 -3834,7 +3841,7 @@@ array_state_store(struct mddev *mddev, 
                                err = -EBUSY;
                }
                spin_unlock(&mddev->lock);
 -              return err;
 +              return err ?: len;
        }
        err = mddev_lock(mddev);
        if (err)
@@@ -3943,14 -3936,14 +3943,14 @@@ max_corrected_read_errors_show(struct m
  static ssize_t
  max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
  {
 -      char *e;
 -      unsigned long n = simple_strtoul(buf, &e, 10);
 +      unsigned int n;
 +      int rv;
  
 -      if (*buf && (*e == 0 || *e == '\n')) {
 -              atomic_set(&mddev->max_corr_read_errors, n);
 -              return len;
 -      }
 -      return -EINVAL;
 +      rv = kstrtouint(buf, 10, &n);
 +      if (rv < 0)
 +              return rv;
 +      atomic_set(&mddev->max_corr_read_errors, n);
 +      return len;
  }
  
  static struct md_sysfs_entry max_corr_read_errors =
@@@ -4012,10 -4005,8 +4012,10 @@@ new_dev_store(struct mddev *mddev, cons
        else
                rdev = md_import_device(dev, -1, -1);
  
 -      if (IS_ERR(rdev))
 +      if (IS_ERR(rdev)) {
 +              mddev_unlock(mddev);
                return PTR_ERR(rdev);
 +      }
        err = bind_rdev_to_array(rdev, mddev);
   out:
        if (err)
@@@ -4220,36 -4211,34 +4220,36 @@@ action_store(struct mddev *mddev, cons
        if (!mddev->pers || !mddev->pers->sync_request)
                return -EINVAL;
  
 -      if (cmd_match(page, "frozen"))
 -              set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 -      else
 -              clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
  
        if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
 -              flush_workqueue(md_misc_wq);
 -              if (mddev->sync_thread) {
 -                      set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 -                      if (mddev_lock(mddev) == 0) {
 +              if (cmd_match(page, "frozen"))
 +                      set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 +              else
 +                      clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 +              if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
 +                  mddev_lock(mddev) == 0) {
 +                      flush_workqueue(md_misc_wq);
 +                      if (mddev->sync_thread) {
 +                              set_bit(MD_RECOVERY_INTR, &mddev->recovery);
                                md_reap_sync_thread(mddev);
 -                              mddev_unlock(mddev);
                        }
 +                      mddev_unlock(mddev);
                }
        } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
                   test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
                return -EBUSY;
        else if (cmd_match(page, "resync"))
 -              set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 +              clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
        else if (cmd_match(page, "recover")) {
 +              clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
 -              set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        } else if (cmd_match(page, "reshape")) {
                int err;
                if (mddev->pers->start_reshape == NULL)
                        return -EINVAL;
                err = mddev_lock(mddev);
                if (!err) {
 +                      clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                        err = mddev->pers->start_reshape(mddev);
                        mddev_unlock(mddev);
                }
                        set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
                else if (!cmd_match(page, "repair"))
                        return -EINVAL;
 +              clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
                set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
        }
@@@ -4309,18 -4297,15 +4309,18 @@@ sync_min_show(struct mddev *mddev, cha
  static ssize_t
  sync_min_store(struct mddev *mddev, const char *buf, size_t len)
  {
 -      int min;
 -      char *e;
 +      unsigned int min;
 +      int rv;
 +
        if (strncmp(buf, "system", 6)==0) {
 -              mddev->sync_speed_min = 0;
 -              return len;
 +              min = 0;
 +      } else {
 +              rv = kstrtouint(buf, 10, &min);
 +              if (rv < 0)
 +                      return rv;
 +              if (min == 0)
 +                      return -EINVAL;
        }
 -      min = simple_strtoul(buf, &e, 10);
 -      if (buf == e || (*e && *e != '\n') || min <= 0)
 -              return -EINVAL;
        mddev->sync_speed_min = min;
        return len;
  }
@@@ -4338,18 -4323,15 +4338,18 @@@ sync_max_show(struct mddev *mddev, cha
  static ssize_t
  sync_max_store(struct mddev *mddev, const char *buf, size_t len)
  {
 -      int max;
 -      char *e;
 +      unsigned int max;
 +      int rv;
 +
        if (strncmp(buf, "system", 6)==0) {
 -              mddev->sync_speed_max = 0;
 -              return len;
 +              max = 0;
 +      } else {
 +              rv = kstrtouint(buf, 10, &max);
 +              if (rv < 0)
 +                      return rv;
 +              if (max == 0)
 +                      return -EINVAL;
        }
 -      max = simple_strtoul(buf, &e, 10);
 -      if (buf == e || (*e && *e != '\n') || max <= 0)
 -              return -EINVAL;
        mddev->sync_speed_max = max;
        return len;
  }
@@@ -4532,13 -4514,12 +4532,13 @@@ suspend_lo_show(struct mddev *mddev, ch
  static ssize_t
  suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
  {
 -      char *e;
 -      unsigned long long new = simple_strtoull(buf, &e, 10);
 -      unsigned long long old;
 +      unsigned long long old, new;
        int err;
  
 -      if (buf == e || (*e && *e != '\n'))
 +      err = kstrtoull(buf, 10, &new);
 +      if (err < 0)
 +              return err;
 +      if (new != (sector_t)new)
                return -EINVAL;
  
        err = mddev_lock(mddev);
@@@ -4575,13 -4556,12 +4575,13 @@@ suspend_hi_show(struct mddev *mddev, ch
  static ssize_t
  suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
  {
 -      char *e;
 -      unsigned long long new = simple_strtoull(buf, &e, 10);
 -      unsigned long long old;
 +      unsigned long long old, new;
        int err;
  
 -      if (buf == e || (*e && *e != '\n'))
 +      err = kstrtoull(buf, 10, &new);
 +      if (err < 0)
 +              return err;
 +      if (new != (sector_t)new)
                return -EINVAL;
  
        err = mddev_lock(mddev);
@@@ -4623,13 -4603,11 +4623,13 @@@ static ssize_
  reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
  {
        struct md_rdev *rdev;
 -      char *e;
 +      unsigned long long new;
        int err;
 -      unsigned long long new = simple_strtoull(buf, &e, 10);
  
 -      if (buf == e || (*e && *e != '\n'))
 +      err = kstrtoull(buf, 10, &new);
 +      if (err < 0)
 +              return err;
 +      if (new != (sector_t)new)
                return -EINVAL;
        err = mddev_lock(mddev);
        if (err)
@@@ -5178,7 -5156,6 +5178,7 @@@ int md_run(struct mddev *mddev
                mddev_detach(mddev);
                if (mddev->private)
                        pers->free(mddev, mddev->private);
 +              mddev->private = NULL;
                module_put(pers->owner);
                bitmap_destroy(mddev);
                return err;
@@@ -5314,7 -5291,6 +5314,7 @@@ static void md_clean(struct mddev *mdde
        mddev->changed = 0;
        mddev->degraded = 0;
        mddev->safemode = 0;
 +      mddev->private = NULL;
        mddev->merge_check_needed = 0;
        mddev->bitmap_info.offset = 0;
        mddev->bitmap_info.default_offset = 0;
@@@ -5387,7 -5363,6 +5387,7 @@@ static void __md_stop(struct mddev *mdd
        mddev->pers = NULL;
        spin_unlock(&mddev->lock);
        pers->free(mddev, mddev->private);
 +      mddev->private = NULL;
        if (pers->sync_request && mddev->to_remove == NULL)
                mddev->to_remove = &md_redundancy_group;
        module_put(pers->owner);
@@@ -5766,7 -5741,7 +5766,7 @@@ static int get_bitmap_file(struct mdde
        /* bitmap disabled, zero the first byte and copy out */
        if (!mddev->bitmap_info.file)
                file->pathname[0] = '\0';
-       else if ((ptr = d_path(&mddev->bitmap_info.file->f_path,
+       else if ((ptr = file_path(mddev->bitmap_info.file,
                               file->pathname, sizeof(file->pathname))),
                 IS_ERR(ptr))
                err = PTR_ERR(ptr);
@@@ -6397,7 -6372,7 +6397,7 @@@ static int update_array_info(struct mdd
            mddev->ctime         != info->ctime         ||
            mddev->level         != info->level         ||
  /*        mddev->layout        != info->layout        || */
 -          !mddev->persistent   != info->not_persistent||
 +          mddev->persistent    != !info->not_persistent ||
            mddev->chunk_sectors != info->chunk_size >> 9 ||
            /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
            ((state^info->state) & 0xfffffe00)
@@@ -8128,15 -8103,6 +8128,15 @@@ void md_check_recovery(struct mddev *md
                int spares = 0;
  
                if (mddev->ro) {
 +                      struct md_rdev *rdev;
 +                      if (!mddev->external && mddev->in_sync)
 +                              /* 'Blocked' flag not needed as failed devices
 +                               * will be recorded if array switched to read/write.
 +                               * Leaving it set will prevent the device
 +                               * from being removed.
 +                               */
 +                              rdev_for_each(rdev, mddev)
 +                                      clear_bit(Blocked, &rdev->flags);
                        /* On a read-only array we can:
                         * - remove failed devices
                         * - add already-in_sync devices if the array itself
@@@ -8293,7 -8259,6 +8293,7 @@@ void md_reap_sync_thread(struct mddev *
        if (mddev_is_clustered(mddev))
                md_cluster_ops->metadata_update_finish(mddev);
        clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
 +      clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
        clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
        clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
        clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
@@@ -9044,7 -9009,13 +9044,7 @@@ static int get_ro(char *buffer, struct 
  }
  static int set_ro(const char *val, struct kernel_param *kp)
  {
 -      char *e;
 -      int num = simple_strtoul(val, &e, 10);
 -      if (*val && (*e == '\0' || *e == '\n')) {
 -              start_readonly = num;
 -              return 0;
 -      }
 -      return -EINVAL;
 +      return kstrtouint(val, 10, (unsigned int *)&start_readonly);
  }
  
  module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
diff --combined fs/binfmt_elf.c
index cd46e415883090747d8238c2a2fbaa9b101dbc5e,5046b62471037dc0a929306de0a7f89d7c8267a9..6b659967898ebc534c1ce8d91ff1179f050e8f34
@@@ -918,7 -918,7 +918,7 @@@ static int load_elf_binary(struct linux
                        total_size = total_mapping_size(elf_phdata,
                                                        loc->elf_ex.e_phnum);
                        if (!total_size) {
 -                              error = -EINVAL;
 +                              retval = -EINVAL;
                                goto out_free_dentry;
                        }
                }
@@@ -1530,7 -1530,7 +1530,7 @@@ static int fill_files_note(struct memel
                file = vma->vm_file;
                if (!file)
                        continue;
-               filename = d_path(&file->f_path, name_curpos, remaining);
+               filename = file_path(file, name_curpos, remaining);
                if (IS_ERR(filename)) {
                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
                                vfree(data);
                        continue;
                }
  
-               /* d_path() fills at the end, move name down */
+               /* file_path() fills at the end, move name down */
                /* n = strlen(filename) + 1: */
                n = (name_curpos + remaining) - filename;
                remaining = filename - name_curpos;
diff --combined fs/block_dev.c
index 4fe10f93db8a3e52ebbb5330e94b80ee92455e1d,12b22ddb22ef04ec150dabf4d6d631400bb79070..198243717da567bd5f47ad7c94ab823a82506c62
@@@ -14,7 -14,6 +14,7 @@@
  #include <linux/device_cgroup.h>
  #include <linux/highmem.h>
  #include <linux/blkdev.h>
 +#include <linux/backing-dev.h>
  #include <linux/module.h>
  #include <linux/blkpg.h>
  #include <linux/magic.h>
@@@ -43,7 -42,7 +43,7 @@@ static inline struct bdev_inode *BDEV_I
        return container_of(inode, struct bdev_inode, vfs_inode);
  }
  
 -inline struct block_device *I_BDEV(struct inode *inode)
 +struct block_device *I_BDEV(struct inode *inode)
  {
        return &BDEV_I(inode)->bdev;
  }
@@@ -152,6 -151,9 +152,9 @@@ blkdev_direct_IO(struct kiocb *iocb, st
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
  
+       if (IS_DAX(inode))
+               return dax_do_io(iocb, inode, iter, offset, blkdev_get_block,
+                               NULL, DIO_SKIP_DIO_COUNT);
        return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
                                    blkdev_get_block, NULL, NULL,
                                    DIO_SKIP_DIO_COUNT);
@@@ -377,7 -379,7 +380,7 @@@ int bdev_read_page(struct block_device 
                        struct page *page)
  {
        const struct block_device_operations *ops = bdev->bd_disk->fops;
 -      if (!ops->rw_page)
 +      if (!ops->rw_page || bdev_get_integrity(bdev))
                return -EOPNOTSUPP;
        return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
  }
@@@ -408,7 -410,7 +411,7 @@@ int bdev_write_page(struct block_devic
        int result;
        int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
        const struct block_device_operations *ops = bdev->bd_disk->fops;
 -      if (!ops->rw_page)
 +      if (!ops->rw_page || bdev_get_integrity(bdev))
                return -EOPNOTSUPP;
        set_page_writeback(page);
        result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
@@@ -443,6 -445,12 +446,12 @@@ long bdev_direct_access(struct block_de
        long avail;
        const struct block_device_operations *ops = bdev->bd_disk->fops;
  
+       /*
+        * The device driver is allowed to sleep, in order to make the
+        * memory directly accessible.
+        */
+       might_sleep();
        if (size < 0)
                return size;
        if (!ops->direct_access)
@@@ -547,8 -555,7 +556,8 @@@ static struct file_system_type bd_type 
        .kill_sb        = kill_anon_super,
  };
  
 -static struct super_block *blockdev_superblock __read_mostly;
 +struct super_block *blockdev_superblock __read_mostly;
 +EXPORT_SYMBOL_GPL(blockdev_superblock);
  
  void __init bdev_cache_init(void)
  {
@@@ -689,6 -696,11 +698,6 @@@ static struct block_device *bd_acquire(
        return bdev;
  }
  
 -int sb_is_blkdev_sb(struct super_block *sb)
 -{
 -      return sb == blockdev_superblock;
 -}
 -
  /* Call when you free inode */
  
  void bd_forget(struct inode *inode)
@@@ -1170,6 -1182,7 +1179,7 @@@ static int __blkdev_get(struct block_de
                bdev->bd_disk = disk;
                bdev->bd_queue = disk->queue;
                bdev->bd_contains = bdev;
+               bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
                if (!partno) {
                        ret = -ENXIO;
                        bdev->bd_part = disk_get_part(disk, partno);
diff --combined fs/btrfs/file.c
index 795d754327a7277de47d13e0f1426aaa5c7fd85c,86f97282779a20e036286142da21bad7b0465233..b823fac91c9289bc67d3bb5191f4ce96e38294ac
@@@ -1748,7 -1748,7 +1748,7 @@@ static ssize_t btrfs_file_write_iter(st
        }
  
        current->backing_dev_info = inode_to_bdi(inode);
-       err = file_remove_suid(file);
+       err = file_remove_privs(file);
        if (err) {
                mutex_unlock(&inode->i_mutex);
                goto out;
@@@ -1868,7 -1868,6 +1868,7 @@@ int btrfs_sync_file(struct file *file, 
        struct btrfs_log_ctx ctx;
        int ret = 0;
        bool full_sync = 0;
 +      const u64 len = end - start + 1;
  
        trace_btrfs_sync_file(file, datasync);
  
                 * all extents are persisted and the respective file extent
                 * items are in the fs/subvol btree.
                 */
 -              ret = btrfs_wait_ordered_range(inode, start, end - start + 1);
 +              ret = btrfs_wait_ordered_range(inode, start, len);
        } else {
                /*
                 * Start any new ordered operations before starting to log the
         */
        smp_mb();
        if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
 -          (full_sync && BTRFS_I(inode)->last_trans <=
 -           root->fs_info->last_trans_committed)) {
 +          (BTRFS_I(inode)->last_trans <=
 +           root->fs_info->last_trans_committed &&
 +           (full_sync ||
 +            !btrfs_have_ordered_extents_in_range(inode, start, len)))) {
                /*
                 * We'v had everything committed since the last time we were
                 * modified so clear this flag in case it was set for whatever
diff --combined fs/ceph/file.c
index faf92095e105650617d8e465e898f8d22a803d60,e55fe32c6224363c5bfbfedce8fb9c46e09fe48a..8b79d87eaf4675ff91cf05c10a3fc53e70d5b313
@@@ -89,14 -89,13 +89,14 @@@ static int ceph_init_file(struct inode 
        case S_IFDIR:
                dout("init_file %p %p 0%o (regular)\n", inode, file,
                     inode->i_mode);
 -              cf = kmem_cache_alloc(ceph_file_cachep, GFP_NOFS | __GFP_ZERO);
 +              cf = kmem_cache_alloc(ceph_file_cachep, GFP_KERNEL | __GFP_ZERO);
                if (cf == NULL) {
                        ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
                        return -ENOMEM;
                }
                cf->fmode = fmode;
                cf->next_offset = 2;
 +              cf->readdir_cache_idx = -1;
                file->private_data = cf;
                BUG_ON(inode->i_fop->release != ceph_release);
                break;
@@@ -325,6 -324,7 +325,6 @@@ int ceph_release(struct inode *inode, s
                ceph_mdsc_put_request(cf->last_readdir);
        kfree(cf->last_name);
        kfree(cf->dir_info);
 -      dput(cf->dentry);
        kmem_cache_free(ceph_file_cachep, cf);
  
        /* wake up anyone waiting for caps on this inode */
@@@ -483,7 -483,7 +483,7 @@@ static ssize_t ceph_sync_read(struct ki
                }
        } else {
                num_pages = calc_pages_for(off, len);
 -              pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
 +              pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
                if (IS_ERR(pages))
                        return PTR_ERR(pages);
                ret = striped_read(inode, off, len, pages,
@@@ -557,13 -557,13 +557,13 @@@ static void ceph_sync_write_unsafe(stru
   * objects, rollback on failure, etc.)
   */
  static ssize_t
 -ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
 +ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
 +                     struct ceph_snap_context *snapc)
  {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 -      struct ceph_snap_context *snapc;
        struct ceph_vino vino;
        struct ceph_osd_request *req;
        struct page **pages;
                size_t start;
                ssize_t n;
  
 -              snapc = ci->i_snap_realm->cached_context;
                vino = ceph_vino(inode);
                req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
                                            vino, pos, &len, 0,
                        break;
                }
  
 -              osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
 +              osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
  
                n = iov_iter_get_pages_alloc(from, &pages, len, &start);
                if (unlikely(n < 0)) {
   * objects, rollback on failure, etc.)
   */
  static ssize_t
 -ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
 +ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
 +              struct ceph_snap_context *snapc)
  {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 -      struct ceph_snap_context *snapc;
        struct ceph_vino vino;
        struct ceph_osd_request *req;
        struct page **pages;
                size_t left;
                int n;
  
 -              snapc = ci->i_snap_realm->cached_context;
                vino = ceph_vino(inode);
                req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
                                            vino, pos, &len, 0, 1,
                 */
                num_pages = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
  
 -              pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
 +              pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
                if (IS_ERR(pages)) {
                        ret = PTR_ERR(pages);
                        goto out;
@@@ -858,7 -860,7 +858,7 @@@ again
                struct page *page = NULL;
                loff_t i_size;
                if (retry_op == READ_INLINE) {
 -                      page = __page_cache_alloc(GFP_NOFS);
 +                      page = __page_cache_alloc(GFP_KERNEL);
                        if (!page)
                                return -ENOMEM;
                }
@@@ -939,7 -941,6 +939,7 @@@ static ssize_t ceph_write_iter(struct k
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_osd_client *osdc =
                &ceph_sb_to_client(inode->i_sb)->client->osdc;
 +      struct ceph_cap_flush *prealloc_cf;
        ssize_t count, written = 0;
        int err, want, got;
        loff_t pos;
        if (ceph_snap(inode) != CEPH_NOSNAP)
                return -EROFS;
  
 +      prealloc_cf = ceph_alloc_cap_flush();
 +      if (!prealloc_cf)
 +              return -ENOMEM;
 +
        mutex_lock(&inode->i_mutex);
  
        /* We can write back this queue in page reclaim */
  
        pos = iocb->ki_pos;
        count = iov_iter_count(from);
-       err = file_remove_suid(file);
+       err = file_remove_privs(file);
        if (err)
                goto out;
  
@@@ -999,30 -996,14 +999,30 @@@ retry_snap
  
        if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
            (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
 +              struct ceph_snap_context *snapc;
                struct iov_iter data;
                mutex_unlock(&inode->i_mutex);
 +
 +              spin_lock(&ci->i_ceph_lock);
 +              if (__ceph_have_pending_cap_snap(ci)) {
 +                      struct ceph_cap_snap *capsnap =
 +                                      list_last_entry(&ci->i_cap_snaps,
 +                                                      struct ceph_cap_snap,
 +                                                      ci_item);
 +                      snapc = ceph_get_snap_context(capsnap->context);
 +              } else {
 +                      BUG_ON(!ci->i_head_snapc);
 +                      snapc = ceph_get_snap_context(ci->i_head_snapc);
 +              }
 +              spin_unlock(&ci->i_ceph_lock);
 +
                /* we might need to revert back to that point */
                data = *from;
                if (iocb->ki_flags & IOCB_DIRECT)
 -                      written = ceph_sync_direct_write(iocb, &data, pos);
 +                      written = ceph_sync_direct_write(iocb, &data, pos,
 +                                                       snapc);
                else
 -                      written = ceph_sync_write(iocb, &data, pos);
 +                      written = ceph_sync_write(iocb, &data, pos, snapc);
                if (written == -EOLDSNAPC) {
                        dout("aio_write %p %llx.%llx %llu~%u"
                                "got EOLDSNAPC, retrying\n",
                }
                if (written > 0)
                        iov_iter_advance(from, written);
 +              ceph_put_snap_context(snapc);
        } else {
                loff_t old_size = inode->i_size;
                /*
                int dirty;
                spin_lock(&ci->i_ceph_lock);
                ci->i_inline_version = CEPH_INLINE_NONE;
 -              dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
 +              dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
 +                                             &prealloc_cf);
                spin_unlock(&ci->i_ceph_lock);
                if (dirty)
                        __mark_inode_dirty(inode, dirty);
  out:
        mutex_unlock(&inode->i_mutex);
  out_unlocked:
 +      ceph_free_cap_flush(prealloc_cf);
        current->backing_dev_info = NULL;
        return written ? written : err;
  }
@@@ -1277,7 -1255,6 +1277,7 @@@ static long ceph_fallocate(struct file 
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_osd_client *osdc =
                &ceph_inode_to_client(inode)->client->osdc;
 +      struct ceph_cap_flush *prealloc_cf;
        int want, got = 0;
        int dirty;
        int ret = 0;
        if (!S_ISREG(inode->i_mode))
                return -EOPNOTSUPP;
  
 +      prealloc_cf = ceph_alloc_cap_flush();
 +      if (!prealloc_cf)
 +              return -ENOMEM;
 +
        mutex_lock(&inode->i_mutex);
  
        if (ceph_snap(inode) != CEPH_NOSNAP) {
        if (!ret) {
                spin_lock(&ci->i_ceph_lock);
                ci->i_inline_version = CEPH_INLINE_NONE;
 -              dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
 +              dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
 +                                             &prealloc_cf);
                spin_unlock(&ci->i_ceph_lock);
                if (dirty)
                        __mark_inode_dirty(inode, dirty);
        ceph_put_cap_refs(ci, got);
  unlock:
        mutex_unlock(&inode->i_mutex);
 +      ceph_free_cap_flush(prealloc_cf);
        return ret;
  }
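
[ Note: the prealloc_cf hunks above follow a common kernel pattern:
  allocate the ceph_cap_flush while sleeping is still legal, pass a
  pointer-to-pointer into the spinlocked section, and free whatever was
  not consumed.  A hedged sketch of the pattern (assuming, since it is
  not shown in this diff, that __ceph_mark_dirty_caps() takes ownership
  by NULLing the pointer and that ceph_free_cap_flush(NULL) is a no-op): ]

	/* fragment; ci, inode and dirty come from the surrounding function */
	struct ceph_cap_flush *prealloc_cf = ceph_alloc_cap_flush();

	if (!prealloc_cf)
		return -ENOMEM;

	spin_lock(&ci->i_ceph_lock);
	dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, &prealloc_cf);
	spin_unlock(&ci->i_ceph_lock);

	if (dirty)
		__mark_inode_dirty(inode, dirty);
	ceph_free_cap_flush(prealloc_cf);	/* NULL here if it was used */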
  
diff --combined fs/coredump.c
index e52e0064feac8d01c1447969e7e48189af5f2714,5b771b36cc6e79de2883b6e23b138f547bb0b1f6..c5ecde6f3eed975af7756c17cec4f3b1748dbc83
@@@ -70,8 -70,7 +70,8 @@@ static int expand_corename(struct core_
        return 0;
  }
  
 -static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg)
 +static __printf(2, 0) int cn_vprintf(struct core_name *cn, const char *fmt,
 +                                   va_list arg)
  {
        int free, need;
        va_list arg_copy;
@@@ -94,7 -93,7 +94,7 @@@ again
        return -ENOMEM;
  }
  
 -static int cn_printf(struct core_name *cn, const char *fmt, ...)
 +static __printf(2, 3) int cn_printf(struct core_name *cn, const char *fmt, ...)
  {
        va_list arg;
        int ret;
        return ret;
  }
  
 -static int cn_esc_printf(struct core_name *cn, const char *fmt, ...)
 +static __printf(2, 3)
 +int cn_esc_printf(struct core_name *cn, const char *fmt, ...)
  {
        int cur = cn->used;
        va_list arg;
@@@ -140,7 -138,7 +140,7 @@@ static int cn_print_exe_file(struct cor
                goto put_exe_file;
        }
  
-       path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
+       path = file_path(exe_file, pathbuf, PATH_MAX);
        if (IS_ERR(path)) {
                ret = PTR_ERR(path);
                goto free_buf;
@@@ -211,15 -209,11 +211,15 @@@ static int format_corename(struct core_
                                break;
                        /* uid */
                        case 'u':
 -                              err = cn_printf(cn, "%d", cred->uid);
 +                              err = cn_printf(cn, "%u",
 +                                              from_kuid(&init_user_ns,
 +                                                        cred->uid));
                                break;
                        /* gid */
                        case 'g':
 -                              err = cn_printf(cn, "%d", cred->gid);
 +                              err = cn_printf(cn, "%u",
 +                                              from_kgid(&init_user_ns,
 +                                                        cred->gid));
                                break;
                        case 'd':
                                err = cn_printf(cn, "%d",
                                break;
                        /* signal that caused the coredump */
                        case 's':
 -                              err = cn_printf(cn, "%ld", cprm->siginfo->si_signo);
 +                              err = cn_printf(cn, "%d",
 +                                              cprm->siginfo->si_signo);
                                break;
                        /* UNIX time of coredump */
                        case 't': {
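
[ Note: the %d -> %u changes above matter because kuid_t/kgid_t are
  opaque types once user namespaces are enabled; they must be mapped to
  userspace-visible ids before printing.  A sketch of the conversion;
  cn_print_ids() is a hypothetical helper, not part of this diff: ]

	static int cn_print_ids(struct core_name *cn, const struct cred *cred)
	{
		/* map kernel-internal ids into the initial user namespace */
		uid_t uid = from_kuid(&init_user_ns, cred->uid);
		gid_t gid = from_kgid(&init_user_ns, cred->gid);

		return cn_printf(cn, "%u.%u", uid, gid);
	}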
diff --combined fs/dax.c
index 99b5fbc38992db1f88be1a1e48dad4fda584a0c1,37a0c4826c1ae3ef6f017885148502e7ae56f407..c3e21ccfc358b2da1170c15f09a5946afad0ff16
+++ b/fs/dax.c
@@@ -155,7 -155,7 +155,7 @@@ static ssize_t dax_io(struct inode *ino
                }
  
                if (iov_iter_rw(iter) == WRITE)
-                       len = copy_from_iter(addr, max - pos, iter);
+                       len = copy_from_iter_nocache(addr, max - pos, iter);
                else if (!hole)
                        len = copy_to_iter(addr, max - pos, iter);
                else
@@@ -209,7 -209,8 +209,8 @@@ ssize_t dax_do_io(struct kiocb *iocb, s
        }
  
        /* Protects against truncate */
-       inode_dio_begin(inode);
+       if (!(flags & DIO_SKIP_DIO_COUNT))
+               inode_dio_begin(inode);
  
        retval = dax_io(inode, iter, pos, end, get_block, &bh);
  
        if ((retval > 0) && end_io)
                end_io(iocb, pos, retval, bh.b_private);
  
-       inode_dio_end(inode);
+       if (!(flags & DIO_SKIP_DIO_COUNT))
+               inode_dio_end(inode);
   out:
        return retval;
  }
@@@ -309,21 -311,14 +311,21 @@@ static int dax_insert_mapping(struct in
   out:
        i_mmap_unlock_read(mapping);
  
 -      if (bh->b_end_io)
 -              bh->b_end_io(bh, 1);
 -
        return error;
  }
  
 -static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 -                      get_block_t get_block)
 +/**
 + * __dax_fault - handle a page fault on a DAX file
 + * @vma: The virtual memory area where the fault occurred
 + * @vmf: The description of the fault
 + * @get_block: The filesystem method used to translate file offsets to blocks
 + *
 + * When a page fault occurs, filesystems may call this helper in their
 + * fault handler for DAX files. __dax_fault() assumes the caller has done all
 + * the necessary locking for the page fault to proceed successfully.
 + */
 +int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 +                      get_block_t get_block, dax_iodone_t complete_unwritten)
  {
        struct file *file = vma->vm_file;
        struct address_space *mapping = file->f_mapping;
                page_cache_release(page);
        }
  
 +      /*
 +       * If we successfully insert the new mapping over an unwritten extent,
 +       * we need to ensure we convert the unwritten extent. If there is an
 +       * error inserting the mapping, the filesystem needs to leave it as
 +       * unwritten to prevent exposure of the stale underlying data to
 +       * userspace, but we still need to call the completion function so
 +       * the private resources on the mapping buffer can be released. We
 +       * indicate what the callback should do via the uptodate variable, same
 +       * as for normal BH based IO completions.
 +       */
        error = dax_insert_mapping(inode, &bh, vma, vmf);
 +      if (buffer_unwritten(&bh))
 +              complete_unwritten(&bh, !error);
  
   out:
        if (error == -ENOMEM)
        }
        goto out;
  }
 +EXPORT_SYMBOL(__dax_fault);
  
  /**
   * dax_fault - handle a page fault on a DAX file
   * fault handler for DAX files.
   */
  int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 -                      get_block_t get_block)
 +            get_block_t get_block, dax_iodone_t complete_unwritten)
  {
        int result;
        struct super_block *sb = file_inode(vma->vm_file)->i_sb;
                sb_start_pagefault(sb);
                file_update_time(vma->vm_file);
        }
 -      result = do_dax_fault(vma, vmf, get_block);
 +      result = __dax_fault(vma, vmf, get_block, complete_unwritten);
        if (vmf->flags & FAULT_FLAG_WRITE)
                sb_end_pagefault(sb);
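
[ Note: a hedged sketch of how a filesystem's .fault handler might
  adopt the new dax_fault() signature; myfs_get_block (a get_block_t)
  and myfs_end_io (a dax_iodone_t that converts unwritten extents) are
  hypothetical placeholders, not part of this diff: ]

	static int myfs_filemap_fault(struct vm_area_struct *vma,
				      struct vm_fault *vmf)
	{
		struct inode *inode = file_inode(vma->vm_file);

		if (IS_DAX(inode))
			return dax_fault(vma, vmf, myfs_get_block,
					 myfs_end_io);
		return filemap_fault(vma, vmf);
	}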
  
diff --combined fs/dcache.c
index 910968b4b6bf74c53003b1ab81feeaa7ba859d80,c4ce35110704d8cf84289c92152c7dd3ed64b6b2..7a3f3e5f9ceabfc4cad41a5d5478258f7e817dc1
@@@ -322,17 -322,17 +322,17 @@@ static void dentry_free(struct dentry *
  }
  
  /**
 - * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 + * dentry_rcuwalk_invalidate - invalidate in-progress rcu-walk lookups
   * @dentry: the target dentry
   * After this call, in-progress rcu-walk path lookup will fail. This
   * should be called after unhashing, and after changing d_inode (if
   * the dentry has not already been unhashed).
   */
 -static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
 +static inline void dentry_rcuwalk_invalidate(struct dentry *dentry)
  {
 -      assert_spin_locked(&dentry->d_lock);
 -      /* Go through a barrier */
 -      write_seqcount_barrier(&dentry->d_seq);
 +      lockdep_assert_held(&dentry->d_lock);
 +      /* Go through an invalidation barrier */
 +      write_seqcount_invalidate(&dentry->d_seq);
  }
  
  /*
@@@ -372,7 -372,7 +372,7 @@@ static void dentry_unlink_inode(struct 
        struct inode *inode = dentry->d_inode;
        __d_clear_type_and_inode(dentry);
        hlist_del_init(&dentry->d_u.d_alias);
 -      dentry_rcuwalk_barrier(dentry);
 +      dentry_rcuwalk_invalidate(dentry);
        spin_unlock(&dentry->d_lock);
        spin_unlock(&inode->i_lock);
        if (!inode->i_nlink)
@@@ -494,7 -494,7 +494,7 @@@ void __d_drop(struct dentry *dentry
                __hlist_bl_del(&dentry->d_hash);
                dentry->d_hash.pprev = NULL;
                hlist_bl_unlock(b);
 -              dentry_rcuwalk_barrier(dentry);
 +              dentry_rcuwalk_invalidate(dentry);
        }
  }
  EXPORT_SYMBOL(__d_drop);
@@@ -1673,7 -1673,8 +1673,8 @@@ void d_set_d_op(struct dentry *dentry, 
                                DCACHE_OP_COMPARE       |
                                DCACHE_OP_REVALIDATE    |
                                DCACHE_OP_WEAK_REVALIDATE       |
-                               DCACHE_OP_DELETE ));
+                               DCACHE_OP_DELETE        |
+                               DCACHE_OP_SELECT_INODE));
        dentry->d_op = op;
        if (!op)
                return;
                dentry->d_flags |= DCACHE_OP_DELETE;
        if (op->d_prune)
                dentry->d_flags |= DCACHE_OP_PRUNE;
+       if (op->d_select_inode)
+               dentry->d_flags |= DCACHE_OP_SELECT_INODE;
  
  }
  EXPORT_SYMBOL(d_set_d_op);
@@@ -1752,7 -1755,7 +1755,7 @@@ static void __d_instantiate(struct dent
        if (inode)
                hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
        __d_set_inode_and_type(dentry, inode, add_flags);
 -      dentry_rcuwalk_barrier(dentry);
 +      dentry_rcuwalk_invalidate(dentry);
        spin_unlock(&dentry->d_lock);
        fsnotify_d_instantiate(dentry, inode);
  }
@@@ -2927,6 -2930,17 +2930,6 @@@ restart
                                vfsmnt = &mnt->mnt;
                                continue;
                        }
 -                      /*
 -                       * Filesystems needing to implement special "root names"
 -                       * should do so with ->d_dname()
 -                       */
 -                      if (IS_ROOT(dentry) &&
 -                         (dentry->d_name.len != 1 ||
 -                          dentry->d_name.name[0] != '/')) {
 -                              WARN(1, "Root dentry has weird name <%.*s>\n",
 -                                   (int) dentry->d_name.len,
 -                                   dentry->d_name.name);
 -                      }
                        if (!error)
                                error = is_mounted(vfsmnt) ? 1 : 2;
                        break;
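
[ Note: the barrier -> invalidate rename is about intent: the writer
  bumps d_seq so concurrent rcu-walk lookups notice and retry.  Roughly
  the read side being defeated; peek_inode() is a hypothetical
  illustration, the real path walk uses variants of these seqcount
  helpers: ]

	static struct inode *peek_inode(struct dentry *dentry)
	{
		struct inode *inode;
		unsigned seq;

		do {
			seq = read_seqcount_begin(&dentry->d_seq);
			inode = dentry->d_inode;	/* speculative read */
		} while (read_seqcount_retry(&dentry->d_seq, seq));
		return inode;
	}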
diff --combined fs/debugfs/inode.c
index d6d1cf004123385e49750bd3242b417d09215ae4,ef86ad6bdc3ee952d8171905860af9c9767fa60f..c711be8d6a3cc71a598a92a82026a59f05eccd90
@@@ -44,11 -44,6 +44,6 @@@ static struct inode *debugfs_get_inode(
        return inode;
  }
  
- static inline int debugfs_positive(struct dentry *dentry)
- {
-       return d_really_is_positive(dentry) && !d_unhashed(dentry);
- }
  struct debugfs_mount_opts {
        kuid_t uid;
        kgid_t gid;
@@@ -522,7 -517,7 +517,7 @@@ static int __debugfs_remove(struct dent
  {
        int ret = 0;
  
-       if (debugfs_positive(dentry)) {
+       if (simple_positive(dentry)) {
                dget(dentry);
                if (d_is_dir(dentry))
                        ret = simple_rmdir(d_inode(parent), dentry);
@@@ -602,7 -597,7 +597,7 @@@ void debugfs_remove_recursive(struct de
         */
        spin_lock(&parent->d_lock);
        list_for_each_entry(child, &parent->d_subdirs, d_child) {
-               if (!debugfs_positive(child))
+               if (!simple_positive(child))
                        continue;
  
                /* perhaps simple_empty(child) makes more sense */
                 * from d_subdirs. When releasing the parent->d_lock we can
                 * no longer trust that the next pointer is valid.
                 * Restart the loop. We'll skip this one with the
-                * debugfs_positive() check.
+                * simple_positive() check.
                 */
                goto loop;
        }
@@@ -716,17 -711,20 +711,17 @@@ bool debugfs_initialized(void
  }
  EXPORT_SYMBOL_GPL(debugfs_initialized);
  
 -
 -static struct kobject *debug_kobj;
 -
  static int __init debugfs_init(void)
  {
        int retval;
  
 -      debug_kobj = kobject_create_and_add("debug", kernel_kobj);
 -      if (!debug_kobj)
 -              return -EINVAL;
 +      retval = sysfs_create_mount_point(kernel_kobj, "debug");
 +      if (retval)
 +              return retval;
  
        retval = register_filesystem(&debug_fs_type);
        if (retval)
 -              kobject_put(debug_kobj);
 +              sysfs_remove_mount_point(kernel_kobj, "debug");
        else
                debugfs_registered = true;
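
[ Note: debugfs_positive() above, tracefs_positive() and libfs's
  private simple_positive() elsewhere in this diff were three copies of
  the same test; the series replaces them with one public helper.  Its
  shape, per the copies being removed (return type hedged, the statics
  used int): ]

	static inline bool simple_positive(struct dentry *dentry)
	{
		/* has an inode and has not been unhashed (deleted) */
		return d_really_is_positive(dentry) && !d_unhashed(dentry);
	}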
  
diff --combined fs/ext4/super.c
index 5c787647afe2a3817dfbf49949e195218199c8f6,0ae853d2e1f141cb2bde05dde7f089b6eff98c09..58987b5c514b2baf433801dee0525d05c4525906
@@@ -24,7 -24,6 +24,7 @@@
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/blkdev.h>
 +#include <linux/backing-dev.h>
  #include <linux/parser.h>
  #include <linux/buffer_head.h>
  #include <linux/exportfs.h>
@@@ -295,8 -294,6 +295,8 @@@ static void __save_error_info(struct su
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
  
        EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 +      if (bdev_read_only(sb->s_bdev))
 +              return;
        es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
        es->s_last_error_time = cpu_to_le32(get_seconds());
        strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
@@@ -452,7 -449,7 +452,7 @@@ void __ext4_error_file(struct file *fil
        es = EXT4_SB(inode->i_sb)->s_es;
        es->s_last_error_ino = cpu_to_le32(inode->i_ino);
        if (ext4_error_ratelimit(inode->i_sb)) {
-               path = d_path(&(file->f_path), pathname, sizeof(pathname));
+               path = file_path(file, pathname, sizeof(pathname));
                if (IS_ERR(path))
                        path = "(unknown)";
                va_start(args, fmt);
@@@ -592,17 -589,14 +592,17 @@@ void __ext4_msg(struct super_block *sb
        va_end(args);
  }
  
 +#define ext4_warning_ratelimit(sb)                                    \
 +              ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \
 +                           "EXT4-fs warning")
 +
  void __ext4_warning(struct super_block *sb, const char *function,
                    unsigned int line, const char *fmt, ...)
  {
        struct va_format vaf;
        va_list args;
  
 -      if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
 -                        "EXT4-fs warning"))
 +      if (!ext4_warning_ratelimit(sb))
                return;
  
        va_start(args, fmt);
        va_end(args);
  }
  
 +void __ext4_warning_inode(const struct inode *inode, const char *function,
 +                        unsigned int line, const char *fmt, ...)
 +{
 +      struct va_format vaf;
 +      va_list args;
 +
 +      if (!ext4_warning_ratelimit(inode->i_sb))
 +              return;
 +
 +      va_start(args, fmt);
 +      vaf.fmt = fmt;
 +      vaf.va = &args;
 +      printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
 +             "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
 +             function, line, inode->i_ino, current->comm, &vaf);
 +      va_end(args);
 +}
 +
  void __ext4_grp_locked_error(const char *function, unsigned int line,
                             struct super_block *sb, ext4_group_t grp,
                             unsigned long ino, ext4_fsblk_t block,
@@@ -829,7 -805,6 +829,7 @@@ static void ext4_put_super(struct super
                dump_orphan_list(sb, sbi);
        J_ASSERT(list_empty(&sbi->s_orphan));
  
 +      sync_blockdev(sb->s_bdev);
        invalidate_bdev(sb->s_bdev);
        if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
                /*
@@@ -902,8 -877,9 +902,8 @@@ static struct inode *ext4_alloc_inode(s
        atomic_set(&ei->i_unwritten, 0);
        INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
  #ifdef CONFIG_EXT4_FS_ENCRYPTION
 -      ei->i_encryption_key.mode = EXT4_ENCRYPTION_MODE_INVALID;
 +      ei->i_crypt_info = NULL;
  #endif
 -
        return &ei->vfs_inode;
  }
  
@@@ -980,10 -956,6 +980,10 @@@ void ext4_clear_inode(struct inode *ino
                jbd2_free_inode(EXT4_I(inode)->jinode);
                EXT4_I(inode)->jinode = NULL;
        }
 +#ifdef CONFIG_EXT4_FS_ENCRYPTION
 +      if (EXT4_I(inode)->i_crypt_info)
 +              ext4_free_encryption_info(inode, EXT4_I(inode)->i_crypt_info);
 +#endif
  }
  
  static struct inode *ext4_nfs_get_inode(struct super_block *sb,
@@@ -3446,6 -3418,7 +3446,6 @@@ static int ext4_fill_super(struct super
        unsigned long journal_devnum = 0;
        unsigned long def_mount_opts;
        struct inode *root;
 -      char *cp;
        const char *descr;
        int ret = -ENOMEM;
        int blocksize, clustersize;
        if (sb->s_bdev->bd_part)
                sbi->s_sectors_written_start =
                        part_stat_read(sb->s_bdev->bd_part, sectors[1]);
 -#ifdef CONFIG_EXT4_FS_ENCRYPTION
 -      /* Modes of operations for file and directory encryption. */
 -      sbi->s_file_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
 -      sbi->s_dir_encryption_mode = EXT4_ENCRYPTION_MODE_INVALID;
 -#endif
  
        /* Cleanup superblock name */
 -      for (cp = sb->s_id; (cp = strchr(cp, '/'));)
 -              *cp = '!';
 +      strreplace(sb->s_id, '/', '!');
  
        /* -EINVAL is default */
        ret = -EINVAL;
@@@ -4086,15 -4065,7 +4086,15 @@@ no_journal
                }
        }
  
 -      if (unlikely(sbi->s_mount_flags & EXT4_MF_TEST_DUMMY_ENCRYPTION) &&
 +      if ((DUMMY_ENCRYPTION_ENABLED(sbi) ||
 +           EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) &&
 +          (blocksize != PAGE_CACHE_SIZE)) {
 +              ext4_msg(sb, KERN_ERR,
 +                       "Unsupported blocksize for fs encryption");
 +              goto failed_mount_wq;
 +      }
 +
 +      if (DUMMY_ENCRYPTION_ENABLED(sbi) &&
            !(sb->s_flags & MS_RDONLY) &&
            !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) {
                EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT);
@@@ -4970,9 -4941,6 +4970,9 @@@ static int ext4_remount(struct super_bl
                set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
        }
  
 +      if (*flags & MS_LAZYTIME)
 +              sb->s_flags |= MS_LAZYTIME;
 +
        if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
                if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
                        err = -EROFS;
@@@ -5440,7 -5408,6 +5440,7 @@@ static ssize_t ext4_quota_write(struct 
        struct inode *inode = sb_dqopt(sb)->files[type];
        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
        int err, offset = off & (sb->s_blocksize - 1);
 +      int retries = 0;
        struct buffer_head *bh;
        handle_t *handle = journal_current_handle();
  
                return -EIO;
        }
  
 -      bh = ext4_bread(handle, inode, blk, 1);
 +      do {
 +              bh = ext4_bread(handle, inode, blk,
 +                              EXT4_GET_BLOCKS_CREATE |
 +                              EXT4_GET_BLOCKS_METADATA_NOFAIL);
 +      } while (IS_ERR(bh) && (PTR_ERR(bh) == -ENOSPC) &&
 +               ext4_should_retry_alloc(inode->i_sb, &retries));
        if (IS_ERR(bh))
                return PTR_ERR(bh);
        if (!bh)
@@@ -5683,7 -5645,6 +5683,7 @@@ out7
  
  static void __exit ext4_exit_fs(void)
  {
 +      ext4_exit_crypto();
        ext4_destroy_lazyinit_thread();
        unregister_as_ext2();
        unregister_as_ext3();
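
[ Note: the superblock-name cleanup above swaps an open-coded strchr()
  loop for the then-new strreplace() string helper.  A sketch of the
  equivalence (return value hedged; it is a pointer into the string,
  not a replacement count): ]

	/* before: */
	char *cp;

	for (cp = sb->s_id; (cp = strchr(cp, '/'));)
		*cp = '!';

	/* after: rewrite every '/' as '!' in place */
	strreplace(sb->s_id, '/', '!');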
diff --combined fs/fuse/file.c
index 014fa8ba2b5189e923557c446be8f150921f9b28,1344647965dc09f83bb67920e801cafbf1e7b564..f523f2f04c196db5b1201a38a6e3222ae40d1724
@@@ -96,17 -96,17 +96,17 @@@ static void fuse_file_put(struct fuse_f
                         * Drop the release request when client does not
                         * implement 'open'
                         */
 -                      req->background = 0;
 +                      __clear_bit(FR_BACKGROUND, &req->flags);
                        iput(req->misc.release.inode);
                        fuse_put_request(ff->fc, req);
                } else if (sync) {
 -                      req->background = 0;
 +                      __clear_bit(FR_BACKGROUND, &req->flags);
                        fuse_request_send(ff->fc, req);
                        iput(req->misc.release.inode);
                        fuse_put_request(ff->fc, req);
                } else {
                        req->end = fuse_release_end;
 -                      req->background = 1;
 +                      __set_bit(FR_BACKGROUND, &req->flags);
                        fuse_request_send_background(ff->fc, req);
                }
                kfree(ff);
@@@ -299,8 -299,8 +299,8 @@@ void fuse_sync_release(struct fuse_fil
  {
        WARN_ON(atomic_read(&ff->count) > 1);
        fuse_prepare_release(ff, flags, FUSE_RELEASE);
 -      ff->reserved_req->force = 1;
 -      ff->reserved_req->background = 0;
 +      __set_bit(FR_FORCE, &ff->reserved_req->flags);
 +      __clear_bit(FR_BACKGROUND, &ff->reserved_req->flags);
        fuse_request_send(ff->fc, ff->reserved_req);
        fuse_put_request(ff->fc, ff->reserved_req);
        kfree(ff);
@@@ -426,7 -426,7 +426,7 @@@ static int fuse_flush(struct file *file
        req->in.numargs = 1;
        req->in.args[0].size = sizeof(inarg);
        req->in.args[0].value = &inarg;
 -      req->force = 1;
 +      __set_bit(FR_FORCE, &req->flags);
        fuse_request_send(fc, req);
        err = req->out.h.error;
        fuse_put_request(fc, req);
@@@ -1169,7 -1169,7 +1169,7 @@@ static ssize_t fuse_file_write_iter(str
        if (err <= 0)
                goto out;
  
-       err = file_remove_suid(file);
+       err = file_remove_privs(file);
        if (err)
                goto out;
  
@@@ -1445,9 -1445,9 +1445,9 @@@ static void fuse_writepage_finish(struc
  
        list_del(&req->writepages_entry);
        for (i = 0; i < req->num_pages; i++) {
 -              dec_bdi_stat(bdi, BDI_WRITEBACK);
 +              dec_wb_stat(&bdi->wb, WB_WRITEBACK);
                dec_zone_page_state(req->pages[i], NR_WRITEBACK_TEMP);
 -              bdi_writeout_inc(bdi);
 +              wb_writeout_inc(&bdi->wb);
        }
        wake_up(&fi->page_waitq);
  }
@@@ -1611,8 -1611,7 +1611,8 @@@ static int fuse_writepage_locked(struc
        if (!req)
                goto err;
  
 -      req->background = 1; /* writeback always goes to bg_queue */
 +      /* writeback always goes to bg_queue */
 +      __set_bit(FR_BACKGROUND, &req->flags);
        tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
        if (!tmp_page)
                goto err_free;
        req->end = fuse_writepage_end;
        req->inode = inode;
  
 -      inc_bdi_stat(inode_to_bdi(inode), BDI_WRITEBACK);
 +      inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
        inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
  
        spin_lock(&fc->lock);
@@@ -1743,15 -1742,16 +1743,15 @@@ static bool fuse_writepage_in_flight(st
                }
        }
  
 -      if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT ||
 -                                      old_req->state == FUSE_REQ_PENDING)) {
 +      if (old_req->num_pages == 1 && test_bit(FR_PENDING, &old_req->flags)) {
                struct backing_dev_info *bdi = inode_to_bdi(page->mapping->host);
  
                copy_highpage(old_req->pages[0], page);
                spin_unlock(&fc->lock);
  
 -              dec_bdi_stat(bdi, BDI_WRITEBACK);
 +              dec_wb_stat(&bdi->wb, WB_WRITEBACK);
                dec_zone_page_state(page, NR_WRITEBACK_TEMP);
 -              bdi_writeout_inc(bdi);
 +              wb_writeout_inc(&bdi->wb);
                fuse_writepage_free(fc, new_req);
                fuse_request_free(new_req);
                goto out;
@@@ -1830,7 -1830,7 +1830,7 @@@ static int fuse_writepages_fill(struct 
                req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
                req->misc.write.next = NULL;
                req->in.argpages = 1;
 -              req->background = 1;
 +              __set_bit(FR_BACKGROUND, &req->flags);
                req->num_pages = 0;
                req->end = fuse_writepage_end;
                req->inode = inode;
        req->page_descs[req->num_pages].offset = 0;
        req->page_descs[req->num_pages].length = PAGE_SIZE;
  
 -      inc_bdi_stat(inode_to_bdi(inode), BDI_WRITEBACK);
 +      inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
        inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
  
        err = 0;
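
[ Note: the fuse hunks convert struct fuse_req's int bitfields
  (background, force) and its state machine into FR_* bits in a flags
  word, manipulated with the standard bitops; handle_pending() below is
  a hypothetical stand-in: ]

	__set_bit(FR_BACKGROUND, &req->flags);	/* was: req->background = 1 */
	__set_bit(FR_FORCE, &req->flags);	/* was: req->force = 1 */

	/* was: old_req->state == FUSE_REQ_PENDING */
	if (test_bit(FR_PENDING, &req->flags))
		handle_pending(req);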
diff --combined fs/inode.c
index 069721f0cc0e0b733bb659fb0d7836cd71499690,648e71ce6ec216358e6733c2d6e0141c3d90c6f6..d30640f7a193879d07f4ff2c12efe3b817a386f3
@@@ -224,7 -224,6 +224,7 @@@ EXPORT_SYMBOL(free_inode_nonrcu)
  void __destroy_inode(struct inode *inode)
  {
        BUG_ON(inode_has_buffers(inode));
 +      inode_detach_wb(inode);
        security_inode_free(inode);
        fsnotify_inode_delete(inode);
        locks_free_lock_context(inode->i_flctx);
@@@ -841,7 -840,11 +841,11 @@@ unsigned int get_next_ino(void
        }
  #endif
  
-       *p = ++res;
+       res++;
+       /* get_next_ino should not provide a 0 inode number */
+       if (unlikely(!res))
+               res++;
+       *p = res;
        put_cpu_var(last_ino);
        return res;
  }
@@@ -1674,7 -1677,31 +1678,31 @@@ int should_remove_suid(struct dentry *d
  }
  EXPORT_SYMBOL(should_remove_suid);
  
- static int __remove_suid(struct dentry *dentry, int kill)
+ /*
+  * Return mask of changes for notify_change() that need to be done as a
+  * response to write or truncate. Return 0 if nothing has to be changed.
+  * Negative value on error (change should be denied).
+  */
+ int dentry_needs_remove_privs(struct dentry *dentry)
+ {
+       struct inode *inode = d_inode(dentry);
+       int mask = 0;
+       int ret;
+       if (IS_NOSEC(inode))
+               return 0;
+       mask = should_remove_suid(dentry);
+       ret = security_inode_need_killpriv(dentry);
+       if (ret < 0)
+               return ret;
+       if (ret)
+               mask |= ATTR_KILL_PRIV;
+       return mask;
+ }
+ EXPORT_SYMBOL(dentry_needs_remove_privs);
+ static int __remove_privs(struct dentry *dentry, int kill)
  {
        struct iattr newattrs;
  
        return notify_change(dentry, &newattrs, NULL);
  }
  
- int file_remove_suid(struct file *file)
+ /*
+  * Remove special file privileges (suid, capabilities) when file is written
+  * to or truncated.
+  */
+ int file_remove_privs(struct file *file)
  {
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = d_inode(dentry);
-       int killsuid;
-       int killpriv;
+       int kill;
        int error = 0;
  
        /* Fast path for nothing security related */
        if (IS_NOSEC(inode))
                return 0;
  
-       killsuid = should_remove_suid(dentry);
-       killpriv = security_inode_need_killpriv(dentry);
-       if (killpriv < 0)
-               return killpriv;
-       if (killpriv)
-               error = security_inode_killpriv(dentry);
-       if (!error && killsuid)
-               error = __remove_suid(dentry, killsuid);
-       if (!error && (inode->i_sb->s_flags & MS_NOSEC))
-               inode->i_flags |= S_NOSEC;
+       kill = file_needs_remove_privs(file);
+       if (kill < 0)
+               return kill;
+       if (kill)
+               error = __remove_privs(dentry, kill);
+       if (!error)
+               inode_has_no_xattr(inode);
  
        return error;
  }
- EXPORT_SYMBOL(file_remove_suid);
+ EXPORT_SYMBOL(file_remove_privs);
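
[ Note: file_needs_remove_privs() is called above but not shown in this
  hunk; presumably it is a thin wrapper over the newly exported
  dentry_needs_remove_privs(), something like: ]

	static int file_needs_remove_privs(struct file *file)
	{
		/* returns the ATTR_* kill mask, 0, or a negative error */
		return dentry_needs_remove_privs(file->f_path.dentry);
	}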
  
  /**
   *    file_update_time        -       update mtime and ctime time
@@@ -1967,9 -1993,8 +1994,8 @@@ EXPORT_SYMBOL(inode_dio_wait)
   * inode is being instantiated).  The reason for the cmpxchg() loop
   * --- which wouldn't be necessary if all code paths which modify
   * i_flags actually followed this rule, is that there is at least one
-  * code path which doesn't today --- for example,
-  * __generic_file_aio_write() calls file_remove_suid() without holding
-  * i_mutex --- so we use cmpxchg() out of an abundance of caution.
+  * code path which doesn't today so we use cmpxchg() out of an abundance
+  * of caution.
   *
   * In the long run, i_mutex is overkill, and we should probably look
   * at using the i_lock spinlock to protect i_flags, and then make sure
diff --combined fs/libfs.c
index 88a4cb418756c29c5273432ac0f364c340c7f6f7,4d9e6c118fe15d5926ba344b0220e8c49673d9aa..102edfd39000c15f14594a47b6236ba8edb5ccc8
  
  #include "internal.h"
  
- static inline int simple_positive(struct dentry *dentry)
- {
-       return d_really_is_positive(dentry) && !d_unhashed(dentry);
- }
  int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
                   struct kstat *stat)
  {
@@@ -1108,98 -1103,3 +1103,98 @@@ const struct inode_operations simple_sy
        .readlink = generic_readlink
  };
  EXPORT_SYMBOL(simple_symlink_inode_operations);
 +
 +/*
 + * Operations for a permanently empty directory.
 + */
 +static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 +{
 +      return ERR_PTR(-ENOENT);
 +}
 +
 +static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
 +                               struct kstat *stat)
 +{
 +      struct inode *inode = d_inode(dentry);
 +      generic_fillattr(inode, stat);
 +      return 0;
 +}
 +
 +static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr)
 +{
 +      return -EPERM;
 +}
 +
 +static int empty_dir_setxattr(struct dentry *dentry, const char *name,
 +                            const void *value, size_t size, int flags)
 +{
 +      return -EOPNOTSUPP;
 +}
 +
 +static ssize_t empty_dir_getxattr(struct dentry *dentry, const char *name,
 +                                void *value, size_t size)
 +{
 +      return -EOPNOTSUPP;
 +}
 +
 +static int empty_dir_removexattr(struct dentry *dentry, const char *name)
 +{
 +      return -EOPNOTSUPP;
 +}
 +
 +static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size)
 +{
 +      return -EOPNOTSUPP;
 +}
 +
 +static const struct inode_operations empty_dir_inode_operations = {
 +      .lookup         = empty_dir_lookup,
 +      .permission     = generic_permission,
 +      .setattr        = empty_dir_setattr,
 +      .getattr        = empty_dir_getattr,
 +      .setxattr       = empty_dir_setxattr,
 +      .getxattr       = empty_dir_getxattr,
 +      .removexattr    = empty_dir_removexattr,
 +      .listxattr      = empty_dir_listxattr,
 +};
 +
 +static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence)
 +{
 +      /* An empty directory has two entries . and .. at offsets 0 and 1 */
 +      return generic_file_llseek_size(file, offset, whence, 2, 2);
 +}
 +
 +static int empty_dir_readdir(struct file *file, struct dir_context *ctx)
 +{
 +      dir_emit_dots(file, ctx);
 +      return 0;
 +}
 +
 +static const struct file_operations empty_dir_operations = {
 +      .llseek         = empty_dir_llseek,
 +      .read           = generic_read_dir,
 +      .iterate        = empty_dir_readdir,
 +      .fsync          = noop_fsync,
 +};
 +
 +
 +void make_empty_dir_inode(struct inode *inode)
 +{
 +      set_nlink(inode, 2);
 +      inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
 +      inode->i_uid = GLOBAL_ROOT_UID;
 +      inode->i_gid = GLOBAL_ROOT_GID;
 +      inode->i_rdev = 0;
 +      inode->i_size = 2;
 +      inode->i_blkbits = PAGE_SHIFT;
 +      inode->i_blocks = 0;
 +
 +      inode->i_op = &empty_dir_inode_operations;
 +      inode->i_fop = &empty_dir_operations;
 +}
 +
 +bool is_empty_dir_inode(struct inode *inode)
 +{
 +      return (inode->i_fop == &empty_dir_operations) &&
 +              (inode->i_op == &empty_dir_inode_operations);
 +}
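
[ Note: a hedged usage sketch for the new permanently-empty-directory
  helpers, e.g. for a pseudo-filesystem directory that must stay safe
  to use as a mount point; myfs_make_mountpoint() and the dcache wiring
  around it are hypothetical: ]

	static int myfs_make_mountpoint(struct super_block *sb)
	{
		struct inode *inode = new_inode(sb);

		if (!inode)
			return -ENOMEM;
		inode->i_ino = get_next_ino();
		/* lookups fail with -ENOENT, readdir only emits . and .. */
		make_empty_dir_inode(inode);
		WARN_ON(!is_empty_dir_inode(inode));
		return 0;	/* d_instantiate() etc. omitted here */
	}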
diff --combined fs/nfs/dir.c
index 21457bb0edd62b42af307d5850b336711f41e82f,b9108f4254a70477d14e9f11a3442aaed56cecbe..547308a5ec6f4a738006370e523c751c90927e1b
@@@ -1470,6 -1470,9 +1470,6 @@@ static int nfs_finish_open(struct nfs_o
  {
        int err;
  
 -      if ((open_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
 -              *opened |= FILE_CREATED;
 -
        err = finish_open(file, dentry, do_open, opened);
        if (err)
                goto out;
@@@ -1768,7 -1771,7 +1768,7 @@@ EXPORT_SYMBOL_GPL(nfs_mkdir)
  
  static void nfs_dentry_handle_enoent(struct dentry *dentry)
  {
-       if (d_really_is_positive(dentry) && !d_unhashed(dentry))
+       if (simple_positive(dentry))
                d_delete(dentry);
  }
  
diff --combined fs/ntfs/file.c
index 2cd65367076458e84532eebcb67869944e3aa327,182bb93aa79cceb5bfe6312d1b5c104d3ed82657..262561fea923aa2315cffe91af91d12b399ded8c
@@@ -382,7 -382,7 +382,7 @@@ static ssize_t ntfs_prepare_file_for_wr
        base_ni = ni;
        if (NInoAttr(ni))
                base_ni = ni->ext.base_ntfs_ino;
-       err = file_remove_suid(file);
+       err = file_remove_privs(file);
        if (unlikely(err))
                goto out;
        /*
@@@ -525,8 -525,7 +525,8 @@@ static inline int __ntfs_grab_cache_pag
                                }
                        }
                        err = add_to_page_cache_lru(*cached_page, mapping,
 -                                      index, GFP_KERNEL);
 +                                      index,
 +                                      GFP_KERNEL & mapping_gfp_mask(mapping));
                        if (unlikely(err)) {
                                if (err == -EEXIST)
                                        continue;
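
[ Note: ANDing GFP_KERNEL with mapping_gfp_mask() lets the
  address_space veto allocation modes it cannot tolerate (for example
  mappings that clear __GFP_FS to avoid recursion).  The pattern,
  restated outside the ntfs loop: ]

	gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping);
	int err = add_to_page_cache_lru(page, mapping, index, gfp);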
diff --combined fs/overlayfs/super.c
index 8a08c582bc22e400a16f395609200ed105d3933f,84c5e27fbfd9c6bd7dcd4467f47c018af33a5614..7466ff339c667ea63ead6bf04f18d5662ef3d142
@@@ -273,57 -273,11 +273,58 @@@ static void ovl_dentry_release(struct d
        }
  }
  
 +static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
 +{
 +      struct ovl_entry *oe = dentry->d_fsdata;
 +      unsigned int i;
 +      int ret = 1;
 +
 +      for (i = 0; i < oe->numlower; i++) {
 +              struct dentry *d = oe->lowerstack[i].dentry;
 +
 +              if (d->d_flags & DCACHE_OP_REVALIDATE) {
 +                      ret = d->d_op->d_revalidate(d, flags);
 +                      if (ret < 0)
 +                              return ret;
 +                      if (!ret) {
 +                              if (!(flags & LOOKUP_RCU))
 +                                      d_invalidate(d);
 +                              return -ESTALE;
 +                      }
 +              }
 +      }
 +      return 1;
 +}
 +
 +static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
 +{
 +      struct ovl_entry *oe = dentry->d_fsdata;
 +      unsigned int i;
 +      int ret = 1;
 +
 +      for (i = 0; i < oe->numlower; i++) {
 +              struct dentry *d = oe->lowerstack[i].dentry;
 +
 +              if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) {
 +                      ret = d->d_op->d_weak_revalidate(d, flags);
 +                      if (ret <= 0)
 +                              break;
 +              }
 +      }
 +      return ret;
 +}
 +
  static const struct dentry_operations ovl_dentry_operations = {
        .d_release = ovl_dentry_release,
+       .d_select_inode = ovl_d_select_inode,
  };
  
 +static const struct dentry_operations ovl_reval_dentry_operations = {
 +      .d_release = ovl_dentry_release,
 +      .d_revalidate = ovl_dentry_revalidate,
 +      .d_weak_revalidate = ovl_dentry_weak_revalidate,
 +};
 +
  static struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
  {
        size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
        return oe;
  }
  
 +static bool ovl_dentry_remote(struct dentry *dentry)
 +{
 +      return dentry->d_flags &
 +              (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
 +}
 +
 +static bool ovl_dentry_weird(struct dentry *dentry)
 +{
 +      return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT |
 +                                DCACHE_MANAGE_TRANSIT |
 +                                DCACHE_OP_HASH |
 +                                DCACHE_OP_COMPARE);
 +}
 +
  static inline struct dentry *ovl_lookup_real(struct dentry *dir,
                                             struct qstr *name)
  {
        } else if (!dentry->d_inode) {
                dput(dentry);
                dentry = NULL;
 +      } else if (ovl_dentry_weird(dentry)) {
 +              dput(dentry);
 +              /* Don't support traversing automounts and other weirdness */
 +              dentry = ERR_PTR(-EREMOTE);
        }
        return dentry;
  }
@@@ -415,11 -351,6 +416,11 @@@ struct dentry *ovl_lookup(struct inode 
                        goto out;
  
                if (this) {
 +                      if (unlikely(ovl_dentry_remote(this))) {
 +                              dput(this);
 +                              err = -EREMOTE;
 +                              goto out;
 +                      }
                        if (ovl_is_whiteout(this)) {
                                dput(this);
                                this = NULL;
@@@ -599,7 -530,7 +600,7 @@@ static int ovl_remount(struct super_blo
  {
        struct ovl_fs *ufs = sb->s_fs_info;
  
 -      if (!(*flags & MS_RDONLY) && !ufs->upper_mnt)
 +      if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir))
                return -EROFS;
  
        return 0;
@@@ -764,6 -695,25 +765,6 @@@ static void ovl_unescape(char *s
        }
  }
  
 -static bool ovl_is_allowed_fs_type(struct dentry *root)
 -{
 -      const struct dentry_operations *dop = root->d_op;
 -
 -      /*
 -       * We don't support:
 -       *  - automount filesystems
 -       *  - filesystems with revalidate (FIXME for lower layer)
 -       *  - filesystems with case insensitive names
 -       */
 -      if (dop &&
 -          (dop->d_manage || dop->d_automount ||
 -           dop->d_revalidate || dop->d_weak_revalidate ||
 -           dop->d_compare || dop->d_hash)) {
 -              return false;
 -      }
 -      return true;
 -}
 -
  static int ovl_mount_dir_noesc(const char *name, struct path *path)
  {
        int err = -EINVAL;
                goto out;
        }
        err = -EINVAL;
 -      if (!ovl_is_allowed_fs_type(path->dentry)) {
 +      if (ovl_dentry_weird(path->dentry)) {
                pr_err("overlayfs: filesystem on '%s' not supported\n", name);
                goto out_put;
        }
@@@ -802,21 -752,13 +803,21 @@@ static int ovl_mount_dir(const char *na
        if (tmp) {
                ovl_unescape(tmp);
                err = ovl_mount_dir_noesc(tmp, path);
 +
 +              if (!err)
 +                      if (ovl_dentry_remote(path->dentry)) {
 +                              pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
 +                                     tmp);
 +                              path_put(path);
 +                              err = -EINVAL;
 +                      }
                kfree(tmp);
        }
        return err;
  }
  
  static int ovl_lower_dir(const char *name, struct path *path, long *namelen,
 -                       int *stack_depth)
 +                       int *stack_depth, bool *remote)
  {
        int err;
        struct kstatfs statfs;
        *namelen = max(*namelen, statfs.f_namelen);
        *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
  
 +      if (ovl_dentry_remote(path->dentry))
 +              *remote = true;
 +
        return 0;
  
  out_put:
@@@ -889,7 -828,6 +890,7 @@@ static int ovl_fill_super(struct super_
        unsigned int numlower;
        unsigned int stacklen = 0;
        unsigned int i;
 +      bool remote = false;
        int err;
  
        err = -ENOMEM;
        lower = lowertmp;
        for (numlower = 0; numlower < stacklen; numlower++) {
                err = ovl_lower_dir(lower, &stack[numlower],
 -                                  &ufs->lower_namelen, &sb->s_stack_depth);
 +                                  &ufs->lower_namelen, &sb->s_stack_depth,
 +                                  &remote);
                if (err)
                        goto out_put_lowerpath;
  
                ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
                err = PTR_ERR(ufs->workdir);
                if (IS_ERR(ufs->workdir)) {
 -                      pr_err("overlayfs: failed to create directory %s/%s\n",
 -                             ufs->config.workdir, OVL_WORKDIR_NAME);
 -                      goto out_put_upper_mnt;
 +                      pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
 +                              ufs->config.workdir, OVL_WORKDIR_NAME, -err);
 +                      sb->s_flags |= MS_RDONLY;
 +                      ufs->workdir = NULL;
                }
        }
  
        if (!ufs->upper_mnt)
                sb->s_flags |= MS_RDONLY;
  
 -      sb->s_d_op = &ovl_dentry_operations;
 +      if (remote)
 +              sb->s_d_op = &ovl_reval_dentry_operations;
 +      else
 +              sb->s_d_op = &ovl_dentry_operations;
  
        err = -ENOMEM;
        oe = ovl_alloc_entry(numlower);
@@@ -1065,6 -998,7 +1066,6 @@@ out_put_lower_mnt
        kfree(ufs->lower_mnt);
  out_put_workdir:
        dput(ufs->workdir);
 -out_put_upper_mnt:
        mntput(ufs->upper_mnt);
  out_put_lowerpath:
        for (i = 0; i < numlower; i++)
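
[ Note: the DCACHE_OP_* bits in d_flags mirror which d_op methods a
  dentry carries, so "is this lower layer remote or weird?" is a cheap
  flags test; the mount path then picks one dentry_operations table for
  the whole superblock, since s_d_op applies to every dentry created
  under it.  Condensed from the hunks above: ]

	if (ovl_dentry_remote(stack[i].dentry))
		remote = true;	/* some lower layer has revalidate hooks */

	/* after scanning all layers, choose the sb-wide dentry ops once */
	sb->s_d_op = remote ? &ovl_reval_dentry_operations
			    : &ovl_dentry_operations;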
diff --combined fs/seq_file.c
index 1d9c1cbd4d0b4a1a17dd924e876c854b8d722169,d8a0545ad7ea43c08eefa531ab58b59073e6c422..ce9e39fd5dafc768c27b2ceaa4e69a02c3ed1e6e
@@@ -48,21 -48,18 +48,21 @@@ static void *seq_buf_alloc(unsigned lon
   *    ERR_PTR(error).  In the end of sequence they return %NULL. ->show()
   *    returns 0 in case of success and negative number in case of error.
   *    Returning SEQ_SKIP means "discard this element and move on".
 + *    Note: seq_open() will allocate a struct seq_file and store its
 + *    pointer in @file->private_data. This pointer should not be modified.
   */
  int seq_open(struct file *file, const struct seq_operations *op)
  {
 -      struct seq_file *p = file->private_data;
 +      struct seq_file *p;
 +
 +      WARN_ON(file->private_data);
 +
 +      p = kzalloc(sizeof(*p), GFP_KERNEL);
 +      if (!p)
 +              return -ENOMEM;
 +
 +      file->private_data = p;
  
 -      if (!p) {
 -              p = kmalloc(sizeof(*p), GFP_KERNEL);
 -              if (!p)
 -                      return -ENOMEM;
 -              file->private_data = p;
 -      }
 -      memset(p, 0, sizeof(*p));
        mutex_init(&p->lock);
        p->op = op;
  #ifdef CONFIG_USER_NS
@@@ -490,6 -487,20 +490,20 @@@ int seq_path(struct seq_file *m, const 
  }
  EXPORT_SYMBOL(seq_path);
  
+ /**
+  * seq_file_path - seq_file interface to print a pathname of a file
+  * @m: the seq_file handle
+  * @file: the struct file to print
+  * @esc: set of characters to escape in the output
+  *
+  * return the absolute path to the file.
+  */
+ int seq_file_path(struct seq_file *m, struct file *file, const char *esc)
+ {
+       return seq_path(m, &file->f_path, esc);
+ }
+ EXPORT_SYMBOL(seq_file_path);
  /*
   * Same as seq_path, but relative to supplied root.
   */
@@@ -541,7 -552,6 +555,7 @@@ int seq_dentry(struct seq_file *m, stru
  
        return res;
  }
 +EXPORT_SYMBOL(seq_dentry);
  
  static void *single_start(struct seq_file *p, loff_t *pos)
  {
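
[ Note: a hedged usage sketch for the new seq_file_path() helper; the
  m->private stash and the escape set are illustrative, not from this
  diff: ]

	static int myfs_show(struct seq_file *m, void *v)
	{
		struct file *file = m->private;	/* hypothetical stash */

		seq_file_path(m, file, " \t\n\\");	/* escape whitespace */
		seq_putc(m, '\n');
		return 0;
	}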
diff --combined fs/tracefs/inode.c
index a43df11a163f17fb743a23aaca2c84a296ec50b0,6e8a1400d6629c1973b4fb9f66256949527ae0d2..cbc8d5d2755a691a560c46f7105e85ca6220d835
@@@ -496,16 -496,11 +496,11 @@@ struct dentry *tracefs_create_instance_
        return dentry;
  }
  
- static inline int tracefs_positive(struct dentry *dentry)
- {
-       return dentry->d_inode && !d_unhashed(dentry);
- }
  static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)
  {
        int ret = 0;
  
-       if (tracefs_positive(dentry)) {
+       if (simple_positive(dentry)) {
                if (dentry->d_inode) {
                        dget(dentry);
                        switch (dentry->d_inode->i_mode & S_IFMT) {
@@@ -582,7 -577,7 +577,7 @@@ void tracefs_remove_recursive(struct de
         */
        spin_lock(&parent->d_lock);
        list_for_each_entry(child, &parent->d_subdirs, d_child) {
-               if (!tracefs_positive(child))
+               if (!simple_positive(child))
                        continue;
  
                /* perhaps simple_empty(child) makes more sense */
                 * from d_subdirs. When releasing the parent->d_lock we can
                 * no longer trust that the next pointer is valid.
                 * Restart the loop. We'll skip this one with the
-                * tracefs_positive() check.
+                * simple_positive() check.
                 */
                goto loop;
        }
@@@ -631,12 -626,14 +626,12 @@@ bool tracefs_initialized(void
        return tracefs_registered;
  }
  
 -static struct kobject *trace_kobj;
 -
  static int __init tracefs_init(void)
  {
        int retval;
  
 -      trace_kobj = kobject_create_and_add("tracing", kernel_kobj);
 -      if (!trace_kobj)
 +      retval = sysfs_create_mount_point(kernel_kobj, "tracing");
 +      if (retval)
                return -EINVAL;
  
        retval = register_filesystem(&trace_fs_type);
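
[ Note: unlike the equivalent debugfs_init() hunk earlier in this diff,
  tracefs_init() still returns -EINVAL rather than the
  sysfs_create_mount_point() result; propagating it would look like: ]

	retval = sysfs_create_mount_point(kernel_kobj, "tracing");
	if (retval)
		return retval;	/* pass the real errno up, not -EINVAL */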
diff --combined fs/ufs/super.c
index 098508a93c7b302fe8e6ab65ec7cd753d2515634,dc33f94163404f04155cddb7a76eae84c96df42b..250579a80d90bd379caee1b7aeaf252dda97c34d
@@@ -80,7 -80,6 +80,7 @@@
  #include <linux/stat.h>
  #include <linux/string.h>
  #include <linux/blkdev.h>
 +#include <linux/backing-dev.h>
  #include <linux/init.h>
  #include <linux/parser.h>
  #include <linux/buffer_head.h>
@@@ -695,6 -694,7 +695,7 @@@ static int ufs_sync_fs(struct super_blo
        unsigned flags;
  
        lock_ufs(sb);
+       mutex_lock(&UFS_SB(sb)->s_lock);
  
        UFSD("ENTER\n");
  
        ufs_put_cstotal(sb);
  
        UFSD("EXIT\n");
+       mutex_unlock(&UFS_SB(sb)->s_lock);
        unlock_ufs(sb);
  
        return 0;
@@@ -800,6 -801,7 +802,7 @@@ static int ufs_fill_super(struct super_
        UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY));
        
        mutex_init(&sbi->mutex);
+       mutex_init(&sbi->s_lock);
        spin_lock_init(&sbi->work_lock);
        INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs);
        /*
@@@ -1278,6 -1280,7 +1281,7 @@@ static int ufs_remount (struct super_bl
  
        sync_filesystem(sb);
        lock_ufs(sb);
+       mutex_lock(&UFS_SB(sb)->s_lock);
        uspi = UFS_SB(sb)->s_uspi;
        flags = UFS_SB(sb)->s_flags;
        usb1 = ubh_get_usb_first(uspi);
        new_mount_opt = 0;
        ufs_set_opt (new_mount_opt, ONERROR_LOCK);
        if (!ufs_parse_options (data, &new_mount_opt)) {
+               mutex_unlock(&UFS_SB(sb)->s_lock);
                unlock_ufs(sb);
                return -EINVAL;
        }
                new_mount_opt |= ufstype;
        } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
                pr_err("ufstype can't be changed during remount\n");
+               mutex_unlock(&UFS_SB(sb)->s_lock);
                unlock_ufs(sb);
                return -EINVAL;
        }
  
        if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
                UFS_SB(sb)->s_mount_opt = new_mount_opt;
+               mutex_unlock(&UFS_SB(sb)->s_lock);
                unlock_ufs(sb);
                return 0;
        }
         */
  #ifndef CONFIG_UFS_FS_WRITE
                pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
+               mutex_unlock(&UFS_SB(sb)->s_lock);
                unlock_ufs(sb);
                return -EINVAL;
  #else
                    ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&
                    ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
                        pr_err("this ufstype is read-only supported\n");
+                       mutex_unlock(&UFS_SB(sb)->s_lock);
                        unlock_ufs(sb);
                        return -EINVAL;
                }
                if (!ufs_read_cylinder_structures(sb)) {
                        pr_err("failed during remounting\n");
+                       mutex_unlock(&UFS_SB(sb)->s_lock);
                        unlock_ufs(sb);
                        return -EPERM;
                }
  #endif
        }
        UFS_SB(sb)->s_mount_opt = new_mount_opt;
+       mutex_unlock(&UFS_SB(sb)->s_lock);
        unlock_ufs(sb);
        return 0;
  }
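
[ Note: every early return in ufs_remount() now needs the unlock pair,
  released in LIFO order (s_lock first, then lock_ufs).  A hedged
  sketch of the same structure with a single unlock site;
  myfs_remount_sketch() is illustrative only: ]

	static int myfs_remount_sketch(struct super_block *sb, char *data)
	{
		unsigned new_mount_opt = 0;
		int err = 0;

		lock_ufs(sb);
		mutex_lock(&UFS_SB(sb)->s_lock);
		if (!ufs_parse_options(data, &new_mount_opt)) {
			err = -EINVAL;
			goto out;
		}
		/* ... remaining remount work jumps to out on failure ... */
	out:
		mutex_unlock(&UFS_SB(sb)->s_lock);
		unlock_ufs(sb);
		return err;
	}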
diff --combined fs/xfs/xfs_file.c
index 874507de3485b818e94bfcd0348f79e9747fdce6,71c2c712e609afca2e9f5c93049ca2e641221108..f0e8249722d40a0dcaf9f31bc25effaba142b0a9
@@@ -41,7 -41,6 +41,7 @@@
  #include <linux/dcache.h>
  #include <linux/falloc.h>
  #include <linux/pagevec.h>
 +#include <linux/backing-dev.h>
  
  static const struct vm_operations_struct xfs_file_vm_ops;
  
@@@ -80,15 -79,14 +80,15 @@@ xfs_rw_ilock_demote
  }
  
  /*
 - *    xfs_iozero
 + * xfs_iozero clears the specified range supplied via the page cache (except in
 + * the DAX case). Writes through the page cache will allocate blocks over holes,
 + * though the callers usually map the holes first and avoid them. If a block is
 + * not completely zeroed, then it will be read from disk before being partially
 + * zeroed.
   *
 - *    xfs_iozero clears the specified range of buffer supplied,
 - *    and marks all the affected blocks as valid and modified.  If
 - *    an affected block is not allocated, it will be allocated.  If
 - *    an affected block is not completely overwritten, and is not
 - *    valid before the operation, it will be read from disk before
 - *    being partially zeroed.
 + * In the DAX case, we can just directly write to the underlying pages. This
 + * will not allocate blocks, but will avoid holes and unwritten extents and so
 + * not do unnecessary work.
   */
  int
  xfs_iozero(
@@@ -98,8 -96,7 +98,8 @@@
  {
        struct page             *page;
        struct address_space    *mapping;
 -      int                     status;
 +      int                     status = 0;
 +
  
        mapping = VFS_I(ip)->i_mapping;
        do {
                if (bytes > count)
                        bytes = count;
  
 -              status = pagecache_write_begin(NULL, mapping, pos, bytes,
 -                                      AOP_FLAG_UNINTERRUPTIBLE,
 -                                      &page, &fsdata);
 -              if (status)
 -                      break;
 +              if (IS_DAX(VFS_I(ip))) {
 +                      status = dax_zero_page_range(VFS_I(ip), pos, bytes,
 +                                                   xfs_get_blocks_direct);
 +                      if (status)
 +                              break;
 +              } else {
 +                      status = pagecache_write_begin(NULL, mapping, pos, bytes,
 +                                              AOP_FLAG_UNINTERRUPTIBLE,
 +                                              &page, &fsdata);
 +                      if (status)
 +                              break;
  
 -              zero_user(page, offset, bytes);
 +                      zero_user(page, offset, bytes);
  
 -              status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
 -                                      page, fsdata);
 -              WARN_ON(status <= 0); /* can't return less than zero! */
 +                      status = pagecache_write_end(NULL, mapping, pos, bytes,
 +                                              bytes, page, fsdata);
 +                      WARN_ON(status <= 0); /* can't return less than zero! */
 +                      status = 0;
 +              }
                pos += bytes;
                count -= bytes;
 -              status = 0;
        } while (count);
  
 -      return (-status);
 +      return status;
  }
  
  int
@@@ -148,7 -138,7 +148,7 @@@ xfs_update_prealloc_flags
        tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID);
        error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0);
        if (error) {
 -              xfs_trans_cancel(tp, 0);
 +              xfs_trans_cancel(tp);
                return error;
        }
  
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
        if (flags & XFS_PREALLOC_SYNC)
                xfs_trans_set_sync(tp);
 -      return xfs_trans_commit(tp, 0);
 +      return xfs_trans_commit(tp);
  }
  
  /*
@@@ -294,7 -284,7 +294,7 @@@ xfs_file_read_iter
        if (file->f_mode & FMODE_NOCMTIME)
                ioflags |= XFS_IO_INVIS;
  
 -      if (unlikely(ioflags & XFS_IO_ISDIRECT)) {
 +      if ((ioflags & XFS_IO_ISDIRECT) && !IS_DAX(inode)) {
                xfs_buftarg_t   *target =
                        XFS_IS_REALTIME_INODE(ip) ?
                                mp->m_rtdev_targp : mp->m_ddev_targp;
@@@ -388,11 -378,7 +388,11 @@@ xfs_file_splice_read
  
        trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
  
 -      ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
 +      /* for dax, we need to avoid the page cache */
 +      if (IS_DAX(VFS_I(ip)))
 +              ret = default_file_splice_read(infilp, ppos, pipe, count, flags);
 +      else
 +              ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
        if (ret > 0)
                XFS_STATS_ADD(xs_read_bytes, ret);
  
@@@ -577,6 -563,13 +577,13 @@@ restart
        if (error)
                return error;
  
+       /* For changing security info in file_remove_privs() we need i_mutex */
+       if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
+               xfs_rw_iunlock(ip, *iolock);
+               *iolock = XFS_IOLOCK_EXCL;
+               xfs_rw_ilock(ip, *iolock);
+               goto restart;
+       }
        /*
         * If the offset is beyond the size of the file, we need to zero any
         * blocks that fall between the existing EOF and the start of this
         * setgid bits if the process is not being run by root.  This keeps
         * people from modifying setuid and setgid binaries.
         */
-       return file_remove_suid(file);
+       if (!IS_NOSEC(inode))
+               return file_remove_privs(file);
+       return 0;
  }
  
  /*
@@@ -686,7 -681,7 +695,7 @@@ xfs_file_dio_aio_write
                                        mp->m_rtdev_targp : mp->m_ddev_targp;
  
        /* DIO must be aligned to device logical sector size */
 -      if ((pos | count) & target->bt_logical_sectormask)
 +      if (!IS_DAX(inode) && ((pos | count) & target->bt_logical_sectormask))
                return -EINVAL;
  
        /* "unaligned" here means not aligned to a filesystem block */
  out:
        xfs_rw_iunlock(ip, iolock);
  
 -      /* No fallback to buffered IO on errors for XFS. */
 -      ASSERT(ret < 0 || ret == count);
 +      /*
 +       * No fallback to buffered IO on errors for XFS. DAX can result in
 +       * partial writes, but direct IO will either complete fully or fail.
 +       */
 +      ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
        return ret;
  }
  
@@@ -859,7 -851,7 +868,7 @@@ xfs_file_write_iter
        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
                return -EIO;
  
 -      if (unlikely(iocb->ki_flags & IOCB_DIRECT))
 +      if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
                ret = xfs_file_dio_aio_write(iocb, from);
        else
                ret = xfs_file_buffered_aio_write(iocb, from);
@@@ -1080,6 -1072,17 +1089,6 @@@ xfs_file_readdir
        return xfs_readdir(ip, ctx, bufsize);
  }
  
 -STATIC int
 -xfs_file_mmap(
 -      struct file     *filp,
 -      struct vm_area_struct *vma)
 -{
 -      vma->vm_ops = &xfs_file_vm_ops;
 -
 -      file_accessed(filp);
 -      return 0;
 -}
 -
  /*
   * This type is designed to indicate the type of offset we would like
   * to search from page cache for xfs_seek_hole_data().
@@@ -1460,83 -1463,48 +1469,83 @@@ xfs_file_llseek
   * ordering of:
   *
   * mmap_sem (MM)
 - *   i_mmap_lock (XFS - truncate serialisation)
 - *     page_lock (MM)
 - *       i_lock (XFS - extent map serialisation)
 + *   sb_start_pagefault(vfs, freeze)
 + *     i_mmap_lock (XFS - truncate serialisation)
 + *       page_lock (MM)
 + *         i_lock (XFS - extent map serialisation)
 + */
 +
 +/*
 + * mmap()d file has taken write protection fault and is being made writable. We
 + * can set the page state up correctly for a writable page, which means we can
 + * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
 + * mapping.
   */
  STATIC int
 -xfs_filemap_fault(
 +xfs_filemap_page_mkwrite(
        struct vm_area_struct   *vma,
        struct vm_fault         *vmf)
  {
 -      struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
 -      int                     error;
 +      struct inode            *inode = file_inode(vma->vm_file);
 +      int                     ret;
  
 -      trace_xfs_filemap_fault(ip);
 +      trace_xfs_filemap_page_mkwrite(XFS_I(inode));
  
 -      xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
 -      error = filemap_fault(vma, vmf);
 -      xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
 +      sb_start_pagefault(inode->i_sb);
 +      file_update_time(vma->vm_file);
 +      xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
  
 -      return error;
 +      if (IS_DAX(inode)) {
 +              ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct,
 +                                  xfs_end_io_dax_write);
 +      } else {
 +              ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks);
 +              ret = block_page_mkwrite_return(ret);
 +      }
 +
 +      xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 +      sb_end_pagefault(inode->i_sb);
 +
 +      return ret;
  }
  
 -/*
 - * mmap()d file has taken write protection fault and is being made writable. We
 - * can set the page state up correctly for a writable page, which means we can
 - * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
 - * mapping.
 - */
  STATIC int
 -xfs_filemap_page_mkwrite(
 +xfs_filemap_fault(
        struct vm_area_struct   *vma,
        struct vm_fault         *vmf)
  {
 -      struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
 -      int                     error;
 +      struct xfs_inode        *ip = XFS_I(file_inode(vma->vm_file));
 +      int                     ret;
 +
 +      trace_xfs_filemap_fault(ip);
  
 -      trace_xfs_filemap_page_mkwrite(ip);
 +      /* DAX can shortcut the normal fault path on write faults! */
 +      if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip)))
 +              return xfs_filemap_page_mkwrite(vma, vmf);
  
        xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
 -      error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
 +      ret = filemap_fault(vma, vmf);
        xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
  
 -      return error;
 +      return ret;
 +}
 +
 +static const struct vm_operations_struct xfs_file_vm_ops = {
 +      .fault          = xfs_filemap_fault,
 +      .map_pages      = filemap_map_pages,
 +      .page_mkwrite   = xfs_filemap_page_mkwrite,
 +};
 +
 +STATIC int
 +xfs_file_mmap(
 +      struct file     *filp,
 +      struct vm_area_struct *vma)
 +{
 +      file_accessed(filp);
 +      vma->vm_ops = &xfs_file_vm_ops;
 +      if (IS_DAX(file_inode(filp)))
 +              vma->vm_flags |= VM_MIXEDMAP;
 +      return 0;
  }
  
  const struct file_operations xfs_file_operations = {
@@@ -1567,3 -1535,9 +1576,3 @@@ const struct file_operations xfs_dir_fi
  #endif
        .fsync          = xfs_dir_fsync,
  };
 -
 -static const struct vm_operations_struct xfs_file_vm_ops = {
 -      .fault          = xfs_filemap_fault,
 -      .map_pages      = filemap_map_pages,
 -      .page_mkwrite   = xfs_filemap_page_mkwrite,
 -};
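
The net effect of the xfs_file.c changes: DAX write faults are routed straight to the
->page_mkwrite path, every mkwrite is bracketed by freeze protection per the lock-ordering
comment above, and DAX mappings are flagged VM_MIXEDMAP at mmap() time. A condensed
sketch of that bracketing for a generic buffered filesystem -- demo_get_block is a
hypothetical get_block_t, and the real XFS code additionally takes XFS_MMAPLOCK_SHARED:

static int demo_get_block(struct inode *inode, sector_t iblock,
                          struct buffer_head *bh_result, int create);

static int demo_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct inode    *inode = file_inode(vma->vm_file);
        int             ret;

        sb_start_pagefault(inode->i_sb);        /* freeze protection first */
        file_update_time(vma->vm_file);
        /* fs-private truncate serialisation would be taken here */
        ret = __block_page_mkwrite(vma, vmf, demo_get_block);
        ret = block_page_mkwrite_return(ret);   /* errno -> VM_FAULT_* */
        sb_end_pagefault(inode->i_sb);
        return ret;
}
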
diff --combined include/linux/fs.h
index 8a81fcbb0074d9d89dc7aff09367d88c051f69d4,1e658b11c2656bcdcabc939fa0b120bf3e6e0e29..a0653e560c2679a2eea870035a55cd3282e47894
@@@ -35,7 -35,6 +35,7 @@@
  #include <uapi/linux/fs.h>
  
  struct backing_dev_info;
 +struct bdi_writeback;
  struct export_operations;
  struct hd_geometry;
  struct iovec;
@@@ -70,7 -69,6 +70,7 @@@ typedef int (get_block_t)(struct inode 
                        struct buffer_head *bh_result, int create);
  typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
                        ssize_t bytes, void *private);
 +typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate);
  
  #define MAY_EXEC              0x00000001
  #define MAY_WRITE             0x00000002
@@@ -636,14 -634,6 +636,14 @@@ struct inode 
  
        struct hlist_node       i_hash;
        struct list_head        i_wb_list;      /* backing dev IO list */
 +#ifdef CONFIG_CGROUP_WRITEBACK
 +      struct bdi_writeback    *i_wb;          /* the associated cgroup wb */
 +
 +      /* foreign inode detection, see wbc_detach_inode() */
 +      int                     i_wb_frn_winner;
 +      u16                     i_wb_frn_avg_time;
 +      u16                     i_wb_frn_history;
 +#endif
        struct list_head        i_lru;          /* inode LRU list */
        struct list_head        i_sb_list;
        union {
@@@ -1242,8 -1232,6 +1242,8 @@@ struct mm_struct
  #define UMOUNT_NOFOLLOW       0x00000008      /* Don't follow symlink on umount */
  #define UMOUNT_UNUSED 0x80000000      /* Flag guaranteed to be unused */
  
 +/* sb->s_iflags */
 +#define SB_I_CGROUPWB 0x00000001      /* cgroup-aware writeback enabled */
  
  /* Possible states of 'frozen' field */
  enum {
@@@ -1282,7 -1270,6 +1282,7 @@@ struct super_block 
        const struct quotactl_ops       *s_qcop;
        const struct export_operations *s_export_op;
        unsigned long           s_flags;
 +      unsigned long           s_iflags;       /* internal SB_I_* flags */
        unsigned long           s_magic;
        struct dentry           *s_root;
        struct rw_semaphore     s_umount;
@@@ -1654,7 -1641,6 +1654,6 @@@ struct inode_operations 
        int (*set_acl)(struct inode *, struct posix_acl *, int);
  
        /* WARNING: probably going away soon, do not use! */
-       int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
  } ____cacheline_aligned;
  
  ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
@@@ -1819,11 -1805,6 +1818,11 @@@ struct super_operations 
   *
   * I_DIO_WAKEUP               Never set.  Only used as a key for wait_on_bit().
   *
 + * I_WB_SWITCH                Cgroup bdi_writeback switching in progress.  Used to
 + *                    synchronize competing switching instances and to tell
 + *                    wb stat updates to grab mapping->tree_lock.  See
 + *                    inode_switch_wb_work_fn() for details.
 + *
   * Q: What is the difference between I_WILL_FREE and I_FREEING?
   */
  #define I_DIRTY_SYNC          (1 << 0)
  #define I_DIRTY_TIME          (1 << 11)
  #define __I_DIRTY_TIME_EXPIRED        12
  #define I_DIRTY_TIME_EXPIRED  (1 << __I_DIRTY_TIME_EXPIRED)
 +#define I_WB_SWITCH           (1 << 13)
  
  #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
  #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
@@@ -1917,7 -1897,6 +1916,7 @@@ struct file_system_type 
  #define FS_HAS_SUBTYPE                4
  #define FS_USERNS_MOUNT               8       /* Can be mounted by userns root */
  #define FS_USERNS_DEV_MOUNT   16 /* A userns mount does not imply MNT_NODEV */
 +#define FS_USERNS_VISIBLE     32      /* FS must already be visible */
  #define FS_RENAME_DOES_D_MOVE 32768   /* FS will handle d_move() during rename() internally. */
        struct dentry *(*mount) (struct file_system_type *, int,
                       const char *, void *);
@@@ -2005,6 -1984,7 +2004,6 @@@ extern int vfs_ustat(dev_t, struct ksta
  extern int freeze_super(struct super_block *super);
  extern int thaw_super(struct super_block *super);
  extern bool our_mnt(struct vfsmount *mnt);
 -extern bool fs_fully_visible(struct file_system_type *);
  
  extern int current_umask(void);
  
@@@ -2213,7 -2193,6 +2212,6 @@@ extern struct file *file_open_name(stru
  extern struct file *filp_open(const char *, int, umode_t);
  extern struct file *file_open_root(struct dentry *, struct vfsmount *,
                                   const char *, int);
- extern int vfs_open(const struct path *, struct file *, const struct cred *);
  extern struct file * dentry_open(const struct path *, int, const struct cred *);
  extern int filp_close(struct file *, fl_owner_t id);
  
@@@ -2260,13 -2239,7 +2258,13 @@@ extern struct super_block *freeze_bdev(
  extern void emergency_thaw_all(void);
  extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
  extern int fsync_bdev(struct block_device *);
 -extern int sb_is_blkdev_sb(struct super_block *sb);
 +
 +extern struct super_block *blockdev_superblock;
 +
 +static inline bool sb_is_blkdev_sb(struct super_block *sb)
 +{
 +      return sb == blockdev_superblock;
 +}
  #else
  static inline void bd_forget(struct inode *inode) {}
  static inline int sync_blockdev(struct block_device *bdev) { return 0; }
@@@ -2305,9 -2278,6 +2303,9 @@@ extern struct block_device *blkdev_get_
  extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
                                              void *holder);
  extern void blkdev_put(struct block_device *bdev, fmode_t mode);
 +extern int __blkdev_reread_part(struct block_device *bdev);
 +extern int blkdev_reread_part(struct block_device *bdev);
 +
  #ifdef CONFIG_SYSFS
  extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
  extern void bd_unlink_disk_holder(struct block_device *bdev,
@@@ -2530,6 -2500,8 +2528,8 @@@ extern struct file * open_exec(const ch
  extern int is_subdir(struct dentry *, struct dentry *);
  extern int path_is_under(struct path *, struct path *);
  
+ extern char *file_path(struct file *, char *, int);
  #include <linux/err.h>
  
  /* needed for stackable file system support */
@@@ -2581,7 -2553,12 +2581,12 @@@ extern struct inode *new_inode_pseudo(s
  extern struct inode *new_inode(struct super_block *sb);
  extern void free_inode_nonrcu(struct inode *inode);
  extern int should_remove_suid(struct dentry *);
- extern int file_remove_suid(struct file *);
+ extern int file_remove_privs(struct file *);
+ extern int dentry_needs_remove_privs(struct dentry *dentry);
+ static inline int file_needs_remove_privs(struct file *file)
+ {
+       return dentry_needs_remove_privs(file->f_path.dentry);
+ }
  
  extern void __insert_inode_hash(struct inode *, unsigned long hashval);
  static inline void insert_inode_hash(struct inode *inode)
@@@ -2656,13 -2633,9 +2661,13 @@@ ssize_t dax_do_io(struct kiocb *, struc
  int dax_clear_blocks(struct inode *, sector_t block, long size);
  int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
  int dax_truncate_page(struct inode *, loff_t from, get_block_t);
 -int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
 +int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
 +              dax_iodone_t);
 +int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
 +              dax_iodone_t);
  int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
 -#define dax_mkwrite(vma, vmf, gb)     dax_fault(vma, vmf, gb)
 +#define dax_mkwrite(vma, vmf, gb, iod)                dax_fault(vma, vmf, gb, iod)
 +#define __dax_mkwrite(vma, vmf, gb, iod)      __dax_fault(vma, vmf, gb, iod)
  
  #ifdef CONFIG_BLOCK
  typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
@@@ -2816,8 -2789,6 +2821,8 @@@ extern struct dentry *simple_lookup(str
  extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
  extern const struct file_operations simple_dir_operations;
  extern const struct inode_operations simple_dir_inode_operations;
 +extern void make_empty_dir_inode(struct inode *inode);
 +extern bool is_empty_dir_inode(struct inode *inode);
  struct tree_descr { char *name; const struct file_operations *ops; int mode; };
  struct dentry *d_alloc_name(struct dentry *, const char *);
  extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *);
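
Among the fs.h changes, file_remove_suid() becomes file_remove_privs() and gains a query
companion, dentry_needs_remove_privs(), so callers can test cheaply before taking an
exclusive lock -- exactly what the xfs_write_ioend restart logic above relies on. A
hedged sketch of a write-path check; demo_write_checks is hypothetical:

static int demo_write_checks(struct file *file)
{
        /* fast path: nothing to strip, no exclusive locking needed */
        if (!file_needs_remove_privs(file))
                return 0;

        /* strips setuid/setgid bits and kills security privileges */
        return file_remove_privs(file);
}
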
diff --combined include/linux/pagemap.h
index fb0814ca65c7328b0bb2bcf9be958dbff2a05c04,808942d3106260231b5d4d870c4be9eab52e5429..a6c78e00ea9684a784938ed39229c2018ffd8e75
@@@ -651,8 -651,7 +651,8 @@@ int add_to_page_cache_locked(struct pag
  int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
                                pgoff_t index, gfp_t gfp_mask);
  extern void delete_from_page_cache(struct page *page);
 -extern void __delete_from_page_cache(struct page *page, void *shadow);
 +extern void __delete_from_page_cache(struct page *page, void *shadow,
 +                                   struct mem_cgroup *memcg);
  int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
  
  /*
@@@ -671,4 -670,10 +671,10 @@@ static inline int add_to_page_cache(str
        return error;
  }
  
+ static inline unsigned long dir_pages(struct inode *inode)
+ {
+       return (unsigned long)(inode->i_size + PAGE_CACHE_SIZE - 1) >>
+                              PAGE_CACHE_SHIFT;
+ }
  #endif /* _LINUX_PAGEMAP_H */
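
dir_pages() moves into pagemap.h so the filesystems that each carried a private copy can
share one definition. An illustrative caller, assuming a directory inode dir whose
entries live in the page cache:

        pgoff_t n, npages = dir_pages(dir);

        for (n = 0; n < npages; n++) {
                struct page *page = read_mapping_page(dir->i_mapping, n, NULL);

                if (IS_ERR(page))
                        break;          /* or propagate PTR_ERR(page) */
                /* ...walk the directory entries stored in this page... */
                page_cache_release(page);
        }
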
diff --combined kernel/events/core.c
index d1f37ddd19608d26a32dd130d491cf7c98c41d9c,5c964e84548355b1958814f641b1139655a2d549..e965cfae420725645349c4facd145828e4e59d13
@@@ -36,7 -36,7 +36,7 @@@
  #include <linux/kernel_stat.h>
  #include <linux/cgroup.h>
  #include <linux/perf_event.h>
 -#include <linux/ftrace_event.h>
 +#include <linux/trace_events.h>
  #include <linux/hw_breakpoint.h>
  #include <linux/mm_types.h>
  #include <linux/module.h>
  
  static struct workqueue_struct *perf_wq;
  
 +typedef int (*remote_function_f)(void *);
 +
  struct remote_function_call {
        struct task_struct      *p;
 -      int                     (*func)(void *info);
 +      remote_function_f       func;
        void                    *info;
        int                     ret;
  };
@@@ -88,7 -86,7 +88,7 @@@ static void remote_function(void *data
   *        -EAGAIN - when the process moved away
   */
  static int
 -task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
 +task_function_call(struct task_struct *p, remote_function_f func, void *info)
  {
        struct remote_function_call data = {
                .p      = p,
   *
   * returns: @func return value or -ENXIO when the cpu is offline
   */
 -static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
 +static int cpu_function_call(int cpu, remote_function_f func, void *info)
  {
        struct remote_function_call data = {
                .p      = NULL,
@@@ -749,31 -747,62 +749,31 @@@ perf_cgroup_mark_enabled(struct perf_ev
  /*
   * function must be called with interrupts disabled
   */
 -static enum hrtimer_restart perf_cpu_hrtimer_handler(struct hrtimer *hr)
 +static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
  {
        struct perf_cpu_context *cpuctx;
 -      enum hrtimer_restart ret = HRTIMER_NORESTART;
        int rotations = 0;
  
        WARN_ON(!irqs_disabled());
  
        cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
 -
        rotations = perf_rotate_context(cpuctx);
  
 -      /*
 -       * arm timer if needed
 -       */
 -      if (rotations) {
 +      raw_spin_lock(&cpuctx->hrtimer_lock);
 +      if (rotations)
                hrtimer_forward_now(hr, cpuctx->hrtimer_interval);
 -              ret = HRTIMER_RESTART;
 -      }
 -
 -      return ret;
 -}
 -
 -/* CPU is going down */
 -void perf_cpu_hrtimer_cancel(int cpu)
 -{
 -      struct perf_cpu_context *cpuctx;
 -      struct pmu *pmu;
 -      unsigned long flags;
 -
 -      if (WARN_ON(cpu != smp_processor_id()))
 -              return;
 -
 -      local_irq_save(flags);
 -
 -      rcu_read_lock();
 -
 -      list_for_each_entry_rcu(pmu, &pmus, entry) {
 -              cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
 -
 -              if (pmu->task_ctx_nr == perf_sw_context)
 -                      continue;
 -
 -              hrtimer_cancel(&cpuctx->hrtimer);
 -      }
 -
 -      rcu_read_unlock();
 +      else
 +              cpuctx->hrtimer_active = 0;
 +      raw_spin_unlock(&cpuctx->hrtimer_lock);
  
 -      local_irq_restore(flags);
 +      return rotations ? HRTIMER_RESTART : HRTIMER_NORESTART;
  }
  
 -static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
 +static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
  {
 -      struct hrtimer *hr = &cpuctx->hrtimer;
 +      struct hrtimer *timer = &cpuctx->hrtimer;
        struct pmu *pmu = cpuctx->ctx.pmu;
 -      int timer;
 +      u64 interval;
  
        /* no multiplexing needed for SW PMU */
        if (pmu->task_ctx_nr == perf_sw_context)
         * check default is sane, if not set then force to
         * default interval (1/tick)
         */
 -      timer = pmu->hrtimer_interval_ms;
 -      if (timer < 1)
 -              timer = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER;
 +      interval = pmu->hrtimer_interval_ms;
 +      if (interval < 1)
 +              interval = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER;
  
 -      cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
 +      cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
  
 -      hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 -      hr->function = perf_cpu_hrtimer_handler;
 +      raw_spin_lock_init(&cpuctx->hrtimer_lock);
 +      hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
 +      timer->function = perf_mux_hrtimer_handler;
  }
  
 -static void perf_cpu_hrtimer_restart(struct perf_cpu_context *cpuctx)
 +static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
  {
 -      struct hrtimer *hr = &cpuctx->hrtimer;
 +      struct hrtimer *timer = &cpuctx->hrtimer;
        struct pmu *pmu = cpuctx->ctx.pmu;
 +      unsigned long flags;
  
        /* not for SW PMU */
        if (pmu->task_ctx_nr == perf_sw_context)
 -              return;
 +              return 0;
  
 -      if (hrtimer_active(hr))
 -              return;
 +      raw_spin_lock_irqsave(&cpuctx->hrtimer_lock, flags);
 +      if (!cpuctx->hrtimer_active) {
 +              cpuctx->hrtimer_active = 1;
 +              hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
 +              hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
 +      }
 +      raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
  
 -      if (!hrtimer_callback_running(hr))
 -              __hrtimer_start_range_ns(hr, cpuctx->hrtimer_interval,
 -                                       0, HRTIMER_MODE_REL_PINNED, 0);
 +      return 0;
  }
  
  void perf_pmu_disable(struct pmu *pmu)
@@@ -889,30 -913,10 +889,30 @@@ static void put_ctx(struct perf_event_c
   * Those places that change perf_event::ctx will hold both
   * perf_event_ctx::mutex of the 'old' and 'new' ctx value.
   *
 - * Lock ordering is by mutex address. There is one other site where
 - * perf_event_context::mutex nests and that is put_event(). But remember that
 - * that is a parent<->child context relation, and migration does not affect
 - * children, therefore these two orderings should not interact.
 + * Lock ordering is by mutex address. There are two other sites where
 + * perf_event_context::mutex nests and those are:
 + *
 + *  - perf_event_exit_task_context()  [ child , 0 ]
 + *      __perf_event_exit_task()
 + *        sync_child_event()
 + *          put_event()                       [ parent, 1 ]
 + *
 + *  - perf_event_init_context()               [ parent, 0 ]
 + *      inherit_task_group()
 + *        inherit_group()
 + *          inherit_event()
 + *            perf_event_alloc()
 + *              perf_init_event()
 + *                perf_try_init_event()       [ child , 1 ]
 + *
 + * While it appears there is an obvious deadlock here -- the parent and child
 + * nesting levels are inverted between the two -- this is in fact safe because
 + * life-time rules separate them: an exiting task cannot fork, and a
 + * spawning task cannot (yet) exit.
 + *
 + * But remember that these are parent<->child context relations, and
 + * migration does not affect children, therefore these two orderings should not
 + * interact.
   *
   * The change in perf_event::ctx does not affect children (as claimed above)
   * because the sys_perf_event_open() case will install a new event and break
@@@ -1502,17 -1506,11 +1502,17 @@@ static int __init perf_workqueue_init(v
  
  core_initcall(perf_workqueue_init);
  
 +static inline int pmu_filter_match(struct perf_event *event)
 +{
 +      struct pmu *pmu = event->pmu;
 +      return pmu->filter_match ? pmu->filter_match(event) : 1;
 +}
 +
  static inline int
  event_filter_match(struct perf_event *event)
  {
        return (event->cpu == -1 || event->cpu == smp_processor_id())
 -          && perf_cgroup_match(event);
 +          && perf_cgroup_match(event) && pmu_filter_match(event);
  }
  
  static void
@@@ -1917,7 -1915,7 +1917,7 @@@ group_sched_in(struct perf_event *group
  
        if (event_sched_in(group_event, cpuctx, ctx)) {
                pmu->cancel_txn(pmu);
 -              perf_cpu_hrtimer_restart(cpuctx);
 +              perf_mux_hrtimer_restart(cpuctx);
                return -EAGAIN;
        }
  
@@@ -1964,7 -1962,7 +1964,7 @@@ group_error
  
        pmu->cancel_txn(pmu);
  
 -      perf_cpu_hrtimer_restart(cpuctx);
 +      perf_mux_hrtimer_restart(cpuctx);
  
        return -EAGAIN;
  }
@@@ -2237,7 -2235,7 +2237,7 @@@ static int __perf_event_enable(void *in
                 */
                if (leader != event) {
                        group_sched_out(leader, cpuctx, ctx);
 -                      perf_cpu_hrtimer_restart(cpuctx);
 +                      perf_mux_hrtimer_restart(cpuctx);
                }
                if (leader->attr.pinned) {
                        update_group_times(leader);
@@@ -3424,6 -3422,7 +3424,6 @@@ static void free_event_rcu(struct rcu_h
        if (event->ns)
                put_pid_ns(event->ns);
        perf_event_free_filter(event);
 -      perf_event_free_bpf_prog(event);
        kfree(event);
  }
  
@@@ -3554,8 -3553,6 +3554,8 @@@ static void __free_event(struct perf_ev
                        put_callchain_buffers();
        }
  
 +      perf_event_free_bpf_prog(event);
 +
        if (event->destroy)
                event->destroy(event);
  
@@@ -3660,6 -3657,9 +3660,6 @@@ static void perf_remove_from_owner(stru
        }
  }
  
 -/*
 - * Called when the last reference to the file is gone.
 - */
  static void put_event(struct perf_event *event)
  {
        struct perf_event_context *ctx;
@@@ -3697,9 -3697,6 +3697,9 @@@ int perf_event_release_kernel(struct pe
  }
  EXPORT_SYMBOL_GPL(perf_event_release_kernel);
  
 +/*
 + * Called when the last reference to the file is gone.
 + */
  static int perf_release(struct inode *inode, struct file *file)
  {
        put_event(file->private_data);
@@@ -4313,20 -4310,20 +4313,20 @@@ static void ring_buffer_attach(struct p
                WARN_ON_ONCE(event->rcu_pending);
  
                old_rb = event->rb;
 -              event->rcu_batches = get_state_synchronize_rcu();
 -              event->rcu_pending = 1;
 -
                spin_lock_irqsave(&old_rb->event_lock, flags);
                list_del_rcu(&event->rb_entry);
                spin_unlock_irqrestore(&old_rb->event_lock, flags);
 -      }
  
 -      if (event->rcu_pending && rb) {
 -              cond_synchronize_rcu(event->rcu_batches);
 -              event->rcu_pending = 0;
 +              event->rcu_batches = get_state_synchronize_rcu();
 +              event->rcu_pending = 1;
        }
  
        if (rb) {
 +              if (event->rcu_pending) {
 +                      cond_synchronize_rcu(event->rcu_batches);
 +                      event->rcu_pending = 0;
 +              }
 +
                spin_lock_irqsave(&rb->event_lock, flags);
                list_add_rcu(&event->rb_entry, &rb->event_list);
                spin_unlock_irqrestore(&rb->event_lock, flags);
@@@ -5363,9 -5360,9 +5363,9 @@@ void perf_prepare_sample(struct perf_ev
        }
  }
  
 -static void perf_event_output(struct perf_event *event,
 -                              struct perf_sample_data *data,
 -                              struct pt_regs *regs)
 +void perf_event_output(struct perf_event *event,
 +                      struct perf_sample_data *data,
 +                      struct pt_regs *regs)
  {
        struct perf_output_handle handle;
        struct perf_event_header header;
@@@ -5794,7 -5791,7 +5794,7 @@@ static void perf_event_mmap_event(struc
                 * need to add enough zero bytes after the string to handle
                 * the 64bit alignment we do later.
                 */
-               name = d_path(&file->f_path, buf, PATH_MAX - sizeof(u64));
+               name = file_path(file, buf, PATH_MAX - sizeof(u64));
                if (IS_ERR(name)) {
                        name = "//toolong";
                        goto cpy_name;
@@@ -5956,39 -5953,6 +5956,39 @@@ void perf_event_aux_event(struct perf_e
        perf_output_end(&handle);
  }
  
 +/*
 + * Lost/dropped samples logging
 + */
 +void perf_log_lost_samples(struct perf_event *event, u64 lost)
 +{
 +      struct perf_output_handle handle;
 +      struct perf_sample_data sample;
 +      int ret;
 +
 +      struct {
 +              struct perf_event_header        header;
 +              u64                             lost;
 +      } lost_samples_event = {
 +              .header = {
 +                      .type = PERF_RECORD_LOST_SAMPLES,
 +                      .misc = 0,
 +                      .size = sizeof(lost_samples_event),
 +              },
 +              .lost           = lost,
 +      };
 +
 +      perf_event_header__init_id(&lost_samples_event.header, &sample, event);
 +
 +      ret = perf_output_begin(&handle, event,
 +                              lost_samples_event.header.size);
 +      if (ret)
 +              return;
 +
 +      perf_output_put(&handle, lost_samples_event);
 +      perf_event__output_id_sample(event, &handle, &sample);
 +      perf_output_end(&handle);
 +}
 +
  /*
   * IRQ throttle logging
   */
@@@ -6879,8 -6843,9 +6879,8 @@@ static void perf_swevent_start_hrtimer(
        } else {
                period = max_t(u64, 10000, hwc->sample_period);
        }
 -      __hrtimer_start_range_ns(&hwc->hrtimer,
 -                              ns_to_ktime(period), 0,
 -                              HRTIMER_MODE_REL_PINNED, 0);
 +      hrtimer_start(&hwc->hrtimer, ns_to_ktime(period),
 +                    HRTIMER_MODE_REL_PINNED);
  }
  
  static void perf_swevent_cancel_hrtimer(struct perf_event *event)
@@@ -7181,8 -7146,6 +7181,8 @@@ perf_event_mux_interval_ms_show(struct 
        return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms);
  }
  
 +static DEFINE_MUTEX(mux_interval_mutex);
 +
  static ssize_t
  perf_event_mux_interval_ms_store(struct device *dev,
                                 struct device_attribute *attr,
        if (timer == pmu->hrtimer_interval_ms)
                return count;
  
 +      mutex_lock(&mux_interval_mutex);
        pmu->hrtimer_interval_ms = timer;
  
        /* update all cpuctx for this PMU */
 -      for_each_possible_cpu(cpu) {
 +      get_online_cpus();
 +      for_each_online_cpu(cpu) {
                struct perf_cpu_context *cpuctx;
                cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
                cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
  
 -              if (hrtimer_active(&cpuctx->hrtimer))
 -                      hrtimer_forward_now(&cpuctx->hrtimer, cpuctx->hrtimer_interval);
 +              cpu_function_call(cpu,
 +                      (remote_function_f)perf_mux_hrtimer_restart, cpuctx);
        }
 +      put_online_cpus();
 +      mutex_unlock(&mux_interval_mutex);
  
        return count;
  }
@@@ -7321,7 -7280,7 +7321,7 @@@ skip_type
                lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
                cpuctx->ctx.pmu = pmu;
  
 -              __perf_cpu_hrtimer_init(cpuctx, cpu);
 +              __perf_mux_hrtimer_init(cpuctx, cpu);
  
                cpuctx->unique_pmu = pmu;
        }
@@@ -7405,12 -7364,7 +7405,12 @@@ static int perf_try_init_event(struct p
                return -ENODEV;
  
        if (event->group_leader != event) {
 -              ctx = perf_event_ctx_lock(event->group_leader);
 +              /*
 +               * This ctx->mutex can nest when we're called through
 +               * inheritance. See the perf_event_ctx_lock_nested() comment.
 +               */
 +              ctx = perf_event_ctx_lock_nested(event->group_leader,
 +                                               SINGLE_DEPTH_NESTING);
                BUG_ON(!ctx);
        }
  
diff --combined mm/filemap.c
index 11f10efd637c2d67e071c482951e2bb38a105d6b,f851e36802d573e19f10307ab984c1d52bfc9588..1283fc82545861d155c4eef7013bf8e285040fed
   *    ->tree_lock             (page_remove_rmap->set_page_dirty)
   *    bdi.wb->list_lock               (page_remove_rmap->set_page_dirty)
   *    ->inode->i_lock         (page_remove_rmap->set_page_dirty)
 + *    ->memcg->move_lock      (page_remove_rmap->mem_cgroup_begin_page_stat)
   *    bdi.wb->list_lock               (zap_pte_range->set_page_dirty)
   *    ->inode->i_lock         (zap_pte_range->set_page_dirty)
   *    ->private_lock          (zap_pte_range->__set_page_dirty_buffers)
@@@ -175,11 -174,9 +175,11 @@@ static void page_cache_tree_delete(stru
  /*
   * Delete a page from the page cache and free it. Caller has to make
   * sure the page is locked and that nobody else uses it - or that usage
 - * is safe.  The caller must hold the mapping's tree_lock.
 + * is safe.  The caller must hold the mapping's tree_lock and
 + * mem_cgroup_begin_page_stat().
   */
 -void __delete_from_page_cache(struct page *page, void *shadow)
 +void __delete_from_page_cache(struct page *page, void *shadow,
 +                            struct mem_cgroup *memcg)
  {
        struct address_space *mapping = page->mapping;
  
        page->mapping = NULL;
        /* Leave page->index set: truncation lookup relies upon it */
  
 -      __dec_zone_page_state(page, NR_FILE_PAGES);
 +      /* hugetlb pages do not participate in page cache accounting. */
 +      if (!PageHuge(page))
 +              __dec_zone_page_state(page, NR_FILE_PAGES);
        if (PageSwapBacked(page))
                __dec_zone_page_state(page, NR_SHMEM);
        BUG_ON(page_mapped(page));
         * anyway will be cleared before returning page into buddy allocator.
         */
        if (WARN_ON_ONCE(PageDirty(page)))
 -              account_page_cleaned(page, mapping);
 +              account_page_cleaned(page, mapping, memcg,
 +                                   inode_to_wb(mapping->host));
  }
  
  /**
  void delete_from_page_cache(struct page *page)
  {
        struct address_space *mapping = page->mapping;
 +      struct mem_cgroup *memcg;
 +      unsigned long flags;
 +
        void (*freepage)(struct page *);
  
        BUG_ON(!PageLocked(page));
  
        freepage = mapping->a_ops->freepage;
 -      spin_lock_irq(&mapping->tree_lock);
 -      __delete_from_page_cache(page, NULL);
 -      spin_unlock_irq(&mapping->tree_lock);
 +
 +      memcg = mem_cgroup_begin_page_stat(page);
 +      spin_lock_irqsave(&mapping->tree_lock, flags);
 +      __delete_from_page_cache(page, NULL, memcg);
 +      spin_unlock_irqrestore(&mapping->tree_lock, flags);
 +      mem_cgroup_end_page_stat(memcg);
  
        if (freepage)
                freepage(page);
@@@ -293,9 -281,7 +293,9 @@@ int __filemap_fdatawrite_range(struct a
        if (!mapping_cap_writeback_dirty(mapping))
                return 0;
  
 +      wbc_attach_fdatawrite_inode(&wbc, mapping->host);
        ret = do_writepages(mapping, &wbc);
 +      wbc_detach_inode(&wbc);
        return ret;
  }
  
@@@ -484,8 -470,6 +484,8 @@@ int replace_page_cache_page(struct pag
        if (!error) {
                struct address_space *mapping = old->mapping;
                void (*freepage)(struct page *);
 +              struct mem_cgroup *memcg;
 +              unsigned long flags;
  
                pgoff_t offset = old->index;
                freepage = mapping->a_ops->freepage;
                new->mapping = mapping;
                new->index = offset;
  
 -              spin_lock_irq(&mapping->tree_lock);
 -              __delete_from_page_cache(old, NULL);
 +              memcg = mem_cgroup_begin_page_stat(old);
 +              spin_lock_irqsave(&mapping->tree_lock, flags);
 +              __delete_from_page_cache(old, NULL, memcg);
                error = radix_tree_insert(&mapping->page_tree, offset, new);
                BUG_ON(error);
                mapping->nrpages++;
 -              __inc_zone_page_state(new, NR_FILE_PAGES);
 +
 +              /*
 +               * hugetlb pages do not participate in page cache accounting.
 +               */
 +              if (!PageHuge(new))
 +                      __inc_zone_page_state(new, NR_FILE_PAGES);
                if (PageSwapBacked(new))
                        __inc_zone_page_state(new, NR_SHMEM);
 -              spin_unlock_irq(&mapping->tree_lock);
 +              spin_unlock_irqrestore(&mapping->tree_lock, flags);
 +              mem_cgroup_end_page_stat(memcg);
                mem_cgroup_migrate(old, new, true);
                radix_tree_preload_end();
                if (freepage)
@@@ -598,10 -575,7 +598,10 @@@ static int __add_to_page_cache_locked(s
        radix_tree_preload_end();
        if (unlikely(error))
                goto err_insert;
 -      __inc_zone_page_state(page, NR_FILE_PAGES);
 +
 +      /* hugetlb pages do not participate in page cache accounting. */
 +      if (!huge)
 +              __inc_zone_page_state(page, NR_FILE_PAGES);
        spin_unlock_irq(&mapping->tree_lock);
        if (!huge)
                mem_cgroup_commit_charge(page, memcg, false);
@@@ -1680,8 -1654,8 +1680,8 @@@ no_cached_page
                        error = -ENOMEM;
                        goto out;
                }
 -              error = add_to_page_cache_lru(page, mapping,
 -                                              index, GFP_KERNEL);
 +              error = add_to_page_cache_lru(page, mapping, index,
 +                                      GFP_KERNEL & mapping_gfp_mask(mapping));
                if (error) {
                        page_cache_release(page);
                        if (error == -EEXIST) {
@@@ -1782,8 -1756,7 +1782,8 @@@ static int page_cache_read(struct file 
                if (!page)
                        return -ENOMEM;
  
 -              ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
 +              ret = add_to_page_cache_lru(page, mapping, offset,
 +                              GFP_KERNEL & mapping_gfp_mask(mapping));
                if (ret == 0)
                        ret = mapping->a_ops->readpage(file, page);
                else if (ret == -EEXIST)
@@@ -2563,7 -2536,7 +2563,7 @@@ ssize_t __generic_file_write_iter(struc
  
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = inode_to_bdi(inode);
-       err = file_remove_suid(file);
+       err = file_remove_privs(file);
        if (err)
                goto out;
  
diff --combined mm/memory.c
index 11b9ca1767408dddb147c4b225de0aa31b8f17e7,28c10da1efbca0d7ac0af6189a7c0c2e0789e832..a84fbb772034f2e73eac300e254bd54f2a36ce03
@@@ -2081,12 -2081,11 +2081,12 @@@ static int wp_page_copy(struct mm_struc
                        goto oom;
                cow_user_page(new_page, old_page, address, vma);
        }
 -      __SetPageUptodate(new_page);
  
        if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
                goto oom_free_new;
  
 +      __SetPageUptodate(new_page);
 +
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
  
        /*
@@@ -2690,10 -2689,6 +2690,10 @@@ static int do_anonymous_page(struct mm_
        page = alloc_zeroed_user_highpage_movable(vma, address);
        if (!page)
                goto oom;
 +
 +      if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
 +              goto oom_free_page;
 +
        /*
         * The memory barrier inside __SetPageUptodate makes sure that
         * preceding stores to the page contents become visible before
         */
        __SetPageUptodate(page);
  
 -      if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
 -              goto oom_free_page;
 -
        entry = mk_pte(page, vma->vm_page_prot);
        if (vma->vm_flags & VM_WRITE)
                entry = pte_mkwrite(pte_mkdirty(entry));
@@@ -3726,7 -3724,7 +3726,7 @@@ void print_vma_addr(char *prefix, unsig
                if (buf) {
                        char *p;
  
-                       p = d_path(&f->f_path, buf, PAGE_SIZE);
+                       p = file_path(f, buf, PAGE_SIZE);
                        if (IS_ERR(p))
                                p = "?";
                        printk("%s%s[%lx+%lx]", prefix, kbasename(p),
  }
  
  #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)
 -void might_fault(void)
 +void __might_fault(const char *file, int line)
  {
        /*
         * Some code (nfs/sunrpc) uses socket ops on kernel memory while
         */
        if (segment_eq(get_fs(), KERNEL_DS))
                return;
 -
 -      /*
 -       * it would be nicer only to annotate paths which are not under
 -       * pagefault_disable, however that requires a larger audit and
 -       * providing helpers like get_user_atomic.
 -       */
 -      if (in_atomic())
 +      if (pagefault_disabled())
                return;
 -
 -      __might_sleep(__FILE__, __LINE__, 0);
 -
 +      __might_sleep(file, line, 0);
 +#if defined(CONFIG_DEBUG_ATOMIC_SLEEP)
        if (current->mm)
                might_lock_read(&current->mm->mmap_sem);
 +#endif
  }
 -EXPORT_SYMBOL(might_fault);
 +EXPORT_SYMBOL(__might_fault);
  #endif
  
  #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
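
Both mm/memory.c ordering hunks make the same point: charge the page to the memcg while
it is still private, and only then publish it with __SetPageUptodate(), whose barrier
orders the zeroed contents against the later PTE store. Schematically:

        page = alloc_zeroed_user_highpage_movable(vma, address);
        if (!page)
                goto oom;
        if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
                goto oom_free_page;     /* fail while the page is still private */
        __SetPageUptodate(page);        /* contents visible before the PTE is */
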
diff --combined security/inode.c
index 0e37e4fba8faca36d118ab5a1dc75c139bf3ea2f,6df0d8dae1e0d9b8b1c482d0f2eca622a3e0e414..16622aef9bdea83bee67e5e0080c7b9a17d240ae
  static struct vfsmount *mount;
  static int mount_count;
  
- static inline int positive(struct dentry *dentry)
- {
-       return d_really_is_positive(dentry) && !d_unhashed(dentry);
- }
  static int fill_super(struct super_block *sb, void *data, int silent)
  {
        static struct tree_descr files[] = {{""}};
@@@ -201,31 -196,31 +196,29 @@@ void securityfs_remove(struct dentry *d
                return;
  
        mutex_lock(&d_inode(parent)->i_mutex);
-       if (positive(dentry)) {
-               if (d_really_is_positive(dentry)) {
-                       if (d_is_dir(dentry))
-                               simple_rmdir(d_inode(parent), dentry);
-                       else
-                               simple_unlink(d_inode(parent), dentry);
-                       dput(dentry);
-               }
+       if (simple_positive(dentry)) {
+               if (d_is_dir(dentry))
+                       simple_rmdir(d_inode(parent), dentry);
+               else
+                       simple_unlink(d_inode(parent), dentry);
+               dput(dentry);
        }
        mutex_unlock(&d_inode(parent)->i_mutex);
        simple_release_fs(&mount, &mount_count);
  }
  EXPORT_SYMBOL_GPL(securityfs_remove);
  
 -static struct kobject *security_kobj;
 -
  static int __init securityfs_init(void)
  {
        int retval;
  
 -      security_kobj = kobject_create_and_add("security", kernel_kobj);
 -      if (!security_kobj)
 -              return -EINVAL;
 +      retval = sysfs_create_mount_point(kernel_kobj, "security");
 +      if (retval)
 +              return retval;
  
        retval = register_filesystem(&fs_type);
        if (retval)
 -              kobject_put(security_kobj);
 +              sysfs_remove_mount_point(kernel_kobj, "security");
        return retval;
  }
  