Merge tag 'libnvdimm-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdim...
author Linus Torvalds <[email protected]>
Mon, 11 Sep 2017 20:10:57 +0000 (13:10 -0700)
committer Linus Torvalds <[email protected]>
Mon, 11 Sep 2017 20:10:57 +0000 (13:10 -0700)
Pull libnvdimm from Dan Williams:
 "A rework of media error handling in the BTT driver and other updates.
  It has appeared in a few -next releases and collected some late-
  breaking build-error and warning fixups as a result.

  Summary:

   - Media error handling support in the Block Translation Table (BTT)
     driver is reworked to address sleeping-while-atomic locking and
     memory-allocation-context conflicts.

   - The dax_device lookup overhead for xfs and ext4 is moved out of the
     iomap hot-path to a mount-time lookup.

   - A new 'ecc_unit_size' sysfs attribute is added to advertise the
     read-modify-write boundary property of a persistent memory range.

   - Preparatory fix-ups for arm and powerpc pmem support are included
     along with other miscellaneous fixes"
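
The core of the BTT rework is taking error clearing out of the atomic map-update path: a lane whose free block is known bad is only flagged (has_err), and the actual clearing happens later, in process context, as a zero-write under a per-arena mutex. A condensed, non-compilable sketch of that flow, paraphrasing the arena_clear_freelist_error() hunk in drivers/nvdimm/btt.c further down:

static int arena_clear_freelist_error(struct arena_info *arena, u32 lane)
{
	int ret = 0;

	if (!arena->freelist[lane].has_err)
		return 0;

	/* blocking context: safe to sleep here, unlike under the map locks */
	mutex_lock(&arena->err_lock);
	{
		void *zero_page = page_address(ZERO_PAGE(0));
		u64 nsoff = to_namespace_offset(arena, arena->freelist[lane].block);
		unsigned long len = arena->sector_size;

		while (len) {
			unsigned long chunk = min(len, PAGE_SIZE);

			/* a zero-write through rw_bytes clears poison in the range */
			ret = arena_write_bytes(arena, nsoff, zero_page, chunk, 0);
			if (ret)
				break;
			len -= chunk;
			nsoff += chunk;
			if (!len)
				arena->freelist[lane].has_err = 0;
		}
	}
	mutex_unlock(&arena->err_lock);
	return ret;
}

A writer that finds err_lock held or the lane flagged releases the lane, clears the error, and retries with a different free block, as the btt_write_pg() hunk below shows.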

* tag 'libnvdimm-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (26 commits)
  libnvdimm, btt: fix format string warnings
  libnvdimm, btt: clean up warning and error messages
  ext4: fix null pointer dereference on sbi
  libnvdimm, nfit: move the check on nd_reserved2 to the endpoint
  dax: fix FS_DAX=n BLOCK=y compilation
  libnvdimm: fix integer overflow static analysis warning
  libnvdimm, nd_blk: remove mmio_flush_range()
  libnvdimm, btt: rework error clearing
  libnvdimm: fix potential deadlock while clearing errors
  libnvdimm, btt: cache sector_size in arena_info
  libnvdimm, btt: ensure that flags were also unchanged during a map_read
  libnvdimm, btt: refactor map entry operations with macros
  libnvdimm, btt: fix a missed NVDIMM_IO_ATOMIC case in the write path
  libnvdimm, nfit: export an 'ecc_unit_size' sysfs attribute
  ext4: perform dax_device lookup at mount
  ext2: perform dax_device lookup at mount
  xfs: perform dax_device lookup at mount
  dax: introduce a fs_dax_get_by_bdev() helper
  libnvdimm, btt: check memory allocation failure
  libnvdimm, label: fix index block size calculation
  ...
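
The dax_device change replaces the fs_dax_get_by_host() lookup that previously ran on every ->iomap_begin call with a single fs_dax_get_by_bdev() at mount time; the pointer is cached in the filesystem's superblock info and released only at unmount. A condensed, non-compilable sketch of the pattern as the ext4 hunks below apply it (elisions marked with ...):

static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
	struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
	...
	sbi->s_daxdev = dax_dev;	/* reference held for the lifetime of the mount */
	...
}

static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
			    unsigned flags, struct iomap *iomap)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	...
	iomap->bdev = inode->i_sb->s_bdev;
	iomap->dax_dev = sbi->s_daxdev;	/* no per-I/O lookup, no fs_put_dax() in ->iomap_end */
	...
}

static void ext4_put_super(struct super_block *sb)
{
	...
	fs_put_dax(sbi->s_daxdev);	/* drop the mount-time reference */
	...
}

xfs follows the same pattern through xfs_alloc_buftarg(), whose buffer targets now carry a bt_daxdev alongside bt_bdev, and ext2 mirrors the ext4 change.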

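The new 'ecc_unit_size' attribute exported by the nfit driver (see the acpi_nfit_region_attributes hunk in drivers/acpi/nfit/core.c below) reports the region's clear-error unit, i.e. its read-modify-write granularity. A minimal userspace reader; the sysfs path is an assumption based on the "nfit" attribute group in that hunk, not something stated in this pull:

/* Assumed path: /sys/bus/nd/devices/<region>/nfit/ecc_unit_size */
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	char path[256];
	long ecc_unit = 0;
	FILE *f;

	/* e.g. argv[1] = "region0" */
	snprintf(path, sizeof(path),
		 "/sys/bus/nd/devices/%s/nfit/ecc_unit_size",
		 argc > 1 ? argv[1] : "region0");

	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return EXIT_FAILURE;
	}
	if (fscanf(f, "%ld", &ecc_unit) == 1)
		printf("%s: %ld bytes\n", path, ecc_unit);
	fclose(f);
	return EXIT_SUCCESS;
}

A caller that wants its writes to clear latent media errors would size and align them to this value.
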
13 files changed:
arch/x86/Kconfig
drivers/acpi/nfit/core.c
drivers/nvdimm/btt.c
drivers/nvdimm/nd.h
fs/ext4/ext4.h
fs/ext4/inode.c
fs/ext4/super.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_super.c
include/linux/dax.h
lib/Kconfig

diff --combined arch/x86/Kconfig
index a3e6e6136a47ad9ad01917c8f244dd2dd9d0b0ed,87602cef7aba07ce1b459356c3a554102417a949..971feac135060d371e130680b54034f9dd39556a
@@@ -53,10 -53,7 +53,9 @@@ config X8
        select ARCH_HAS_FORTIFY_SOURCE
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_KCOV                    if X86_64
-       select ARCH_HAS_MMIO_FLUSH
        select ARCH_HAS_PMEM_API                if X86_64
 +      # Causing hangs/crashes, see the commit that added this change for details.
 +      select ARCH_HAS_REFCOUNT                if BROKEN
        select ARCH_HAS_UACCESS_FLUSHCACHE      if X86_64
        select ARCH_HAS_SET_MEMORY
        select ARCH_HAS_SG_CHAIN
@@@ -75,6 -72,7 +74,6 @@@
        select ARCH_USE_QUEUED_RWLOCKS
        select ARCH_USE_QUEUED_SPINLOCKS
        select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 -      select ARCH_WANT_FRAME_POINTERS
        select ARCH_WANTS_DYNAMIC_TASK_STRUCT
        select ARCH_WANTS_THP_SWAP              if X86_64
        select BUILDTIME_EXTABLE_SORT
        select HAVE_MEMBLOCK
        select HAVE_MEMBLOCK_NODE_MAP
        select HAVE_MIXED_BREAKPOINTS_REGS
 +      select HAVE_MOD_ARCH_SPECIFIC
        select HAVE_NMI
        select HAVE_OPROFILE
        select HAVE_OPTPROBES
        select HAVE_HARDLOCKUP_DETECTOR_PERF    if PERF_EVENTS && HAVE_PERF_EVENTS_NMI
        select HAVE_PERF_REGS
        select HAVE_PERF_USER_STACK_DUMP
 +      select HAVE_RCU_TABLE_FREE
        select HAVE_REGS_AND_STACK_ACCESS_API
 -      select HAVE_RELIABLE_STACKTRACE         if X86_64 && FRAME_POINTER && STACK_VALIDATION
 +      select HAVE_RELIABLE_STACKTRACE         if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
        select HAVE_STACK_VALIDATION            if X86_64
        select HAVE_SYSCALL_TRACEPOINTS
        select HAVE_UNSTABLE_SCHED_CLOCK
@@@ -330,7 -326,6 +329,7 @@@ config FIX_EARLYCON_ME
  
  config PGTABLE_LEVELS
        int
 +      default 5 if X86_5LEVEL
        default 4 if X86_64
        default 3 if X86_PAE
        default 2
@@@ -429,16 -424,16 +428,16 @@@ config GOLDFIS
         def_bool y
         depends on X86_GOLDFISH
  
 -config INTEL_RDT_A
 -      bool "Intel Resource Director Technology Allocation support"
 +config INTEL_RDT
 +      bool "Intel Resource Director Technology support"
        default n
        depends on X86 && CPU_SUP_INTEL
        select KERNFS
        help
 -        Select to enable resource allocation which is a sub-feature of
 -        Intel Resource Director Technology(RDT). More information about
 -        RDT can be found in the Intel x86 Architecture Software
 -        Developer Manual.
 +        Select to enable resource allocation and monitoring which are
 +        sub-features of Intel Resource Director Technology(RDT). More
 +        information about RDT can be found in the Intel x86
 +        Architecture Software Developer Manual.
  
          Say N if unsure.
  
@@@ -782,6 -777,8 +781,6 @@@ config KVM_DEBUG_F
          Statistics are displayed in debugfs filesystem. Enabling this option
          may incur significant overhead.
  
 -source "arch/x86/lguest/Kconfig"
 -
  config PARAVIRT_TIME_ACCOUNTING
        bool "Paravirtual steal time accounting"
        depends on PARAVIRT
@@@ -1401,24 -1398,6 +1400,24 @@@ config X86_PA
          has the cost of more pagetable lookup overhead, and also
          consumes more pagetable space per process.
  
 +config X86_5LEVEL
 +      bool "Enable 5-level page tables support"
 +      depends on X86_64
 +      ---help---
 +        5-level paging enables access to larger address space:
 +        upto 128 PiB of virtual address space and 4 PiB of
 +        physical address space.
 +
 +        It will be supported by future Intel CPUs.
 +
 +        Note: a kernel with this option enabled can only be booted
 +        on machines that support the feature.
 +
 +        See Documentation/x86/x86_64/5level-paging.txt for more
 +        information.
 +
 +        Say N if unsure.
 +
  config ARCH_PHYS_ADDR_T_64BIT
        def_bool y
        depends on X86_64 || X86_PAE
@@@ -1436,35 -1415,6 +1435,35 @@@ config X86_DIRECT_GBPAGE
          supports them), so don't confuse the user by printing
          that we have them enabled.
  
 +config ARCH_HAS_MEM_ENCRYPT
 +      def_bool y
 +
 +config AMD_MEM_ENCRYPT
 +      bool "AMD Secure Memory Encryption (SME) support"
 +      depends on X86_64 && CPU_SUP_AMD
 +      ---help---
 +        Say yes to enable support for the encryption of system memory.
 +        This requires an AMD processor that supports Secure Memory
 +        Encryption (SME).
 +
 +config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT
 +      bool "Activate AMD Secure Memory Encryption (SME) by default"
 +      default y
 +      depends on AMD_MEM_ENCRYPT
 +      ---help---
 +        Say yes to have system memory encrypted by default if running on
 +        an AMD processor that supports Secure Memory Encryption (SME).
 +
 +        If set to Y, then the encryption of system memory can be
 +        deactivated with the mem_encrypt=off command line option.
 +
 +        If set to N, then the encryption of system memory can be
 +        activated with the mem_encrypt=on command line option.
 +
 +config ARCH_USE_MEMREMAP_PROT
 +      def_bool y
 +      depends on AMD_MEM_ENCRYPT
 +
  # Common NUMA Features
  config NUMA
        bool "Numa Memory Allocation and Scheduler Support"
@@@ -1806,9 -1756,7 +1805,9 @@@ config X86_SMA
  config X86_INTEL_MPX
        prompt "Intel MPX (Memory Protection Extensions)"
        def_bool n
 -      depends on CPU_SUP_INTEL
 +      # Note: only available in 64-bit mode due to VMA flags shortage
 +      depends on CPU_SUP_INTEL && X86_64
 +      select ARCH_USES_HIGH_VMA_FLAGS
        ---help---
          MPX provides hardware features that can be used in
          conjunction with compiler-instrumented code to check
@@@ -2323,10 -2271,6 +2322,10 @@@ source "kernel/livepatch/Kconfig
  
  endmenu
  
 +config ARCH_HAS_ADD_PAGES
 +      def_bool y
 +      depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG
 +
  config ARCH_ENABLE_MEMORY_HOTPLUG
        def_bool y
        depends on X86_64 || (X86_32 && HIGHMEM)
@@@ -2347,10 -2291,6 +2346,10 @@@ config ARCH_ENABLE_HUGEPAGE_MIGRATIO
        def_bool y
        depends on X86_64 && HUGETLB_PAGE && MIGRATION
  
 +config ARCH_ENABLE_THP_MIGRATION
 +      def_bool y
 +      depends on X86_64 && TRANSPARENT_HUGEPAGE
 +
  menu "Power management and ACPI options"
  
  config ARCH_HIBERNATION_HEADER
diff --combined drivers/acpi/nfit/core.c
index 1893e416e7c0d95a88854670ddef631590bdb8bc,42221e785c47b9f52a51572214e41973a9b96cd0..9c2c49b6a240d55164ee8c39ce64e1d595072c62
@@@ -228,6 -228,10 +228,10 @@@ int acpi_nfit_ctl(struct nvdimm_bus_des
        if (cmd == ND_CMD_CALL) {
                call_pkg = buf;
                func = call_pkg->nd_command;
+               for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++)
+                       if (call_pkg->nd_reserved2[i])
+                               return -EINVAL;
        }
  
        if (nvdimm) {
@@@ -1674,8 -1678,19 +1678,19 @@@ static ssize_t range_index_show(struct 
  }
  static DEVICE_ATTR_RO(range_index);
  
+ static ssize_t ecc_unit_size_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+ {
+       struct nd_region *nd_region = to_nd_region(dev);
+       struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region);
+       return sprintf(buf, "%d\n", nfit_spa->clear_err_unit);
+ }
+ static DEVICE_ATTR_RO(ecc_unit_size);
  static struct attribute *acpi_nfit_region_attributes[] = {
        &dev_attr_range_index.attr,
+       &dev_attr_ecc_unit_size.attr,
        NULL,
  };
  
@@@ -1804,6 -1819,7 +1819,7 @@@ static int acpi_nfit_init_interleave_se
                struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
                struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
                                spa->range_index, i);
+               struct acpi_nfit_control_region *dcr = nfit_mem->dcr;
  
                if (!memdev || !nfit_mem->dcr) {
                        dev_err(dev, "%s: failed to find DCR\n", __func__);
                }
  
                map->region_offset = memdev->region_offset;
-               map->serial_number = nfit_mem->dcr->serial_number;
+               map->serial_number = dcr->serial_number;
  
                map2->region_offset = memdev->region_offset;
-               map2->serial_number = nfit_mem->dcr->serial_number;
-               map2->vendor_id = nfit_mem->dcr->vendor_id;
-               map2->manufacturing_date = nfit_mem->dcr->manufacturing_date;
-               map2->manufacturing_location = nfit_mem->dcr->manufacturing_location;
+               map2->serial_number = dcr->serial_number;
+               map2->vendor_id = dcr->vendor_id;
+               map2->manufacturing_date = dcr->manufacturing_date;
+               map2->manufacturing_location = dcr->manufacturing_location;
        }
  
        /* v1.1 namespaces */
                        cmp_map_compat, NULL);
        nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
  
+       /* record the result of the sort for the mapping position */
+       for (i = 0; i < nr; i++) {
+               struct nfit_set_info_map2 *map2 = &info2->mapping[i];
+               int j;
+               for (j = 0; j < nr; j++) {
+                       struct nd_mapping_desc *mapping = &ndr_desc->mapping[j];
+                       struct nvdimm *nvdimm = mapping->nvdimm;
+                       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+                       struct acpi_nfit_control_region *dcr = nfit_mem->dcr;
+                       if (map2->serial_number == dcr->serial_number &&
+                           map2->vendor_id == dcr->vendor_id &&
+                           map2->manufacturing_date == dcr->manufacturing_date &&
+                           map2->manufacturing_location
+                                   == dcr->manufacturing_location) {
+                               mapping->position = i;
+                               break;
+                       }
+               }
+       }
        ndr_desc->nd_set = nd_set;
        devm_kfree(dev, info);
        devm_kfree(dev, info2);
@@@ -1930,7 -1968,7 +1968,7 @@@ static int acpi_nfit_blk_single_io(stru
                        memcpy_flushcache(mmio->addr.aperture + offset, iobuf + copied, c);
                else {
                        if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
-                               mmio_flush_range((void __force *)
+                               arch_invalidate_pmem((void __force *)
                                        mmio->addr.aperture + offset, c);
  
                        memcpy(iobuf + copied, mmio->addr.aperture + offset, c);
@@@ -2884,7 -2922,7 +2922,7 @@@ static int acpi_nfit_flush_probe(struc
         * need to be interruptible while waiting.
         */
        INIT_WORK_ONSTACK(&flush.work, flush_probe);
 -      COMPLETION_INITIALIZER_ONSTACK(flush.cmp);
 +      init_completion(&flush.cmp);
        queue_work(nfit_wq, &flush.work);
        mutex_unlock(&acpi_desc->init_mutex);
  
diff --combined drivers/nvdimm/btt.c
index 60491641a8d67c8e05d7484f929ac897ac511a58,b9008c3f0d1789766d1268d58f9316575240fd32..d5612bd1cc81cc4306f383ed7d1448cd3b487293
@@@ -31,6 -31,16 +31,16 @@@ enum log_ent_request 
        LOG_OLD_ENT
  };
  
+ static struct device *to_dev(struct arena_info *arena)
+ {
+       return &arena->nd_btt->dev;
+ }
+ static u64 adjust_initial_offset(struct nd_btt *nd_btt, u64 offset)
+ {
+       return offset + nd_btt->initial_offset;
+ }
  static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
                void *buf, size_t n, unsigned long flags)
  {
@@@ -38,7 -48,7 +48,7 @@@
        struct nd_namespace_common *ndns = nd_btt->ndns;
  
        /* arena offsets may be shifted from the base of the device */
-       offset += arena->nd_btt->initial_offset;
+       offset = adjust_initial_offset(nd_btt, offset);
        return nvdimm_read_bytes(ndns, offset, buf, n, flags);
  }
  
@@@ -49,7 -59,7 +59,7 @@@ static int arena_write_bytes(struct are
        struct nd_namespace_common *ndns = nd_btt->ndns;
  
        /* arena offsets may be shifted from the base of the device */
-       offset += arena->nd_btt->initial_offset;
+       offset = adjust_initial_offset(nd_btt, offset);
        return nvdimm_write_bytes(ndns, offset, buf, n, flags);
  }
  
@@@ -62,8 -72,10 +72,10 @@@ static int btt_info_write(struct arena_
         * We rely on that to make sure rw_bytes does error clearing
         * correctly, so make sure that is the case.
         */
-       WARN_ON_ONCE(!IS_ALIGNED(arena->infooff, 512));
-       WARN_ON_ONCE(!IS_ALIGNED(arena->info2off, 512));
+       dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->infooff, 512),
+               "arena->infooff: %#llx is unaligned\n", arena->infooff);
+       dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->info2off, 512),
+               "arena->info2off: %#llx is unaligned\n", arena->info2off);
  
        ret = arena_write_bytes(arena, arena->info2off, super,
                        sizeof(struct btt_sb), 0);
@@@ -76,7 -88,6 +88,6 @@@
  
  static int btt_info_read(struct arena_info *arena, struct btt_sb *super)
  {
-       WARN_ON(!super);
        return arena_read_bytes(arena, arena->infooff, super,
                        sizeof(struct btt_sb), 0);
  }
@@@ -92,7 -103,10 +103,10 @@@ static int __btt_map_write(struct arena
  {
        u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
  
-       WARN_ON(lba >= arena->external_nlba);
+       if (unlikely(lba >= arena->external_nlba))
+               dev_err_ratelimited(to_dev(arena),
+                       "%s: lba %#x out of range (max: %#x)\n",
+                       __func__, lba, arena->external_nlba);
        return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE, flags);
  }
  
@@@ -106,7 -120,7 +120,7 @@@ static int btt_map_write(struct arena_i
         * This 'mapping' is supposed to be just the LBA mapping, without
         * any flags set, so strip the flag bits.
         */
-       mapping &= MAP_LBA_MASK;
+       mapping = ent_lba(mapping);
  
        ze = (z_flag << 1) + e_flag;
        switch (ze) {
                 * construed as a valid 'normal' case, but we decide not to,
                 * to avoid confusion
                 */
-               WARN_ONCE(1, "Invalid use of Z and E flags\n");
+               dev_err_ratelimited(to_dev(arena),
+                       "Invalid use of Z and E flags\n");
                return -EIO;
        }
  
@@@ -147,7 -162,10 +162,10 @@@ static int btt_map_read(struct arena_in
        u32 raw_mapping, postmap, ze, z_flag, e_flag;
        u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
  
-       WARN_ON(lba >= arena->external_nlba);
+       if (unlikely(lba >= arena->external_nlba))
+               dev_err_ratelimited(to_dev(arena),
+                       "%s: lba %#x out of range (max: %#x)\n",
+                       __func__, lba, arena->external_nlba);
  
        ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE, rwb_flags);
        if (ret)
  
        raw_mapping = le32_to_cpu(in);
  
-       z_flag = (raw_mapping & MAP_TRIM_MASK) >> MAP_TRIM_SHIFT;
-       e_flag = (raw_mapping & MAP_ERR_MASK) >> MAP_ERR_SHIFT;
+       z_flag = ent_z_flag(raw_mapping);
+       e_flag = ent_e_flag(raw_mapping);
        ze = (z_flag << 1) + e_flag;
-       postmap = raw_mapping & MAP_LBA_MASK;
+       postmap = ent_lba(raw_mapping);
  
        /* Reuse the {z,e}_flag variables for *trim and *error */
        z_flag = 0;
  static int btt_log_read_pair(struct arena_info *arena, u32 lane,
                        struct log_entry *ent)
  {
-       WARN_ON(!ent);
        return arena_read_bytes(arena,
                        arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
                        2 * LOG_ENT_SIZE, 0);
@@@ -299,11 -316,6 +316,6 @@@ static int btt_log_get_old(struct log_e
        return old;
  }
  
- static struct device *to_dev(struct arena_info *arena)
- {
-       return &arena->nd_btt->dev;
- }
  /*
   * This function copies the desired (old/new) log entry into ent if
   * it is not NULL. It returns the sub-slot number (0 or 1)
@@@ -381,7 -393,9 +393,9 @@@ static int btt_flog_write(struct arena_
        arena->freelist[lane].sub = 1 - arena->freelist[lane].sub;
        if (++(arena->freelist[lane].seq) == 4)
                arena->freelist[lane].seq = 1;
-       arena->freelist[lane].block = le32_to_cpu(ent->old_map);
+       if (ent_e_flag(ent->old_map))
+               arena->freelist[lane].has_err = 1;
+       arena->freelist[lane].block = le32_to_cpu(ent_lba(ent->old_map));
  
        return ret;
  }
@@@ -407,12 -421,14 +421,14 @@@ static int btt_map_init(struct arena_in
         * make sure rw_bytes does error clearing correctly, so make sure that
         * is the case.
         */
-       WARN_ON_ONCE(!IS_ALIGNED(arena->mapoff, 512));
+       dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->mapoff, 512),
+               "arena->mapoff: %#llx is unaligned\n", arena->mapoff);
  
        while (mapsize) {
                size_t size = min(mapsize, chunk_size);
  
-               WARN_ON_ONCE(size < 512);
+               dev_WARN_ONCE(to_dev(arena), size < 512,
+                       "chunk size: %#zx is unaligned\n", size);
                ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
                                size, 0);
                if (ret)
@@@ -449,12 -465,14 +465,14 @@@ static int btt_log_init(struct arena_in
         * make sure rw_bytes does error clearing correctly, so make sure that
         * is the case.
         */
-       WARN_ON_ONCE(!IS_ALIGNED(arena->logoff, 512));
+       dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->logoff, 512),
+               "arena->logoff: %#llx is unaligned\n", arena->logoff);
  
        while (logsize) {
                size_t size = min(logsize, chunk_size);
  
-               WARN_ON_ONCE(size < 512);
+               dev_WARN_ONCE(to_dev(arena), size < 512,
+                       "chunk size: %#zx is unaligned\n", size);
                ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf,
                                size, 0);
                if (ret)
        return ret;
  }
  
+ static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
+ {
+       return arena->dataoff + ((u64)lba * arena->internal_lbasize);
+ }
+ static int arena_clear_freelist_error(struct arena_info *arena, u32 lane)
+ {
+       int ret = 0;
+       if (arena->freelist[lane].has_err) {
+               void *zero_page = page_address(ZERO_PAGE(0));
+               u32 lba = arena->freelist[lane].block;
+               u64 nsoff = to_namespace_offset(arena, lba);
+               unsigned long len = arena->sector_size;
+               mutex_lock(&arena->err_lock);
+               while (len) {
+                       unsigned long chunk = min(len, PAGE_SIZE);
+                       ret = arena_write_bytes(arena, nsoff, zero_page,
+                               chunk, 0);
+                       if (ret)
+                               break;
+                       len -= chunk;
+                       nsoff += chunk;
+                       if (len == 0)
+                               arena->freelist[lane].has_err = 0;
+               }
+               mutex_unlock(&arena->err_lock);
+       }
+       return ret;
+ }
  static int btt_freelist_init(struct arena_info *arena)
  {
        int old, new, ret;
                arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq));
                arena->freelist[i].block = le32_to_cpu(log_new.old_map);
  
+               /*
+                * FIXME: if error clearing fails during init, we want to make
+                * the BTT read-only
+                */
+               if (ent_e_flag(log_new.old_map)) {
+                       ret = arena_clear_freelist_error(arena, i);
+                       if (ret)
+                               dev_err_ratelimited(to_dev(arena),
+                                       "Unable to clear known errors\n");
+               }
                /* This implies a newly created or untouched flog entry */
                if (log_new.old_map == log_new.new_map)
                        continue;
                        if (ret)
                                return ret;
                }
        }
  
        return 0;
@@@ -566,6 -628,7 +628,7 @@@ static struct arena_info *alloc_arena(s
        if (!arena)
                return NULL;
        arena->nd_btt = btt->nd_btt;
+       arena->sector_size = btt->sector_size;
  
        if (!size)
                return arena;
@@@ -694,6 -757,7 +757,7 @@@ static int discover_arenas(struct btt *
                arena->external_lba_start = cur_nlba;
                parse_arena_meta(arena, super, cur_off);
  
+               mutex_init(&arena->err_lock);
                ret = btt_freelist_init(arena);
                if (ret)
                        goto out;
@@@ -904,11 -968,6 +968,6 @@@ static void unlock_map(struct arena_inf
        spin_unlock(&arena->map_locks[idx].lock);
  }
  
- static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
- {
-       return arena->dataoff + ((u64)lba * arena->internal_lbasize);
- }
  static int btt_data_read(struct arena_info *arena, struct page *page,
                        unsigned int off, u32 lba, u32 len)
  {
@@@ -1032,6 -1091,7 +1091,7 @@@ static int btt_read_pg(struct btt *btt
                 */
                while (1) {
                        u32 new_map;
+                       int new_t, new_e;
  
                        if (t_flag) {
                                zero_fill_data(page, off, cur_len);
                         */
                        barrier();
  
-                       ret = btt_map_read(arena, premap, &new_map, &t_flag,
-                                               &e_flag, NVDIMM_IO_ATOMIC);
+                       ret = btt_map_read(arena, premap, &new_map, &new_t,
+                                               &new_e, NVDIMM_IO_ATOMIC);
                        if (ret)
                                goto out_rtt;
  
-                       if (postmap == new_map)
+                       if ((postmap == new_map) && (t_flag == new_t) &&
+                                       (e_flag == new_e))
                                break;
  
                        postmap = new_map;
+                       t_flag = new_t;
+                       e_flag = new_e;
                }
  
                ret = btt_data_read(arena, page, off, postmap, cur_len);
-               if (ret)
+               if (ret) {
+                       int rc;
+                       /* Media error - set the e_flag */
+                       rc = btt_map_write(arena, premap, postmap, 0, 1,
+                               NVDIMM_IO_ATOMIC);
                        goto out_rtt;
+               }
  
                if (bip) {
                        ret = btt_rw_integrity(btt, bip, arena, postmap, READ);
        return ret;
  }
  
+ /*
+  * Normally, arena_{read,write}_bytes will take care of the initial offset
+  * adjustment, but in the case of btt_is_badblock, where we query is_bad_pmem,
+  * we need the final, raw namespace offset here
+  */
+ static bool btt_is_badblock(struct btt *btt, struct arena_info *arena,
+               u32 postmap)
+ {
+       u64 nsoff = adjust_initial_offset(arena->nd_btt,
+                       to_namespace_offset(arena, postmap));
+       sector_t phys_sector = nsoff >> 9;
+       return is_bad_pmem(btt->phys_bb, phys_sector, arena->internal_lbasize);
+ }
  static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
                        sector_t sector, struct page *page, unsigned int off,
                        unsigned int len)
  
        while (len) {
                u32 cur_len;
+               int e_flag;
  
+  retry:
                lane = nd_region_acquire_lane(btt->nd_region);
  
                ret = lba_to_arena(btt, sector, &premap, &arena);
                        goto out_lane;
                }
  
+               if (btt_is_badblock(btt, arena, arena->freelist[lane].block))
+                       arena->freelist[lane].has_err = 1;
+               if (mutex_is_locked(&arena->err_lock)
+                               || arena->freelist[lane].has_err) {
+                       nd_region_release_lane(btt->nd_region, lane);
+                       ret = arena_clear_freelist_error(arena, lane);
+                       if (ret)
+                               return ret;
+                       /* OK to acquire a different lane/free block */
+                       goto retry;
+               }
                new_postmap = arena->freelist[lane].block;
  
                /* Wait if the new block is being read from */
                }
  
                lock_map(arena, premap);
-               ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL,
+               ret = btt_map_read(arena, premap, &old_postmap, NULL, &e_flag,
                                NVDIMM_IO_ATOMIC);
                if (ret)
                        goto out_map;
                        ret = -EIO;
                        goto out_map;
                }
+               if (e_flag)
+                       set_e_flag(old_postmap);
  
                log.lba = cpu_to_le32(premap);
                log.old_map = cpu_to_le32(old_postmap);
                if (ret)
                        goto out_map;
  
-               ret = btt_map_write(arena, premap, new_postmap, 0, 0, 0);
+               ret = btt_map_write(arena, premap, new_postmap, 0, 0,
+                       NVDIMM_IO_ATOMIC);
                if (ret)
                        goto out_map;
  
                unlock_map(arena, premap);
                nd_region_release_lane(btt->nd_region, lane);
  
+               if (e_flag) {
+                       ret = arena_clear_freelist_error(arena, lane);
+                       if (ret)
+                               return ret;
+               }
                len -= cur_len;
                off += cur_len;
                sector += btt->sector_size >> SECTOR_SHIFT;
@@@ -1211,11 -1321,13 +1321,13 @@@ static blk_qc_t btt_make_request(struc
        bio_for_each_segment(bvec, bio, iter) {
                unsigned int len = bvec.bv_len;
  
-               BUG_ON(len > PAGE_SIZE);
-               /* Make sure len is in multiples of sector size. */
-               /* XXX is this right? */
-               BUG_ON(len < btt->sector_size);
-               BUG_ON(len % btt->sector_size);
+               if (len > PAGE_SIZE || len < btt->sector_size ||
+                               len % btt->sector_size) {
+                       dev_err_ratelimited(&btt->nd_btt->dev,
+                               "unaligned bio segment (len: %d)\n", len);
+                       bio->bi_status = BLK_STS_IOERR;
+                       break;
+               }
  
                err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
                                  op_is_write(bio_op(bio)), iter.bi_sector);
@@@ -1241,10 -1353,8 +1353,10 @@@ static int btt_rw_page(struct block_dev
  {
        struct btt *btt = bdev->bd_disk->private_data;
        int rc;
 +      unsigned int len;
  
 -      rc = btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector);
 +      len = hpage_nr_pages(page) * PAGE_SIZE;
 +      rc = btt_do_bvec(btt, NULL, page, len, 0, is_write, sector);
        if (rc == 0)
                page_endio(page, is_write, 0);
  
@@@ -1345,6 -1455,7 +1457,7 @@@ static struct btt *btt_init(struct nd_b
  {
        int ret;
        struct btt *btt;
+       struct nd_namespace_io *nsio;
        struct device *dev = &nd_btt->dev;
  
        btt = devm_kzalloc(dev, sizeof(struct btt), GFP_KERNEL);
        INIT_LIST_HEAD(&btt->arena_list);
        mutex_init(&btt->init_lock);
        btt->nd_region = nd_region;
+       nsio = to_nd_namespace_io(&nd_btt->ndns->dev);
+       btt->phys_bb = &nsio->bb;
  
        ret = discover_arenas(btt);
        if (ret) {
@@@ -1431,6 -1544,8 +1546,8 @@@ int nvdimm_namespace_attach_btt(struct 
        }
  
        btt_sb = devm_kzalloc(&nd_btt->dev, sizeof(*btt_sb), GFP_KERNEL);
+       if (!btt_sb)
+               return -ENOMEM;
  
        /*
         * If this returns < 0, that is ok as it just means there wasn't
diff --combined drivers/nvdimm/nd.h
index a87f793f2945ec4b46a283fa482f746b94996c15,023fc93e21a5f23fbd8c7d847a3060ae0862cd35..9c758a91372bbf6c72f9ca9a9af99615ec965426
@@@ -42,7 -42,7 +42,7 @@@ struct nd_poison 
  
  struct nvdimm_drvdata {
        struct device *dev;
-       int nsindex_size, nslabel_size;
+       int nslabel_size;
        struct nd_cmd_get_config_size nsarea;
        void *data;
        int ns_current, ns_next;
@@@ -134,6 -134,7 +134,7 @@@ struct nd_mapping 
        struct nvdimm *nvdimm;
        u64 start;
        u64 size;
+       int position;
        struct list_head labels;
        struct mutex lock;
        /*
@@@ -233,10 -234,10 +234,10 @@@ void nd_device_unregister(struct devic
  void nd_device_notify(struct device *dev, enum nvdimm_event event);
  int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
                size_t len);
- ssize_t nd_sector_size_show(unsigned long current_lbasize,
+ ssize_t nd_size_select_show(unsigned long current_size,
                const unsigned long *supported, char *buf);
- ssize_t nd_sector_size_store(struct device *dev, const char *buf,
-               unsigned long *current_lbasize, const unsigned long *supported);
+ ssize_t nd_size_select_store(struct device *dev, const char *buf,
+               unsigned long *current_size, const unsigned long *supported);
  int __init nvdimm_init(void);
  int __init nd_region_init(void);
  int __init nd_label_init(void);
@@@ -285,6 -286,13 +286,13 @@@ static inline struct device *nd_btt_cre
  
  struct nd_pfn *to_nd_pfn(struct device *dev);
  #if IS_ENABLED(CONFIG_NVDIMM_PFN)
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ #define PFN_DEFAULT_ALIGNMENT HPAGE_PMD_SIZE
+ #else
+ #define PFN_DEFAULT_ALIGNMENT PAGE_SIZE
+ #endif
  int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns);
  bool is_nd_pfn(struct device *dev);
  struct device *nd_pfn_create(struct nd_region *nd_region);
@@@ -390,22 -398,21 +398,22 @@@ int nd_region_activate(struct nd_regio
  void __nd_iostat_start(struct bio *bio, unsigned long *start);
  static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
  {
 -      struct gendisk *disk = bio->bi_bdev->bd_disk;
 +      struct gendisk *disk = bio->bi_disk;
  
        if (!blk_queue_io_stat(disk->queue))
                return false;
  
        *start = jiffies;
 -      generic_start_io_acct(bio_data_dir(bio),
 +      generic_start_io_acct(disk->queue, bio_data_dir(bio),
                              bio_sectors(bio), &disk->part0);
        return true;
  }
  static inline void nd_iostat_end(struct bio *bio, unsigned long start)
  {
 -      struct gendisk *disk = bio->bi_bdev->bd_disk;
 +      struct gendisk *disk = bio->bi_disk;
  
 -      generic_end_io_acct(bio_data_dir(bio), &disk->part0, start);
 +      generic_end_io_acct(disk->queue, bio_data_dir(bio), &disk->part0,
 +                              start);
  }
  static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector,
                unsigned int len)
diff --combined fs/ext4/ext4.h
index 84b9da1922387999d315cbd442b4a7e6dc92ab49,194e622dc3dd942b37b460d4c49405aa64083c3c..e2abe01c8c6bac60e04ef26fd4559c854fbef280
@@@ -838,11 -838,13 +838,11 @@@ static inline void ext4_decode_extra_ti
  {
        if (unlikely(sizeof(time->tv_sec) > 4 &&
                        (extra & cpu_to_le32(EXT4_EPOCH_MASK)))) {
 -#if LINUX_VERSION_CODE < KERNEL_VERSION(4,20,0)
 +
 +#if 1
                /* Handle legacy encoding of pre-1970 dates with epoch
 -               * bits 1,1.  We assume that by kernel version 4.20,
 -               * everyone will have run fsck over the affected
 -               * filesystems to correct the problem.  (This
 -               * backwards compatibility may be removed before this
 -               * time, at the discretion of the ext4 developers.)
 +               * bits 1,1. (This backwards compatibility may be removed
 +               * at the discretion of the ext4 developers.)
                 */
                u64 extra_bits = le32_to_cpu(extra) & EXT4_EPOCH_MASK;
                if (extra_bits == 3 && ((time->tv_sec) & 0x80000000) != 0)
@@@ -1526,6 -1528,7 +1526,7 @@@ struct ext4_sb_info 
  
        /* Barrier between changing inodes' journal flags and writepages ops. */
        struct percpu_rw_semaphore s_journal_flag_rwsem;
+       struct dax_device *s_daxdev;
  };
  
  static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@@ -1565,7 -1568,6 +1566,7 @@@ enum 
                                           nolocking */
        EXT4_STATE_MAY_INLINE_DATA,     /* may have in-inode data */
        EXT4_STATE_EXT_PRECACHED,       /* extents have been precached */
 +      EXT4_STATE_LUSTRE_EA_INODE,     /* Lustre-style ea_inode */
  };
  
  #define EXT4_INODE_BIT_FNS(name, field, offset)                               \
diff --combined fs/ext4/inode.c
index e963508ea35ffae472a1cd0800b104fe23fc0b52,16424b5c4e885b7d9f075cd4c850e8bda0cf5394..31db875bc7a13dde67b9a6b17ce498b4c16bd17f
@@@ -1720,12 -1720,13 +1720,12 @@@ static void mpage_release_unused_pages(
  
        pagevec_init(&pvec, 0);
        while (index <= end) {
 -              nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
 +              nr_pages = pagevec_lookup_range(&pvec, mapping, &index, end);
                if (nr_pages == 0)
                        break;
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
 -                      if (page->index > end)
 -                              break;
 +
                        BUG_ON(!PageLocked(page));
                        BUG_ON(PageWriteback(page));
                        if (invalidate) {
                        }
                        unlock_page(page);
                }
 -              index = pvec.pages[nr_pages - 1]->index + 1;
                pagevec_release(&pvec);
        }
  }
@@@ -2346,13 -2348,17 +2346,13 @@@ static int mpage_map_and_submit_buffers
  
        pagevec_init(&pvec, 0);
        while (start <= end) {
 -              nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start,
 -                                        PAGEVEC_SIZE);
 +              nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping,
 +                                              &start, end);
                if (nr_pages == 0)
                        break;
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
  
 -                      if (page->index > end)
 -                              break;
 -                      /* Up to 'end' pages must be contiguous */
 -                      BUG_ON(page->index != start);
                        bh = head = page_buffers(page);
                        do {
                                if (lblk < mpd->map.m_lblk)
                                pagevec_release(&pvec);
                                return err;
                        }
 -                      start++;
                }
                pagevec_release(&pvec);
        }
@@@ -3397,7 -3404,7 +3397,7 @@@ static int ext4_releasepage(struct pag
  static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
                            unsigned flags, struct iomap *iomap)
  {
-       struct block_device *bdev;
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        unsigned int blkbits = inode->i_blkbits;
        unsigned long first_block = offset >> blkbits;
        unsigned long last_block = (offset + length - 1) >> blkbits;
@@@ -3466,12 -3473,8 +3466,8 @@@ retry
        }
  
        iomap->flags = 0;
-       bdev = inode->i_sb->s_bdev;
-       iomap->bdev = bdev;
-       if (blk_queue_dax(bdev->bd_queue))
-               iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
-       else
-               iomap->dax_dev = NULL;
+       iomap->bdev = inode->i_sb->s_bdev;
+       iomap->dax_dev = sbi->s_daxdev;
        iomap->offset = first_block << blkbits;
  
        if (ret == 0) {
@@@ -3504,7 -3507,6 +3500,6 @@@ static int ext4_iomap_end(struct inode 
        int blkbits = inode->i_blkbits;
        bool truncate = false;
  
-       fs_put_dax(iomap->dax_dev);
        if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
                return 0;
  
@@@ -4890,6 -4892,14 +4885,6 @@@ struct inode *ext4_iget(struct super_bl
        brelse(iloc.bh);
        ext4_set_inode_flags(inode);
  
 -      if (ei->i_flags & EXT4_EA_INODE_FL) {
 -              ext4_xattr_inode_set_class(inode);
 -
 -              inode_lock(inode);
 -              inode->i_flags |= S_NOQUOTA;
 -              inode_unlock(inode);
 -      }
 -
        unlock_new_inode(inode);
        return inode;
  
diff --combined fs/ext4/super.c
index 93aece6891f296b4ffa7f571f1cb84625e900311,55772b2d05ee5a19d1bf5357c7288fc1925a338f..71b9a667e1bc2281c701231a461538080a64ad23
@@@ -951,6 -951,7 +951,7 @@@ static void ext4_put_super(struct super
        if (sbi->s_chksum_driver)
                crypto_free_shash(sbi->s_chksum_driver);
        kfree(sbi->s_blockgroup_lock);
+       fs_put_dax(sbi->s_daxdev);
        kfree(sbi);
  }
  
@@@ -2404,7 -2405,6 +2405,7 @@@ static void ext4_orphan_cleanup(struct 
        unsigned int s_flags = sb->s_flags;
        int ret, nr_orphans = 0, nr_truncates = 0;
  #ifdef CONFIG_QUOTA
 +      int quota_update = 0;
        int i;
  #endif
        if (!es->s_last_orphan) {
  #ifdef CONFIG_QUOTA
        /* Needed for iput() to work correctly and not trash data */
        sb->s_flags |= MS_ACTIVE;
 -      /* Turn on quotas so that they are updated correctly */
 +
 +      /*
 +       * Turn on quotas which were not enabled for read-only mounts if
 +       * filesystem has quota feature, so that they are updated correctly.
 +       */
 +      if (ext4_has_feature_quota(sb) && (s_flags & MS_RDONLY)) {
 +              int ret = ext4_enable_quotas(sb);
 +
 +              if (!ret)
 +                      quota_update = 1;
 +              else
 +                      ext4_msg(sb, KERN_ERR,
 +                              "Cannot turn on quotas: error %d", ret);
 +      }
 +
 +      /* Turn on journaled quotas used for old sytle */
        for (i = 0; i < EXT4_MAXQUOTAS; i++) {
                if (EXT4_SB(sb)->s_qf_names[i]) {
                        int ret = ext4_quota_on_mount(sb, i);
 -                      if (ret < 0)
 +
 +                      if (!ret)
 +                              quota_update = 1;
 +                      else
                                ext4_msg(sb, KERN_ERR,
                                        "Cannot turn on journaled "
 -                                      "quota: error %d", ret);
 +                                      "quota: type %d: error %d", i, ret);
                }
        }
  #endif
                ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
                       PLURAL(nr_truncates));
  #ifdef CONFIG_QUOTA
 -      /* Turn quotas off */
 -      for (i = 0; i < EXT4_MAXQUOTAS; i++) {
 -              if (sb_dqopt(sb)->files[i])
 -                      dquot_quota_off(sb, i);
 +      /* Turn off quotas if they were enabled for orphan cleanup */
 +      if (quota_update) {
 +              for (i = 0; i < EXT4_MAXQUOTAS; i++) {
 +                      if (sb_dqopt(sb)->files[i])
 +                              dquot_quota_off(sb, i);
 +              }
        }
  #endif
        sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@@ -3398,6 -3378,7 +3399,7 @@@ static void ext4_set_resv_clusters(stru
  
  static int ext4_fill_super(struct super_block *sb, void *data, int silent)
  {
+       struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
        char *orig_data = kstrdup(data, GFP_KERNEL);
        struct buffer_head *bh;
        struct ext4_super_block *es = NULL;
        if ((data && !orig_data) || !sbi)
                goto out_free_base;
  
+       sbi->s_daxdev = dax_dev;
        sbi->s_blockgroup_lock =
                kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
        if (!sbi->s_blockgroup_lock)
@@@ -4399,6 -4381,7 +4402,7 @@@ out_fail
  out_free_base:
        kfree(sbi);
        kfree(orig_data);
+       fs_put_dax(dax_dev);
        return err ? err : ret;
  }
  
@@@ -5215,7 -5198,7 +5219,7 @@@ static int ext4_statfs_project(struct s
        dquot = dqget(sb, qid);
        if (IS_ERR(dquot))
                return PTR_ERR(dquot);
 -      spin_lock(&dq_data_lock);
 +      spin_lock(&dquot->dq_dqb_lock);
  
        limit = (dquot->dq_dqb.dqb_bsoftlimit ?
                 dquot->dq_dqb.dqb_bsoftlimit :
                         (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
        }
  
 -      spin_unlock(&dq_data_lock);
 +      spin_unlock(&dquot->dq_dqb_lock);
        dqput(dquot);
        return 0;
  }
@@@ -5284,13 -5267,18 +5288,13 @@@ static int ext4_statfs(struct dentry *d
        return 0;
  }
  
 -/* Helper function for writing quotas on sync - we need to start transaction
 - * before quota file is locked for write. Otherwise the are possible deadlocks:
 - * Process 1                         Process 2
 - * ext4_create()                     quota_sync()
 - *   jbd2_journal_start()                  write_dquot()
 - *   dquot_initialize()                         down(dqio_mutex)
 - *     down(dqio_mutex)                    jbd2_journal_start()
 - *
 - */
  
  #ifdef CONFIG_QUOTA
  
 +/*
 + * Helper functions so that transaction is started before we acquire dqio_sem
 + * to keep correct lock ordering of transaction > dqio_sem
 + */
  static inline struct inode *dquot_to_inode(struct dquot *dquot)
  {
        return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
@@@ -5425,13 -5413,6 +5429,13 @@@ static int ext4_quota_on(struct super_b
                        ext4_msg(sb, KERN_WARNING,
                                "Quota file not on filesystem root. "
                                "Journaled quota will not work");
 +              sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
 +      } else {
 +              /*
 +               * Clear the flag just in case mount options changed since
 +               * last time.
 +               */
 +              sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
        }
  
        /*
@@@ -5528,16 -5509,13 +5532,16 @@@ static int ext4_enable_quotas(struct su
                test_opt(sb, PRJQUOTA),
        };
  
 -      sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
 +      sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
        for (type = 0; type < EXT4_MAXQUOTAS; type++) {
                if (qf_inums[type]) {
                        err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
                                DQUOT_USAGE_ENABLED |
                                (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
                        if (err) {
 +                              for (type--; type >= 0; type--)
 +                                      dquot_quota_off(sb, type);
 +
                                ext4_warning(sb,
                                        "Failed to enable quota tracking "
                                        "(type=%d, err=%d). Please run "
diff --combined fs/xfs/xfs_aops.c
index fffae1390d7f493af03dfcc48d1a545190791d03,78185f3b10b2c3fd5a5702e7822b06ce1affb1d7..29172609f2a31b756cd40da7b42f288fe8b0915b
@@@ -80,16 -80,29 +80,29 @@@ xfs_find_bdev_for_inode
                return mp->m_ddev_targp->bt_bdev;
  }
  
+ struct dax_device *
+ xfs_find_daxdev_for_inode(
+       struct inode            *inode)
+ {
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       if (XFS_IS_REALTIME_INODE(ip))
+               return mp->m_rtdev_targp->bt_daxdev;
+       else
+               return mp->m_ddev_targp->bt_daxdev;
+ }
  /*
   * We're now finished for good with this page.  Update the page state via the
   * associated buffer_heads, paying attention to the start and end offsets that
   * we need to process on the page.
   *
 - * Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last
 - * buffer in the IO. Once it does this, it is unsafe to access the bufferhead or
 - * the page at all, as we may be racing with memory reclaim and it can free both
 - * the bufferhead chain and the page as it will see the page as clean and
 - * unused.
 + * Note that we open code the action in end_buffer_async_write here so that we
 + * only have to iterate over the buffers attached to the page once.  This is not
 + * only more efficient, but also ensures that we only calls end_page_writeback
 + * at the end of the iteration, and thus avoids the pitfall of having the page
 + * and buffers potentially freed after every call to end_buffer_async_write.
   */
  static void
  xfs_finish_page_writeback(
        struct bio_vec          *bvec,
        int                     error)
  {
 -      unsigned int            end = bvec->bv_offset + bvec->bv_len - 1;
 -      struct buffer_head      *head, *bh, *next;
 +      struct buffer_head      *head = page_buffers(bvec->bv_page), *bh = head;
 +      bool                    busy = false;
        unsigned int            off = 0;
 -      unsigned int            bsize;
 +      unsigned long           flags;
  
        ASSERT(bvec->bv_offset < PAGE_SIZE);
        ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0);
 -      ASSERT(end < PAGE_SIZE);
 +      ASSERT(bvec->bv_offset + bvec->bv_len <= PAGE_SIZE);
        ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0);
  
 -      bh = head = page_buffers(bvec->bv_page);
 -
 -      bsize = bh->b_size;
 +      local_irq_save(flags);
 +      bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
        do {
 -              if (off > end)
 -                      break;
 -              next = bh->b_this_page;
 -              if (off < bvec->bv_offset)
 -                      goto next_bh;
 -              bh->b_end_io(bh, !error);
 -next_bh:
 -              off += bsize;
 -      } while ((bh = next) != head);
 +              if (off >= bvec->bv_offset &&
 +                  off < bvec->bv_offset + bvec->bv_len) {
 +                      ASSERT(buffer_async_write(bh));
 +                      ASSERT(bh->b_end_io == NULL);
 +
 +                      if (error) {
 +                              mark_buffer_write_io_error(bh);
 +                              clear_buffer_uptodate(bh);
 +                              SetPageError(bvec->bv_page);
 +                      } else {
 +                              set_buffer_uptodate(bh);
 +                      }
 +                      clear_buffer_async_write(bh);
 +                      unlock_buffer(bh);
 +              } else if (buffer_async_write(bh)) {
 +                      ASSERT(buffer_locked(bh));
 +                      busy = true;
 +              }
 +              off += bh->b_size;
 +      } while ((bh = bh->b_this_page) != head);
 +      bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
 +      local_irq_restore(flags);
 +
 +      if (!busy)
 +              end_page_writeback(bvec->bv_page);
  }
  
  /*
@@@ -148,10 -146,8 +161,10 @@@ xfs_destroy_ioend
        int                     error)
  {
        struct inode            *inode = ioend->io_inode;
 -      struct bio              *last = ioend->io_bio;
 -      struct bio              *bio, *next;
 +      struct bio              *bio = &ioend->io_inline_bio;
 +      struct bio              *last = ioend->io_bio, *next;
 +      u64                     start = bio->bi_iter.bi_sector;
 +      bool                    quiet = bio_flagged(bio, BIO_QUIET);
  
        for (bio = &ioend->io_inline_bio; bio; bio = next) {
                struct bio_vec  *bvec;
  
                bio_put(bio);
        }
 +
 +      if (unlikely(error && !quiet)) {
 +              xfs_err_ratelimited(XFS_I(inode)->i_mount,
 +                      "writeback error on sector %llu", start);
 +      }
  }
  
  /*
@@@ -445,8 -436,7 +458,8 @@@ xfs_start_buffer_writeback
        ASSERT(!buffer_delay(bh));
        ASSERT(!buffer_unwritten(bh));
  
 -      mark_buffer_async_write(bh);
 +      bh->b_end_io = NULL;
 +      set_buffer_async_write(bh);
        set_buffer_uptodate(bh);
        clear_buffer_dirty(bh);
  }
@@@ -540,7 -530,7 +553,7 @@@ xfs_init_bio_from_bh
        struct buffer_head      *bh)
  {
        bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 -      bio->bi_bdev = bh->b_bdev;
 +      bio_set_dev(bio, bh->b_bdev);
  }
  
  static struct xfs_ioend *
diff --combined fs/xfs/xfs_buf.c
index b1c9711e79a46051801a0fe34e95dd0515ff4fef,6deb86c845d1567d32d9a6affadb30edc3712ddc..da14658da3103475940555600581f0bf12217d55
@@@ -1281,7 -1281,7 +1281,7 @@@ next_chunk
        nr_pages = min(total_nr_pages, BIO_MAX_PAGES);
  
        bio = bio_alloc(GFP_NOIO, nr_pages);
 -      bio->bi_bdev = bp->b_target->bt_bdev;
 +      bio_set_dev(bio, bp->b_target->bt_bdev);
        bio->bi_iter.bi_sector = sector;
        bio->bi_end_io = xfs_buf_bio_end_io;
        bio->bi_private = bp;
@@@ -1802,7 -1802,8 +1802,8 @@@ xfs_setsize_buftarg_early
  xfs_buftarg_t *
  xfs_alloc_buftarg(
        struct xfs_mount        *mp,
-       struct block_device     *bdev)
+       struct block_device     *bdev,
+       struct dax_device       *dax_dev)
  {
        xfs_buftarg_t           *btp;
  
        btp->bt_mount = mp;
        btp->bt_dev =  bdev->bd_dev;
        btp->bt_bdev = bdev;
+       btp->bt_daxdev = dax_dev;
  
        if (xfs_setsize_buftarg_early(btp, bdev))
                goto error;
diff --combined fs/xfs/xfs_iomap.c
index 79cb5b3d140c522fd04f9a0f72666f1621c45155,7c934e40733253ae1ffa7468aa4f184d177221f2..a1909bc064e9e70c90ef6ced3017935951aefc2e
@@@ -69,6 -69,7 +69,7 @@@ xfs_bmbt_to_iomap
        iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
        iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
        iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
+       iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip));
  }
  
  xfs_extlen_t
@@@ -274,7 -275,7 +275,7 @@@ xfs_iomap_write_direct
        /*
         * Complete the transaction
         */
 -      error = xfs_defer_finish(&tp, &dfops, NULL);
 +      error = xfs_defer_finish(&tp, &dfops);
        if (error)
                goto out_bmap_cancel;
  
@@@ -520,6 -521,7 +521,6 @@@ xfs_file_iomap_begin_delay
        struct inode            *inode,
        loff_t                  offset,
        loff_t                  count,
 -      unsigned                flags,
        struct iomap            *iomap)
  {
        struct xfs_inode        *ip = XFS_I(inode);
@@@ -783,7 -785,7 +784,7 @@@ xfs_iomap_write_allocate
                        if (error)
                                goto trans_cancel;
  
 -                      error = xfs_defer_finish(&tp, &dfops, NULL);
 +                      error = xfs_defer_finish(&tp, &dfops);
                        if (error)
                                goto trans_cancel;
  
@@@ -905,7 -907,7 +906,7 @@@ xfs_iomap_write_unwritten
                        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
                }
  
 -              error = xfs_defer_finish(&tp, &dfops, NULL);
 +              error = xfs_defer_finish(&tp, &dfops);
                if (error)
                        goto error_on_bmapi_transaction;
  
@@@ -975,7 -977,6 +976,6 @@@ xfs_file_iomap_begin
        int                     nimaps = 1, error = 0;
        bool                    shared = false, trimmed = false;
        unsigned                lockmode;
-       struct block_device     *bdev;
  
        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
        if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
                        !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
                /* Reserve delalloc blocks for regular writeback. */
 -              return xfs_file_iomap_begin_delay(inode, offset, length, flags,
 -                              iomap);
 +              return xfs_file_iomap_begin_delay(inode, offset, length, iomap);
        }
  
        if (need_excl_ilock(ip, flags)) {
  
        xfs_bmbt_to_iomap(ip, iomap, &imap);
  
-       /* optionally associate a dax device with the iomap bdev */
-       bdev = iomap->bdev;
-       if (blk_queue_dax(bdev->bd_queue))
-               iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
-       else
-               iomap->dax_dev = NULL;
        if (shared)
                iomap->flags |= IOMAP_F_SHARED;
        return 0;
@@@ -1169,7 -1164,6 +1162,6 @@@ xfs_file_iomap_end
        unsigned                flags,
        struct iomap            *iomap)
  {
-       fs_put_dax(iomap->dax_dev);
        if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
                return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
                                length, written, iomap);
diff --combined fs/xfs/xfs_super.c
index c1c4c2ea1014a70f2723289769d795088b562fe2,ee4225c65f0ca85e7d5dbbc0b96250cba6c70c26..3008f31753dfc4e915be5cc96144a51a07658f2a
@@@ -714,17 -714,26 +714,26 @@@ STATIC voi
  xfs_close_devices(
        struct xfs_mount        *mp)
  {
+       struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev;
        if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
                struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
+               struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;
                xfs_free_buftarg(mp, mp->m_logdev_targp);
                xfs_blkdev_put(logdev);
+               fs_put_dax(dax_logdev);
        }
        if (mp->m_rtdev_targp) {
                struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
+               struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;
                xfs_free_buftarg(mp, mp->m_rtdev_targp);
                xfs_blkdev_put(rtdev);
+               fs_put_dax(dax_rtdev);
        }
        xfs_free_buftarg(mp, mp->m_ddev_targp);
+       fs_put_dax(dax_ddev);
  }
  
  /*
@@@ -742,6 -751,8 +751,8 @@@ xfs_open_devices
        struct xfs_mount        *mp)
  {
        struct block_device     *ddev = mp->m_super->s_bdev;
+       struct dax_device       *dax_ddev = fs_dax_get_by_bdev(ddev);
+       struct dax_device       *dax_logdev = NULL, *dax_rtdev = NULL;
        struct block_device     *logdev = NULL, *rtdev = NULL;
        int                     error;
  
                error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
                if (error)
                        goto out;
+               dax_logdev = fs_dax_get_by_bdev(logdev);
        }
  
        if (mp->m_rtname) {
                        error = -EINVAL;
                        goto out_close_rtdev;
                }
+               dax_rtdev = fs_dax_get_by_bdev(rtdev);
        }
  
        /*
         * Setup xfs_mount buffer target pointers
         */
        error = -ENOMEM;
-       mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
+       mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
        if (!mp->m_ddev_targp)
                goto out_close_rtdev;
  
        if (rtdev) {
-               mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);
+               mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
                if (!mp->m_rtdev_targp)
                        goto out_free_ddev_targ;
        }
  
        if (logdev && logdev != ddev) {
-               mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);
+               mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
                if (!mp->m_logdev_targp)
                        goto out_free_rtdev_targ;
        } else {
        xfs_free_buftarg(mp, mp->m_ddev_targp);
   out_close_rtdev:
        xfs_blkdev_put(rtdev);
+       fs_put_dax(dax_rtdev);
   out_close_logdev:
-       if (logdev && logdev != ddev)
+       if (logdev && logdev != ddev) {
                xfs_blkdev_put(logdev);
+               fs_put_dax(dax_logdev);
+       }
   out:
+       fs_put_dax(dax_ddev);
        return error;
  }
  
@@@ -1220,7 -1237,7 +1237,7 @@@ xfs_test_remount_options
        tmp_mp->m_super = sb;
        error = xfs_parseargs(tmp_mp, options);
        xfs_free_fsname(tmp_mp);
 -      kfree(tmp_mp);
 +      kmem_free(tmp_mp);
  
        return error;
  }
diff --combined include/linux/dax.h
index eb0bff6f1eab2a1d5043d17c086ffd58a51d4278,ac8afa18f707410ec6b837051f7395650ac6f4dc..46cad1d0f12970e4c764374992c9432af431b7b3
@@@ -57,6 -57,7 +57,7 @@@ static inline void fs_put_dax(struct da
        put_dax(dax_dev);
  }
  
+ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
  #else
  static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
  {
@@@ -71,6 -72,11 +72,11 @@@ static inline struct dax_device *fs_dax
  static inline void fs_put_dax(struct dax_device *dax_dev)
  {
  }
+ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
+ {
+       return NULL;
+ }
  #endif
  
  int dax_read_lock(void);
@@@ -89,6 -95,34 +95,6 @@@ void dax_flush(struct dax_device *dax_d
  void dax_write_cache(struct dax_device *dax_dev, bool wc);
  bool dax_write_cache_enabled(struct dax_device *dax_dev);
  
 -/*
 - * We use lowest available bit in exceptional entry for locking, one bit for
 - * the entry size (PMD) and two more to tell us if the entry is a huge zero
 - * page (HZP) or an empty entry that is just used for locking.  In total four
 - * special bits.
 - *
 - * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the HZP and
 - * EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
 - * block allocation.
 - */
 -#define RADIX_DAX_SHIFT       (RADIX_TREE_EXCEPTIONAL_SHIFT + 4)
 -#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
 -#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
 -#define RADIX_DAX_HZP (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
 -#define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
 -
 -static inline unsigned long dax_radix_sector(void *entry)
 -{
 -      return (unsigned long)entry >> RADIX_DAX_SHIFT;
 -}
 -
 -static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
 -{
 -      return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
 -                      ((unsigned long)sector << RADIX_DAX_SHIFT) |
 -                      RADIX_DAX_ENTRY_LOCK);
 -}
 -
  ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
                const struct iomap_ops *ops);
  int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
  int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
  int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
                                      pgoff_t index);
 -void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 -              pgoff_t index, void *entry, bool wake_all);
  
  #ifdef CONFIG_FS_DAX
  int __dax_zero_page_range(struct block_device *bdev,
@@@ -110,6 -146,21 +116,6 @@@ static inline int __dax_zero_page_range
  }
  #endif
  
 -#ifdef CONFIG_FS_DAX_PMD
 -static inline unsigned int dax_radix_order(void *entry)
 -{
 -      if ((unsigned long)entry & RADIX_DAX_PMD)
 -              return PMD_SHIFT - PAGE_SHIFT;
 -      return 0;
 -}
 -#else
 -static inline unsigned int dax_radix_order(void *entry)
 -{
 -      return 0;
 -}
 -#endif
 -int dax_pfn_mkwrite(struct vm_fault *vmf);
 -
  static inline bool dax_mapping(struct address_space *mapping)
  {
        return mapping->host && IS_DAX(mapping->host);
diff --combined lib/Kconfig
index 40b114a11d7cea28a6b93521fa58ad958d8ddbe4,527da69e3be1a1c192e10c787befcb82bad350cd..a85e6f76add5c9149c79b0787804b4b1774c0982
@@@ -559,9 -559,6 +559,6 @@@ config ARCH_HAS_PMEM_AP
  config ARCH_HAS_UACCESS_FLUSHCACHE
        bool
  
- config ARCH_HAS_MMIO_FLUSH
-       bool
  config STACKDEPOT
        bool
        select STACKTRACE
@@@ -575,7 -572,4 +572,7 @@@ config PARMA
  config PRIME_NUMBERS
        tristate
  
 +config STRING_SELFTEST
 +      bool "Test string functions"
 +
  endmenu