bool osc_pc_lpi_support_confirmed;
EXPORT_SYMBOL_GPL(osc_pc_lpi_support_confirmed);
+/*
+ * ACPI 6.2 Section 6.2.11.2 'Platform-Wide OSPM Capabilities':
+ * Starting with ACPI Specification 6.2, all _CPC registers can be in
+ * PCC, System Memory, System IO, or Functional Fixed Hardware address
+ * spaces. OSPM support for this more flexible register space scheme is
+ * indicated by the "Flexible Address Space for CPPC Registers" _OSC bit.
+ *
+ * Otherwise (cf ACPI 6.1, s8.4.7.1.1.X), _CPC registers must be in:
+ * - PCC or Functional Fixed Hardware address space if defined
+ * - SystemMemory address space (NULL register) if not defined
+ */
+bool osc_cpc_flexible_adr_space_confirmed;
+EXPORT_SYMBOL_GPL(osc_cpc_flexible_adr_space_confirmed);
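A consumer of this flag would typically accept _CPC registers in the newer address spaces only when the platform acknowledged the capability. A minimal sketch of such a check, assuming the usual struct cpc_reg layout (the helper name is invented and the NULL-register special case is ignored):

static bool cpc_addr_space_allowed(const struct cpc_reg *reg)
{
	/* PCC and Functional Fixed Hardware have always been permitted. */
	if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM ||
	    reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE)
		return true;

	/*
	 * System Memory and System IO registers are only valid when the
	 * platform confirmed the "Flexible Address Space" _OSC bit.
	 */
	return osc_cpc_flexible_adr_space_confirmed;
}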
+
/*
* ACPI 6.4 Operating System Capabilities for USB.
*/
#endif
#ifdef CONFIG_X86
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_GENERIC_INITIATOR_SUPPORT;
- if (boot_cpu_has(X86_FEATURE_HWP)) {
- capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_SUPPORT;
- capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPCV2_SUPPORT;
- }
#endif
+#ifdef CONFIG_ACPI_CPPC_LIB
+ capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_SUPPORT;
+ capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPCV2_SUPPORT;
+#endif
+
+ capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_FLEXIBLE_ADR_SPACE;
+
if (IS_ENABLED(CONFIG_SCHED_MC_PRIO))
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_DIVERSE_HIGH_SUPPORT;
return;
}
-#ifdef CONFIG_X86
- if (boot_cpu_has(X86_FEATURE_HWP))
- osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] &
- (OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT));
+#ifdef CONFIG_ACPI_CPPC_LIB
+ osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] &
+ (OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT));
#endif
/*
capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT;
osc_sb_native_usb4_support_confirmed =
capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT;
+ osc_cpc_flexible_adr_space_confirmed =
+ capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_CPC_FLEXIBLE_ADR_SPACE;
}
kfree(context.ret.pointer);
}
osc_sb_native_usb4_control =
- control & ((u32 *)context.ret.pointer)[OSC_CONTROL_DWORD];
+ control & acpi_osc_ctx_get_pci_control(&context);
acpi_bus_decode_usb_osc("USB4 _OSC: OS supports", control);
acpi_bus_decode_usb_osc("USB4 _OSC: OS controls",
}
EXPORT_SYMBOL_GPL(acpi_bus_for_each_dev);
+struct acpi_dev_walk_context {
+ int (*fn)(struct acpi_device *, void *);
+ void *data;
+};
+
+static int acpi_dev_for_one_check(struct device *dev, void *context)
+{
+ struct acpi_dev_walk_context *adwc = context;
+
+ if (dev->bus != &acpi_bus_type)
+ return 0;
+
+ return adwc->fn(to_acpi_device(dev), adwc->data);
+}
+
+int acpi_dev_for_each_child(struct acpi_device *adev,
+ int (*fn)(struct acpi_device *, void *), void *data)
+{
+ struct acpi_dev_walk_context adwc = {
+ .fn = fn,
+ .data = data,
+ };
+
+ return device_for_each_child(&adev->dev, &adwc, acpi_dev_for_one_check);
+}
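A usage sketch for the new child walker (the callback and counter are illustrative, not part of this change):

static int count_one(struct acpi_device *child, void *data)
{
	int *count = data;

	(*count)++;
	return 0;	/* a nonzero return stops the walk */
}

static int count_acpi_children(struct acpi_device *adev)
{
	int count = 0;

	acpi_dev_for_each_child(adev, count_one, &count);
	return count;
}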
+
/* --------------------------------------------------------------------------
Initialization/Cleanup
-------------------------------------------------------------------------- */
{ OSC_PCI_EXPRESS_DPC_CONTROL, "DPC" },
};
+ static struct pci_osc_bit_struct cxl_osc_support_bit[] = {
+ { OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT, "CXL11PortRegAccess" },
+ { OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT, "CXL20PortDevRegAccess" },
+ { OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT, "CXLProtocolErrorReporting" },
+ { OSC_CXL_NATIVE_HP_SUPPORT, "CXLNativeHotPlug" },
+ };
+
+ static struct pci_osc_bit_struct cxl_osc_control_bit[] = {
+ { OSC_CXL_ERROR_REPORTING_CONTROL, "CXLMemErrorReporting" },
+ };
+
static void decode_osc_bits(struct acpi_pci_root *root, char *msg, u32 word,
struct pci_osc_bit_struct *table, int size)
{
ARRAY_SIZE(pci_osc_control_bit));
}
+ static void decode_cxl_osc_support(struct acpi_pci_root *root, char *msg, u32 word)
+ {
+ decode_osc_bits(root, msg, word, cxl_osc_support_bit,
+ ARRAY_SIZE(cxl_osc_support_bit));
+ }
+
+ static void decode_cxl_osc_control(struct acpi_pci_root *root, char *msg, u32 word)
+ {
+ decode_osc_bits(root, msg, word, cxl_osc_control_bit,
+ ARRAY_SIZE(cxl_osc_control_bit));
+ }
+
+ static inline bool is_pcie(struct acpi_pci_root *root)
+ {
+ return root->bridge_type == ACPI_BRIDGE_TYPE_PCIE;
+ }
+
+ static inline bool is_cxl(struct acpi_pci_root *root)
+ {
+ return root->bridge_type == ACPI_BRIDGE_TYPE_CXL;
+ }
+
static u8 pci_osc_uuid_str[] = "33DB4D5B-1FF7-401C-9657-7441C03DD766";
+ static u8 cxl_osc_uuid_str[] = "68F2D50B-C469-4d8A-BD3D-941A103FD3FC";
- static acpi_status acpi_pci_run_osc(acpi_handle handle,
- const u32 *capbuf, u32 *retval)
+ static char *to_uuid(struct acpi_pci_root *root)
+ {
+ if (is_cxl(root))
+ return cxl_osc_uuid_str;
+ return pci_osc_uuid_str;
+ }
+
+ static int cap_length(struct acpi_pci_root *root)
+ {
+ if (is_cxl(root))
+ return sizeof(u32) * OSC_CXL_CAPABILITY_DWORDS;
+ return sizeof(u32) * OSC_PCI_CAPABILITY_DWORDS;
+ }
+
+ static acpi_status acpi_pci_run_osc(struct acpi_pci_root *root,
+ const u32 *capbuf, u32 *pci_control,
+ u32 *cxl_control)
{
struct acpi_osc_context context = {
- .uuid_str = pci_osc_uuid_str,
+ .uuid_str = to_uuid(root),
.rev = 1,
- .cap.length = 12,
+ .cap.length = cap_length(root),
.cap.pointer = (void *)capbuf,
};
acpi_status status;
- status = acpi_run_osc(handle, &context);
+ status = acpi_run_osc(root->device->handle, &context);
if (ACPI_SUCCESS(status)) {
- *retval = *((u32 *)(context.ret.pointer + 8));
+ *pci_control = acpi_osc_ctx_get_pci_control(&context);
+ if (is_cxl(root))
+ *cxl_control = acpi_osc_ctx_get_cxl_control(&context);
kfree(context.ret.pointer);
}
return status;
}
- static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root,
- u32 support,
- u32 *control)
+ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 support,
+ u32 *control, u32 cxl_support,
+ u32 *cxl_control)
{
acpi_status status;
- u32 result, capbuf[3];
+ u32 pci_result, cxl_result, capbuf[OSC_CXL_CAPABILITY_DWORDS];
support |= root->osc_support_set;
capbuf[OSC_SUPPORT_DWORD] = support;
capbuf[OSC_CONTROL_DWORD] = *control | root->osc_control_set;
- status = acpi_pci_run_osc(root->device->handle, capbuf, &result);
+ if (is_cxl(root)) {
+ cxl_support |= root->osc_ext_support_set;
+ capbuf[OSC_EXT_SUPPORT_DWORD] = cxl_support;
+ capbuf[OSC_EXT_CONTROL_DWORD] = *cxl_control | root->osc_ext_control_set;
+ }
+
+ retry:
+ status = acpi_pci_run_osc(root, capbuf, &pci_result, &cxl_result);
if (ACPI_SUCCESS(status)) {
root->osc_support_set = support;
- *control = result;
+ *control = pci_result;
+ if (is_cxl(root)) {
+ root->osc_ext_support_set = cxl_support;
+ *cxl_control = cxl_result;
+ }
+ } else if (is_cxl(root)) {
+ /*
+ * CXL _OSC is optional on CXL 1.1 hosts. Fall back to PCIe _OSC
+ * upon any failure using CXL _OSC.
+ */
+ root->bridge_type = ACPI_BRIDGE_TYPE_PCIE;
+ goto retry;
}
return status;
}
* @handle: ACPI handle of a PCI root bridge (or PCIe Root Complex).
* @mask: Mask of _OSC bits to request control of, place to store control mask.
* @support: _OSC supported capability.
+ * @cxl_mask: Mask of CXL _OSC control bits, place to store control mask.
+ * @cxl_support: CXL _OSC supported capability.
*
* Run _OSC query for @mask and if that is successful, compare the returned
* mask of control bits with @req. If all of the @req bits are set in the
* _OSC bits the BIOS has granted control of, but its contents are meaningless
* on failure.
**/
- static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 support)
+ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask,
+ u32 support, u32 *cxl_mask,
+ u32 cxl_support)
{
u32 req = OSC_PCI_EXPRESS_CAPABILITY_CONTROL;
struct acpi_pci_root *root;
acpi_status status;
- u32 ctrl, capbuf[3];
+ u32 ctrl, cxl_ctrl = 0, capbuf[OSC_CXL_CAPABILITY_DWORDS];
if (!mask)
return AE_BAD_PARAMETER;
ctrl = *mask;
*mask |= root->osc_control_set;
+ if (is_cxl(root)) {
+ cxl_ctrl = *cxl_mask;
+ *cxl_mask |= root->osc_ext_control_set;
+ }
+
/* Need to check the available controls bits before requesting them. */
do {
- status = acpi_pci_query_osc(root, support, mask);
+ u32 pci_missing = 0, cxl_missing = 0;
+
+ status = acpi_pci_query_osc(root, support, mask, cxl_support,
+ cxl_mask);
if (ACPI_FAILURE(status))
return status;
- if (ctrl == *mask)
- break;
- decode_osc_control(root, "platform does not support",
- ctrl & ~(*mask));
+ if (is_cxl(root)) {
+ if (ctrl == *mask && cxl_ctrl == *cxl_mask)
+ break;
+ pci_missing = ctrl & ~(*mask);
+ cxl_missing = cxl_ctrl & ~(*cxl_mask);
+ } else {
+ if (ctrl == *mask)
+ break;
+ pci_missing = ctrl & ~(*mask);
+ }
+ if (pci_missing)
+ decode_osc_control(root, "platform does not support",
+ pci_missing);
+ if (cxl_missing)
+ decode_cxl_osc_control(root, "CXL platform does not support",
+ cxl_missing);
ctrl = *mask;
- } while (*mask);
+ cxl_ctrl = *cxl_mask;
+ } while (*mask || *cxl_mask);
/* No need to request _OSC if the control was already granted. */
- if ((root->osc_control_set & ctrl) == ctrl)
+ if ((root->osc_control_set & ctrl) == ctrl &&
+ (root->osc_ext_control_set & cxl_ctrl) == cxl_ctrl)
return AE_OK;
if ((ctrl & req) != req) {
capbuf[OSC_QUERY_DWORD] = 0;
capbuf[OSC_SUPPORT_DWORD] = root->osc_support_set;
capbuf[OSC_CONTROL_DWORD] = ctrl;
- status = acpi_pci_run_osc(handle, capbuf, mask);
+ if (is_cxl(root)) {
+ capbuf[OSC_EXT_SUPPORT_DWORD] = root->osc_ext_support_set;
+ capbuf[OSC_EXT_CONTROL_DWORD] = cxl_ctrl;
+ }
+
+ status = acpi_pci_run_osc(root, capbuf, mask, cxl_mask);
if (ACPI_FAILURE(status))
return status;
root->osc_control_set = *mask;
+ root->osc_ext_control_set = *cxl_mask;
return AE_OK;
}
return support;
}
+ /*
+ * Background on hotplug support, and making it depend on only
+ * CONFIG_HOTPLUG_PCI_PCIE vs. also considering CONFIG_MEMORY_HOTPLUG:
+ *
+ * CONFIG_ACPI_HOTPLUG_MEMORY does depend on CONFIG_MEMORY_HOTPLUG, but
+ * there is no existing _OSC for memory hotplug support. The reason is that
+ * ACPI memory hotplug requires the OS to acknowledge / coordinate with
+ * memory plug events via a scan handler. On the CXL side the equivalent
+ * would be if Linux supported the Mechanical Retention Lock [1], or
+ * otherwise had some coordination for the driver of a PCI device
+ * undergoing hotplug to be consulted on whether the hotplug should
+ * proceed or not.
+ *
+ * The concern is that if Linux says no to supporting CXL hotplug then
+ * the BIOS may say no to giving the OS hotplug control of any other PCIe
+ * device. So the question here is not whether hotplug is enabled, it's
+	 * whether it is handled natively by the OS at all, and if
+ * CONFIG_HOTPLUG_PCI_PCIE is enabled then the answer is "yes".
+ *
+ * Otherwise, the plan for CXL coordinated remove, since the kernel does
+ * not support blocking hotplug, is to require the memory device to be
+ * disabled before hotplug is attempted. When CONFIG_MEMORY_HOTPLUG is
+	 * disabled, that step will fail and the remove attempt is cancelled by the
+ * user. If that is not honored and the card is removed anyway then it
+ * does not matter if CONFIG_MEMORY_HOTPLUG is enabled or not, it will
+ * cause a crash and other badness.
+ *
+ * Therefore, just say yes to CXL hotplug and require removal to
+ * be coordinated by userspace unless and until the kernel grows better
+ * mechanisms for doing "managed" removal of devices in consultation with
+ * the driver.
+ *
+ */
+ static u32 calculate_cxl_support(void)
+ {
+ u32 support;
+
+ support = OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT;
+ if (pci_aer_available())
+ support |= OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT;
+ if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
+ support |= OSC_CXL_NATIVE_HP_SUPPORT;
+
+ return support;
+ }
+
static u32 calculate_control(void)
{
u32 control;
return control;
}
+ static u32 calculate_cxl_control(void)
+ {
+ u32 control = 0;
+
+ if (IS_ENABLED(CONFIG_MEMORY_FAILURE))
+ control |= OSC_CXL_ERROR_REPORTING_CONTROL;
+
+ return control;
+ }
+
static bool os_control_query_checks(struct acpi_pci_root *root, u32 support)
{
struct acpi_device *device = root->device;
return true;
}
- static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm,
- bool is_pcie)
+ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm)
{
u32 support, control = 0, requested = 0;
+ u32 cxl_support = 0, cxl_control = 0, cxl_requested = 0;
acpi_status status;
struct acpi_device *device = root->device;
acpi_handle handle = device->handle;
if (os_control_query_checks(root, support))
requested = control = calculate_control();
- status = acpi_pci_osc_control_set(handle, &control, support);
+ if (is_cxl(root)) {
+ cxl_support = calculate_cxl_support();
+ decode_cxl_osc_support(root, "OS supports", cxl_support);
+ cxl_requested = cxl_control = calculate_cxl_control();
+ }
+
+ status = acpi_pci_osc_control_set(handle, &control, support,
+ &cxl_control, cxl_support);
if (ACPI_SUCCESS(status)) {
if (control)
decode_osc_control(root, "OS now controls", control);
+ if (cxl_control)
+ decode_cxl_osc_control(root, "OS now controls",
+ cxl_control);
if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
/*
*no_aspm = 1;
/* _OSC is optional for PCI host bridges */
- if ((status == AE_NOT_FOUND) && !is_pcie)
+ if (status == AE_NOT_FOUND && !is_pcie(root))
return;
if (control) {
decode_osc_control(root, "OS requested", requested);
decode_osc_control(root, "platform willing to grant", control);
}
+ if (cxl_control) {
+ decode_cxl_osc_control(root, "OS requested", cxl_requested);
+ decode_cxl_osc_control(root, "platform willing to grant",
+ cxl_control);
+ }
dev_info(&device->dev, "_OSC: platform retains control of PCIe features (%s)\n",
acpi_format_exception(status));
acpi_handle handle = device->handle;
int no_aspm = 0;
bool hotadd = system_state == SYSTEM_RUNNING;
- bool is_pcie;
+ const char *acpi_hid;
root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL);
if (!root)
root->mcfg_addr = acpi_pci_root_get_mcfg_addr(handle);
- is_pcie = strcmp(acpi_device_hid(device), "PNP0A08") == 0;
- negotiate_os_control(root, &no_aspm, is_pcie);
+ acpi_hid = acpi_device_hid(root->device);
+ if (strcmp(acpi_hid, "PNP0A08") == 0)
+ root->bridge_type = ACPI_BRIDGE_TYPE_PCIE;
+ else if (strcmp(acpi_hid, "ACPI0016") == 0)
+ root->bridge_type = ACPI_BRIDGE_TYPE_CXL;
+ else
+ dev_dbg(&device->dev, "Assuming non-PCIe host bridge\n");
+
+ negotiate_os_control(root, &no_aspm);
/*
* TBD: Need PCI interface for enumeration/configuration of roots.
host_bridge->preserve_config = 1;
ACPI_FREE(obj);
+ acpi_dev_power_up_children_with_adr(device);
+
pci_scan_child_bus(bus);
pci_set_host_bridge_release(host_bridge, acpi_pci_root_release_info,
info);
return to_nd_region(to_dev(pmem)->parent);
}
-static void hwpoison_clear(struct pmem_device *pmem,
- phys_addr_t phys, unsigned int len)
+static phys_addr_t to_phys(struct pmem_device *pmem, phys_addr_t offset)
{
+ return pmem->phys_addr + offset;
+}
+
+static sector_t to_sect(struct pmem_device *pmem, phys_addr_t offset)
+{
+ return (offset - pmem->data_offset) >> SECTOR_SHIFT;
+}
+
+static phys_addr_t to_offset(struct pmem_device *pmem, sector_t sector)
+{
+ return (sector << SECTOR_SHIFT) + pmem->data_offset;
+}
+
+static void pmem_mkpage_present(struct pmem_device *pmem, phys_addr_t offset,
+ unsigned int len)
+{
+ phys_addr_t phys = to_phys(pmem, offset);
unsigned long pfn_start, pfn_end, pfn;
/* only pmem in the linear map supports HWPoison */
}
}
-static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
- phys_addr_t offset, unsigned int len)
+static void pmem_clear_bb(struct pmem_device *pmem, sector_t sector, long blks)
{
- struct device *dev = to_dev(pmem);
- sector_t sector;
- long cleared;
- blk_status_t rc = BLK_STS_OK;
+ if (blks == 0)
+ return;
+ badblocks_clear(&pmem->bb, sector, blks);
+ if (pmem->bb_state)
+ sysfs_notify_dirent(pmem->bb_state);
+}
- sector = (offset - pmem->data_offset) / 512;
+static long __pmem_clear_poison(struct pmem_device *pmem,
+ phys_addr_t offset, unsigned int len)
+{
+ phys_addr_t phys = to_phys(pmem, offset);
+ long cleared = nvdimm_clear_poison(to_dev(pmem), phys, len);
- cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
- if (cleared < len)
- rc = BLK_STS_IOERR;
- if (cleared > 0 && cleared / 512) {
- hwpoison_clear(pmem, pmem->phys_addr + offset, cleared);
- cleared /= 512;
- dev_dbg(dev, "%#llx clear %ld sector%s\n",
- (unsigned long long) sector, cleared,
- cleared > 1 ? "s" : "");
- badblocks_clear(&pmem->bb, sector, cleared);
- if (pmem->bb_state)
- sysfs_notify_dirent(pmem->bb_state);
+ if (cleared > 0) {
+ pmem_mkpage_present(pmem, offset, cleared);
+ arch_invalidate_pmem(pmem->virt_addr + offset, len);
}
+ return cleared;
+}
- arch_invalidate_pmem(pmem->virt_addr + offset, len);
+static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
+ phys_addr_t offset, unsigned int len)
+{
+ long cleared = __pmem_clear_poison(pmem, offset, len);
- return rc;
+ if (cleared < 0)
+ return BLK_STS_IOERR;
+
+ pmem_clear_bb(pmem, to_sect(pmem, offset), cleared >> SECTOR_SHIFT);
+ if (cleared < len)
+ return BLK_STS_IOERR;
+ return BLK_STS_OK;
}
static void write_pmem(void *pmem_addr, struct page *page,
sector_t sector, unsigned int len)
{
blk_status_t rc;
- phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
+ phys_addr_t pmem_off = to_offset(pmem, sector);
void *pmem_addr = pmem->virt_addr + pmem_off;
if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
struct page *page, unsigned int page_off,
sector_t sector, unsigned int len)
{
- blk_status_t rc = BLK_STS_OK;
- bool bad_pmem = false;
- phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
+ phys_addr_t pmem_off = to_offset(pmem, sector);
void *pmem_addr = pmem->virt_addr + pmem_off;
- if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
- bad_pmem = true;
+ if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) {
+ blk_status_t rc = pmem_clear_poison(pmem, pmem_off, len);
+
+ if (rc != BLK_STS_OK)
+ return rc;
+ }
- /*
- * Note that we write the data both before and after
- * clearing poison. The write before clear poison
- * handles situations where the latest written data is
- * preserved and the clear poison operation simply marks
- * the address range as valid without changing the data.
- * In this case application software can assume that an
- * interrupted write will either return the new good
- * data or an error.
- *
- * However, if pmem_clear_poison() leaves the data in an
- * indeterminate state we need to perform the write
- * after clear poison.
- */
flush_dcache_page(page);
write_pmem(pmem_addr, page, page_off, len);
- if (unlikely(bad_pmem)) {
- rc = pmem_clear_poison(pmem, pmem_off, len);
- write_pmem(pmem_addr, page, page_off, len);
- }
- return rc;
+ return BLK_STS_OK;
}
static void pmem_submit_bio(struct bio *bio)
/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
__weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
- long nr_pages, void **kaddr, pfn_t *pfn)
+ long nr_pages, enum dax_access_mode mode, void **kaddr,
+ pfn_t *pfn)
{
resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
-
- if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512,
- PFN_PHYS(nr_pages))))
- return -EIO;
+ sector_t sector = PFN_PHYS(pgoff) >> SECTOR_SHIFT;
+ unsigned int num = PFN_PHYS(nr_pages) >> SECTOR_SHIFT;
+ struct badblocks *bb = &pmem->bb;
+ sector_t first_bad;
+ int num_bad;
if (kaddr)
*kaddr = pmem->virt_addr + offset;
if (pfn)
*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+ if (bb->count &&
+ badblocks_check(bb, sector, num, &first_bad, &num_bad)) {
+ long actual_nr;
+
+ if (mode != DAX_RECOVERY_WRITE)
+ return -EIO;
+
+ /*
+		 * The recovery stride is set to the kernel page size because
+		 * the underlying driver and firmware poison-clearing functions
+		 * don't appear to handle large chunks (such as 2MiB) reliably.
+ */
+ actual_nr = PHYS_PFN(
+ PAGE_ALIGN((first_bad - sector) << SECTOR_SHIFT));
+ dev_dbg(pmem->bb.dev, "start sector(%llu), nr_pages(%ld), first_bad(%llu), actual_nr(%ld)\n",
+ sector, nr_pages, first_bad, actual_nr);
+ if (actual_nr)
+ return actual_nr;
+ return 1;
+ }
+
/*
- * If badblocks are present, limit known good range to the
- * requested range.
+ * If badblocks are present but not in the range, limit known good range
+ * to the requested range.
*/
- if (unlikely(pmem->bb.count))
+ if (bb->count)
return nr_pages;
return PHYS_PFN(pmem->size - pmem->pfn_pad - offset);
}
}
static long pmem_dax_direct_access(struct dax_device *dax_dev,
- pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
+ pgoff_t pgoff, long nr_pages, enum dax_access_mode mode,
+ void **kaddr, pfn_t *pfn)
+{
+ struct pmem_device *pmem = dax_get_private(dax_dev);
+
+ return __pmem_direct_access(pmem, pgoff, nr_pages, mode, kaddr, pfn);
+}
+
+/*
+ * The recovery write thread starts out as a normal pwrite thread. When
+ * the filesystem is told about a potential media error in the range, it
+ * turns the normal pwrite into a dax_recovery_write.
+ *
+ * The recovery write consists of clearing the media poison, clearing the
+ * page HWPoison bit, re-enabling page-wide read-write permission, flushing
+ * the caches, and finally performing the write. A competing pread thread
+ * is held off during the recovery process since the data read back might
+ * not be valid; this is achieved by clearing the badblock records only
+ * after the recovery write completes. Competing recovery write threads
+ * are already serialized by the writer lock held by dax_iomap_rw().
+ */
+static size_t pmem_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
+ void *addr, size_t bytes, struct iov_iter *i)
{
struct pmem_device *pmem = dax_get_private(dax_dev);
+ size_t olen, len, off;
+ phys_addr_t pmem_off;
+ struct device *dev = pmem->bb.dev;
+ long cleared;
+
+ off = offset_in_page(addr);
+ len = PFN_PHYS(PFN_UP(off + bytes));
+ if (!is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) >> SECTOR_SHIFT, len))
+ return _copy_from_iter_flushcache(addr, bytes, i);
+
+ /*
+	 * A range that is not page-aligned cannot be recovered. This should not
+ * happen unless something else went wrong.
+ */
+ if (off || !PAGE_ALIGNED(bytes)) {
+ dev_dbg(dev, "Found poison, but addr(%p) or bytes(%#zx) not page aligned\n",
+ addr, bytes);
+ return 0;
+ }
+
+ pmem_off = PFN_PHYS(pgoff) + pmem->data_offset;
+ cleared = __pmem_clear_poison(pmem, pmem_off, len);
+ if (cleared > 0 && cleared < len) {
+ dev_dbg(dev, "poison cleared only %ld out of %zu bytes\n",
+ cleared, len);
+ return 0;
+ }
+ if (cleared < 0) {
+ dev_dbg(dev, "poison clear failed: %ld\n", cleared);
+ return 0;
+ }
+
+ olen = _copy_from_iter_flushcache(addr, bytes, i);
+ pmem_clear_bb(pmem, to_sect(pmem, pmem_off), cleared >> SECTOR_SHIFT);
- return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
+ return olen;
}
static const struct dax_operations pmem_dax_ops = {
.direct_access = pmem_dax_direct_access,
.zero_page_range = pmem_dax_zero_page_range,
+ .recovery_write = pmem_recovery_write,
};
static ssize_t write_cache_show(struct device *dev,
nvdimm_namespace_detach_btt(to_nd_btt(dev));
else {
/*
- * Note, this assumes nd_device_lock() context to not
+ * Note, this assumes device_lock() context to not
* race nd_pmem_notify()
*/
sysfs_put(pmem->bb_state);
extern struct bus_type acpi_bus_type;
int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data);
+int acpi_dev_for_each_child(struct acpi_device *adev,
+ int (*fn)(struct acpi_device *, void *), void *data);
/*
* Events
int acpi_bus_update_power(acpi_handle handle, int *state_p);
int acpi_device_update_power(struct acpi_device *device, int *state_p);
bool acpi_bus_power_manageable(acpi_handle handle);
+void acpi_dev_power_up_children_with_adr(struct acpi_device *adev);
int acpi_device_power_add_dependent(struct acpi_device *adev,
struct device *dev);
void acpi_device_power_remove_dependent(struct acpi_device *adev,
int acpi_bind_one(struct device *dev, struct acpi_device *adev);
int acpi_unbind_one(struct device *dev);
+ enum acpi_bridge_type {
+ ACPI_BRIDGE_TYPE_PCIE = 1,
+ ACPI_BRIDGE_TYPE_CXL,
+ };
+
struct acpi_pci_root {
struct acpi_device * device;
struct pci_bus *bus;
u16 segment;
+ int bridge_type;
struct resource secondary; /* downstream bus range */
- u32 osc_support_set; /* _OSC state of support bits */
- u32 osc_control_set; /* _OSC state of control bits */
+ u32 osc_support_set; /* _OSC state of support bits */
+ u32 osc_control_set; /* _OSC state of control bits */
+ u32 osc_ext_support_set; /* _OSC state of extended support bits */
+ u32 osc_ext_control_set; /* _OSC state of extended control bits */
phys_addr_t mcfg_addr;
};
acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
- /* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */
+ /* Number of _OSC capability DWORDS depends on bridge type */
+ #define OSC_PCI_CAPABILITY_DWORDS 3
+ #define OSC_CXL_CAPABILITY_DWORDS 5
+
+ /* Indexes into _OSC Capabilities Buffer (DWORDs 2 to 5 are device-specific) */
#define OSC_QUERY_DWORD 0 /* DWORD 1 */
#define OSC_SUPPORT_DWORD 1 /* DWORD 2 */
#define OSC_CONTROL_DWORD 2 /* DWORD 3 */
+ #define OSC_EXT_SUPPORT_DWORD 3 /* DWORD 4 */
+ #define OSC_EXT_CONTROL_DWORD 4 /* DWORD 5 */
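For a CXL host bridge the capability buffer therefore spans five DWORDs. A hedged sketch of the layout, using only constants introduced in this series (the support value is a placeholder):

	u32 capbuf[OSC_CXL_CAPABILITY_DWORDS] = {
		[OSC_QUERY_DWORD]       = OSC_QUERY_ENABLE,
		[OSC_SUPPORT_DWORD]     = 0,	/* PCI support bits */
		[OSC_CONTROL_DWORD]     = OSC_PCI_EXPRESS_CAPABILITY_CONTROL,
		[OSC_EXT_SUPPORT_DWORD] = OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT,
		[OSC_EXT_CONTROL_DWORD] = OSC_CXL_ERROR_REPORTING_CONTROL,
	};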
/* _OSC Capabilities DWORD 1: Query/Control and Error Returns (generic) */
#define OSC_QUERY_ENABLE 0x00000001 /* input */
#define OSC_SB_OSLPI_SUPPORT 0x00000100
#define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT 0x00001000
#define OSC_SB_GENERIC_INITIATOR_SUPPORT 0x00002000
+#define OSC_SB_CPC_FLEXIBLE_ADR_SPACE 0x00004000
#define OSC_SB_NATIVE_USB4_SUPPORT 0x00040000
#define OSC_SB_PRM_SUPPORT 0x00200000
extern bool osc_pc_lpi_support_confirmed;
extern bool osc_sb_native_usb4_support_confirmed;
extern bool osc_sb_cppc_not_supported;
+extern bool osc_cpc_flexible_adr_space_confirmed;
/* USB4 Capabilities */
#define OSC_USB_USB3_TUNNELING 0x00000001
#define OSC_PCI_EXPRESS_LTR_CONTROL 0x00000020
#define OSC_PCI_EXPRESS_DPC_CONTROL 0x00000080
+ /* CXL _OSC: Capabilities DWORD 4: Support Field */
+ #define OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT 0x00000001
+ #define OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT 0x00000002
+ #define OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT 0x00000004
+ #define OSC_CXL_NATIVE_HP_SUPPORT 0x00000008
+
+ /* CXL _OSC: Capabilities DWORD 5: Control Field */
+ #define OSC_CXL_ERROR_REPORTING_CONTROL 0x00000001
+
+ static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context)
+ {
+ u32 *ret = context->ret.pointer;
+
+ return ret[OSC_CONTROL_DWORD];
+ }
+
+ static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context)
+ {
+ u32 *ret = context->ret.pointer;
+
+ return ret[OSC_EXT_CONTROL_DWORD];
+ }
+
#define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002
#define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004
#define ACPI_GSB_ACCESS_ATTRIB_BYTE 0x00000006
static inline void acpi_unregister_wakeup_handler(
bool (*wakeup)(void *context), void *context) { }
+ struct acpi_osc_context;
+ static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context)
+ {
+ return 0;
+ }
+
+ static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context)
+ {
+ return 0;
+ }
+
#endif /* !CONFIG_ACPI */
#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
struct task_struct;
-/* for sysctl */
-extern int prove_locking;
-extern int lock_stat;
-
#ifdef CONFIG_LOCKDEP
#include <linux/linkage.h>
struct lock_class_key *key, unsigned int subclass,
unsigned long ip);
+ #define lock_set_novalidate_class(l, n, i) \
+ lock_set_class(l, n, &__lockdep_no_validate__, 0, i)
+
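A hypothetical use of the new helper, re-keying an already held lock to the no-validate class (the structure and field names are invented for illustration):

struct foo {
	struct mutex lock;
};

static void foo_lock_novalidate(struct foo *foo)
{
	mutex_lock(&foo->lock);
	/* Opt the held lock out of full lockdep validation. */
	lock_set_novalidate_class(&foo->lock.dep_map, "&foo->lock", _THIS_IP_);
}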
static inline void lock_set_subclass(struct lockdep_map *lock,
unsigned int subclass, unsigned long ip)
{
# define lock_acquire(l, s, t, r, c, n, i) do { } while (0)
# define lock_release(l, i) do { } while (0)
# define lock_downgrade(l, i) do { } while (0)
- # define lock_set_class(l, n, k, s, i) do { } while (0)
+ # define lock_set_class(l, n, key, s, i) do { (void)(key); } while (0)
+ # define lock_set_novalidate_class(l, n, i) do { } while (0)
# define lock_set_subclass(l, s, i) do { } while (0)
# define lockdep_init() do { } while (0)
# define lockdep_init_map_type(lock, name, key, sub, inner, outer, type) \
}
#endif /* CONFIG_VT_CONSOLE_SLEEP */
+ #ifdef CONFIG_CXL_SUSPEND
+ bool cxl_mem_active(void);
+ #else
+ static inline bool cxl_mem_active(void)
+ {
+ return false;
+ }
+ #endif
+
/*
* Device power management
*/
#ifdef CONFIG_PM
#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
- runtime_resume_fn, idle_fn, sec) \
+ runtime_resume_fn, idle_fn, sec, ns) \
_DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
runtime_resume_fn, idle_fn); \
- _EXPORT_SYMBOL(name, sec)
+ __EXPORT_SYMBOL(name, sec, ns)
#else
#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
- runtime_resume_fn, idle_fn, sec) \
+ runtime_resume_fn, idle_fn, sec, ns) \
static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \
resume_fn, runtime_suspend_fn, \
runtime_resume_fn, idle_fn)
_DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL)
#define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
- _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "")
+ _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "", "")
#define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
- _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl")
+ _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl", "")
+#define EXPORT_NS_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \
+ _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "", #ns)
+#define EXPORT_NS_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \
+ _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl", #ns)
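A usage sketch for the new namespaced variant (the driver callbacks and the namespace are invented for illustration):

static int foo_suspend(struct device *dev) { return 0; }
static int foo_resume(struct device *dev) { return 0; }

EXPORT_NS_GPL_SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_suspend, foo_resume, FOO_PM);
/* Modules using foo_pm_ops must also declare: MODULE_IMPORT_NS(FOO_PM); */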
/* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */
#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
char *s = buf;
suspend_state_t i;
- for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++)
+ for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) {
+ if (i >= PM_SUSPEND_MEM && cxl_mem_active())
+ continue;
if (mem_sleep_states[i]) {
const char *label = mem_sleep_states[i];
else
s += sprintf(s, "%s ", label);
}
+ }
/* Convert the last space to a newline if needed. */
if (s != buf)
}
__setup("pm_debug_messages", pm_debug_messages_setup);
-/**
- * __pm_pr_dbg - Print a suspend debug message to the kernel log.
- * @defer: Whether or not to use printk_deferred() to print the message.
- * @fmt: Message format.
- *
- * The message will be emitted if enabled through the pm_debug_messages
- * sysfs attribute.
- */
-void __pm_pr_dbg(bool defer, const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- if (!pm_debug_messages_on)
- return;
-
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
-
- if (defer)
- printk_deferred(KERN_DEBUG "PM: %pV", &vaf);
- else
- printk(KERN_DEBUG "PM: %pV", &vaf);
-
- va_end(args);
-}
-
#else /* !CONFIG_PM_SLEEP_DEBUG */
static inline void pm_print_times_init(void) {}
#endif /* CONFIG_PM_SLEEP_DEBUG */
larger and slower, but it gives very useful debugging information
in case of kernel bugs. (precise oopses/stacktraces/warnings)
+config OBJTOOL
+ bool
+
config STACK_VALIDATION
bool "Compile-time stack metadata validation"
- depends on HAVE_STACK_VALIDATION
+ depends on HAVE_STACK_VALIDATION && UNWINDER_FRAME_POINTER
+ select OBJTOOL
default n
help
- Add compile-time checks to validate stack metadata, including frame
- pointers (if CONFIG_FRAME_POINTER is enabled). This helps ensure
- that runtime stack traces are more reliable.
-
- This is also a prerequisite for generation of ORC unwind data, which
- is needed for CONFIG_UNWINDER_ORC.
+ Validate frame pointer rules at compile-time. This helps ensure that
+ runtime stack traces are more reliable.
For more information, see
tools/objtool/Documentation/stack-validation.txt.
-config VMLINUX_VALIDATION
+config NOINSTR_VALIDATION
bool
- depends on STACK_VALIDATION && DEBUG_ENTRY
+ depends on HAVE_NOINSTR_VALIDATION && DEBUG_ENTRY
+ select OBJTOOL
default y
config VMLINUX_MAP
help
Debug objects boot parameter default value
-config DEBUG_SLAB
- bool "Debug slab memory allocations"
- depends on DEBUG_KERNEL && SLAB
- help
- Say Y here to have the kernel do limited verification on memory
- allocation as well as poisoning memory on free to catch use of freed
- memory. This can make kmalloc/kfree-intensive workloads much slower.
-
-config SLUB_DEBUG_ON
- bool "SLUB debugging on by default"
- depends on SLUB && SLUB_DEBUG
- default n
- help
- Boot with debugging on by default. SLUB boots by default with
- the runtime debug capabilities switched off. Enabling this is
- equivalent to specifying the "slub_debug" parameter on boot.
- There is no support for more fine grained debug control like
- possible with slub_debug=xxx. SLUB debugging may be switched
- off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying
- "slub_debug=-".
-
-config SLUB_STATS
- default n
- bool "Enable SLUB performance statistics"
- depends on SLUB && SYSFS
- help
- SLUB statistics are useful to debug SLUBs allocation behavior in
- order find ways to optimize the allocator. This should never be
- enabled for production use since keeping statistics slows down
- the allocator by a few percentage points. The slabinfo command
- supports the determination of the most active slabs to figure
- out which slabs are relevant to a particular load.
- Try running: slabinfo -DA
-
config HAVE_DEBUG_KMEMLEAK
bool
Say N if unsure.
-config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
- int
- depends on SOFTLOCKUP_DETECTOR
- range 0 1
- default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
- default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
-
config HARDLOCKUP_DETECTOR_PERF
bool
select SOFTLOCKUP_DETECTOR
Say N if unsure.
-config BOOTPARAM_HARDLOCKUP_PANIC_VALUE
- int
- depends on HARDLOCKUP_DETECTOR
- range 0 1
- default 0 if !BOOTPARAM_HARDLOCKUP_PANIC
- default 1 if BOOTPARAM_HARDLOCKUP_PANIC
-
config DETECT_HUNG_TASK
bool "Detect Hung Tasks"
depends on DEBUG_KERNEL
Say N if unsure.
-config BOOTPARAM_HUNG_TASK_PANIC_VALUE
- int
- depends on DETECT_HUNG_TASK
- range 0 1
- default 0 if !BOOTPARAM_HUNG_TASK_PANIC
- default 1 if BOOTPARAM_HUNG_TASK_PANIC
-
config WQ_WATCHDOG
bool "Detect Workqueue Stalls"
depends on DEBUG_KERNEL
include the IPI handler function currently executing (if any)
and relevant stack traces.
- choice
- prompt "Lock debugging: prove subsystem device_lock() correctness"
- depends on PROVE_LOCKING
- help
- For subsystems that have instrumented their usage of the device_lock()
- with nested annotations, enable lock dependency checking. The locking
- hierarchy 'subclass' identifiers are not compatible across
- sub-systems, so only one can be enabled at a time.
-
- config PROVE_NVDIMM_LOCKING
- bool "NVDIMM"
- depends on LIBNVDIMM
- help
- Enable lockdep to validate nd_device_lock() usage.
-
- config PROVE_CXL_LOCKING
- bool "CXL"
- depends on CXL_BUS
- help
- Enable lockdep to validate cxl_device_lock() usage.
-
- endchoice
-
endmenu # lock debugging
config TRACE_IRQFLAGS
so architecture maintainers really need to do what they can
to get the CRNG seeded sooner after the system is booted.
However, since users cannot do anything actionable to
- address this, by default the kernel will issue only a single
- warning for the first use of unseeded randomness.
+ address this, by default this option is disabled.
Say Y here if you want to receive warnings for all uses of
unseeded randomness. This will be of use primarily for
bool "Code coverage for fuzzing"
depends on ARCH_HAS_KCOV
depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS
- depends on !ARCH_WANTS_NO_INSTR || STACK_VALIDATION || \
+ depends on !ARCH_WANTS_NO_INSTR || HAVE_NOINSTR_HACK || \
GCC_VERSION >= 120000 || CLANG_VERSION >= 130000
select DEBUG_FS
select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC
+ select OBJTOOL if HAVE_NOINSTR_HACK
help
KCOV exposes kernel code coverage information in a form suitable
for coverage-guided fuzzing (randomized testing).
If unsure, say N.
config KPROBES_SANITY_TEST
- tristate "Kprobes sanity tests"
+ tristate "Kprobes sanity tests" if !KUNIT_ALL_TESTS
depends on DEBUG_KERNEL
depends on KPROBES
depends on KUNIT
+ default KUNIT_ALL_TESTS
help
This option provides for testing basic kprobes functionality on
boot. Samples of kprobe and kretprobe are inserted and
If unsure, say N.
config BITFIELD_KUNIT
- tristate "KUnit test bitfield functions at runtime"
+ tristate "KUnit test bitfield functions at runtime" if !KUNIT_ALL_TESTS
depends on KUNIT
+ default KUNIT_ALL_TESTS
help
Enable this option to test the bitfield functions at boot.
optimized versions. If unsure, say N.
config RESOURCE_KUNIT_TEST
- tristate "KUnit test for resource API"
+ tristate "KUnit test for resource API" if !KUNIT_ALL_TESTS
depends on KUNIT
+ default KUNIT_ALL_TESTS
help
This builds the resource API unit test.
Tests the logic of API provided by resource.c and ioport.h.
If unsure, say N.
config CMDLINE_KUNIT_TEST
- tristate "KUnit test for cmdline API"
+ tristate "KUnit test for cmdline API" if !KUNIT_ALL_TESTS
depends on KUNIT
+ default KUNIT_ALL_TESTS
help
This builds the cmdline API unit test.
Tests the logic of API provided by cmdline.c.
If unsure, say N.
config BITS_TEST
- tristate "KUnit test for bits.h"
+ tristate "KUnit test for bits.h" if !KUNIT_ALL_TESTS
depends on KUNIT
+ default KUNIT_ALL_TESTS
help
This builds the bits unit test.
Tests the logic of macros defined in bits.h.