From: Linus Torvalds
Date: Sat, 28 May 2022 04:24:19 +0000 (-0700)
Subject: Merge tag 'cxl-for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
X-Git-Url: https://repo.jachan.dev/J-linux.git/commitdiff_plain/9d004b2f4fea97cde123e7f1939b80e77bf2e695?hp=-c

Merge tag 'cxl-for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl

Pull cxl updates from Dan Williams:
 "Compute Express Link (CXL) updates for this cycle. The highlight is
  new driver-core infrastructure and CXL subsystem changes that allow
  lockdep to validate device_lock() usage. Thanks to PeterZ for setting
  me straight on the current capabilities of the lockdep API, and Greg
  acked it as well.

  On the CXL ACPI side, this update adds support for CXL _OSC so that
  platform firmware knows it is safe to still grant Linux native
  control of PCIe hotplug and error handling in the presence of CXL
  devices. A circular dependency problem was discovered between suspend
  and CXL memory for cases where the suspend image might be stored in
  CXL memory while that image also contains the PCI register state that
  must be restored to re-enable the device. Disable suspend for now,
  until an architecture is defined to resolve that conflict.

  Lastly, a collection of reworks, fixes, and cleanups to the CXL
  subsystem, where support for snooping mailbox commands and properly
  handling the 'mem_enable' flow are the highlights.

  Summary:

   - Add driver-core infrastructure for lockdep validation of
     device_lock(), and fix up a deadlock report that was previously
     hidden behind the 'lockdep no validate' policy.

   - Add CXL _OSC support for claiming native control of CXL hotplug
     and error handling.

   - Disable suspend in the presence of CXL memory unless and until a
     protocol is identified for restoring PCI device context from
     memory hosted on CXL PCI devices.

   - Add support for snooping CXL mailbox commands to protect against
     inopportune changes, like set-partition with the 'immediate' flag
     set.

   - Rework how the driver detects legacy CXL 1.1 configurations (CXL
     DVSEC / 'mem_enable') before enabling new CXL 2.0 decode
     configurations (CXL HDM Capability).

   - Miscellaneous cleanups and fixes from -next exposure"

* tag 'cxl-for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (47 commits)
  cxl/port: Enable HDM Capability after validating DVSEC Ranges
  cxl/port: Reuse 'struct cxl_hdm' context for hdm init
  cxl/port: Move endpoint HDM Decoder Capability init to port driver
  cxl/pci: Drop @info argument to cxl_hdm_decode_init()
  cxl/mem: Merge cxl_dvsec_ranges() and cxl_hdm_decode_init()
  cxl/mem: Skip range enumeration if mem_enable clear
  cxl/mem: Consolidate CXL DVSEC Range enumeration in the core
  cxl/pci: Move cxl_await_media_ready() to the core
  cxl/mem: Validate port connectivity before dvsec ranges
  cxl/mem: Fix cxl_mem_probe() error exit
  cxl/pci: Drop wait_for_valid() from cxl_await_media_ready()
  cxl/pci: Consolidate wait_for_media() and wait_for_media_ready()
  cxl/mem: Drop mem_enabled check from wait_for_media()
  nvdimm: Fix firmware activation deadlock scenarios
  device-core: Kill the lockdep_mutex
  nvdimm: Drop nd_device_lock()
  ACPI: NFIT: Drop nfit_device_lock()
  nvdimm: Replace lockdep_mutex with local lock classes
  cxl: Drop cxl_device_lock()
  cxl/acpi: Add root device lockdep validation
  ...
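A note for readers following the device_lock() validation work in this pull: the series replaces the blanket 'lockdep no validate' policy on the device mutex with per-subsystem lock classes, so lockdep can build a real dependency graph for device_lock(). The sketch below is illustrative only, not code from this merge; the 'foo' names (foo_bus_key, foo_register) are hypothetical, while device_initialize(), device_add(), and lockdep_set_class() are the real driver-core/lockdep entry points the series uses.

#include <linux/device.h>
#include <linux/lockdep.h>

/* One named lock class per subsystem so lockdep can track device_lock() */
static struct lock_class_key foo_bus_key;

static int foo_register(struct device *dev)
{
	device_initialize(dev);
	/*
	 * Override the driver core's default novalidate class on
	 * dev->mutex so lockdep validates this subsystem's locking.
	 */
	lockdep_set_class(&dev->mutex, &foo_bus_key);
	return device_add(dev);
}

With a named class in place, deadlocks such as the nvdimm firmware-activation scenario fixed in this series become visible to lockdep instead of being suppressed.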
--- 9d004b2f4fea97cde123e7f1939b80e77bf2e695 diff --combined drivers/acpi/bus.c index b67d2ee77cd1,7658acbbb2bd..86fa61a21826 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@@ -278,20 -278,6 +278,20 @@@ bool osc_sb_apei_support_acked bool osc_pc_lpi_support_confirmed; EXPORT_SYMBOL_GPL(osc_pc_lpi_support_confirmed); +/* + * ACPI 6.2 Section 6.2.11.2 'Platform-Wide OSPM Capabilities': + * Starting with ACPI Specification 6.2, all _CPC registers can be in + * PCC, System Memory, System IO, or Functional Fixed Hardware address + * spaces. OSPM support for this more flexible register space scheme is + * indicated by the “Flexible Address Space for CPPC Registers” _OSC bit. + * + * Otherwise (cf ACPI 6.1, s8.4.7.1.1.X), _CPC registers must be in: + * - PCC or Functional Fixed Hardware address space if defined + * - SystemMemory address space (NULL register) if not defined + */ +bool osc_cpc_flexible_adr_space_confirmed; +EXPORT_SYMBOL_GPL(osc_cpc_flexible_adr_space_confirmed); + /* * ACPI 6.4 Operating System Capabilities for USB. */ @@@ -329,15 -315,12 +329,15 @@@ static void acpi_bus_osc_negotiate_plat #endif #ifdef CONFIG_X86 capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_GENERIC_INITIATOR_SUPPORT; - if (boot_cpu_has(X86_FEATURE_HWP)) { - capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_SUPPORT; - capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPCV2_SUPPORT; - } #endif +#ifdef CONFIG_ACPI_CPPC_LIB + capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_SUPPORT; + capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPCV2_SUPPORT; +#endif + + capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_FLEXIBLE_ADR_SPACE; + if (IS_ENABLED(CONFIG_SCHED_MC_PRIO)) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_DIVERSE_HIGH_SUPPORT; @@@ -358,9 -341,10 +358,9 @@@ return; } -#ifdef CONFIG_X86 - if (boot_cpu_has(X86_FEATURE_HWP)) - osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] & - (OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT)); +#ifdef CONFIG_ACPI_CPPC_LIB + osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] & + (OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT)); #endif /* @@@ -382,8 -366,6 +382,8 @@@ capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT; osc_sb_native_usb4_support_confirmed = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT; + osc_cpc_flexible_adr_space_confirmed = + capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_CPC_FLEXIBLE_ADR_SPACE; } kfree(context.ret.pointer); @@@ -443,7 -425,7 +443,7 @@@ static void acpi_bus_osc_negotiate_usb_ } osc_sb_native_usb4_control = - control & ((u32 *)context.ret.pointer)[OSC_CONTROL_DWORD]; + control & acpi_osc_ctx_get_pci_control(&context); acpi_bus_decode_usb_osc("USB4 _OSC: OS supports", control); acpi_bus_decode_usb_osc("USB4 _OSC: OS controls", @@@ -1088,32 -1070,6 +1088,32 @@@ int acpi_bus_for_each_dev(int (*fn)(str } EXPORT_SYMBOL_GPL(acpi_bus_for_each_dev); +struct acpi_dev_walk_context { + int (*fn)(struct acpi_device *, void *); + void *data; +}; + +static int acpi_dev_for_one_check(struct device *dev, void *context) +{ + struct acpi_dev_walk_context *adwc = context; + + if (dev->bus != &acpi_bus_type) + return 0; + + return adwc->fn(to_acpi_device(dev), adwc->data); +} + +int acpi_dev_for_each_child(struct acpi_device *adev, + int (*fn)(struct acpi_device *, void *), void *data) +{ + struct acpi_dev_walk_context adwc = { + .fn = fn, + .data = data, + }; + + return device_for_each_child(&adev->dev, &adwc, acpi_dev_for_one_check); +} + /* -------------------------------------------------------------------------- Initialization/Cleanup -------------------------------------------------------------------------- 
*/ diff --combined drivers/acpi/pci_root.c index b3b507f20e87,c82ad63fffed..d57cf8454b93 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@@ -140,6 -140,17 +140,17 @@@ static struct pci_osc_bit_struct pci_os { OSC_PCI_EXPRESS_DPC_CONTROL, "DPC" }, }; + static struct pci_osc_bit_struct cxl_osc_support_bit[] = { + { OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT, "CXL11PortRegAccess" }, + { OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT, "CXL20PortDevRegAccess" }, + { OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT, "CXLProtocolErrorReporting" }, + { OSC_CXL_NATIVE_HP_SUPPORT, "CXLNativeHotPlug" }, + }; + + static struct pci_osc_bit_struct cxl_osc_control_bit[] = { + { OSC_CXL_ERROR_REPORTING_CONTROL, "CXLMemErrorReporting" }, + }; + static void decode_osc_bits(struct acpi_pci_root *root, char *msg, u32 word, struct pci_osc_bit_struct *table, int size) { @@@ -168,33 -179,73 +179,73 @@@ static void decode_osc_control(struct a ARRAY_SIZE(pci_osc_control_bit)); } + static void decode_cxl_osc_support(struct acpi_pci_root *root, char *msg, u32 word) + { + decode_osc_bits(root, msg, word, cxl_osc_support_bit, + ARRAY_SIZE(cxl_osc_support_bit)); + } + + static void decode_cxl_osc_control(struct acpi_pci_root *root, char *msg, u32 word) + { + decode_osc_bits(root, msg, word, cxl_osc_control_bit, + ARRAY_SIZE(cxl_osc_control_bit)); + } + + static inline bool is_pcie(struct acpi_pci_root *root) + { + return root->bridge_type == ACPI_BRIDGE_TYPE_PCIE; + } + + static inline bool is_cxl(struct acpi_pci_root *root) + { + return root->bridge_type == ACPI_BRIDGE_TYPE_CXL; + } + static u8 pci_osc_uuid_str[] = "33DB4D5B-1FF7-401C-9657-7441C03DD766"; + static u8 cxl_osc_uuid_str[] = "68F2D50B-C469-4d8A-BD3D-941A103FD3FC"; - static acpi_status acpi_pci_run_osc(acpi_handle handle, - const u32 *capbuf, u32 *retval) + static char *to_uuid(struct acpi_pci_root *root) + { + if (is_cxl(root)) + return cxl_osc_uuid_str; + return pci_osc_uuid_str; + } + + static int cap_length(struct acpi_pci_root *root) + { + if (is_cxl(root)) + return sizeof(u32) * OSC_CXL_CAPABILITY_DWORDS; + return sizeof(u32) * OSC_PCI_CAPABILITY_DWORDS; + } + + static acpi_status acpi_pci_run_osc(struct acpi_pci_root *root, + const u32 *capbuf, u32 *pci_control, + u32 *cxl_control) { struct acpi_osc_context context = { - .uuid_str = pci_osc_uuid_str, + .uuid_str = to_uuid(root), .rev = 1, - .cap.length = 12, + .cap.length = cap_length(root), .cap.pointer = (void *)capbuf, }; acpi_status status; - status = acpi_run_osc(handle, &context); + status = acpi_run_osc(root->device->handle, &context); if (ACPI_SUCCESS(status)) { - *retval = *((u32 *)(context.ret.pointer + 8)); + *pci_control = acpi_osc_ctx_get_pci_control(&context); + if (is_cxl(root)) + *cxl_control = acpi_osc_ctx_get_cxl_control(&context); kfree(context.ret.pointer); } return status; } - static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, - u32 support, - u32 *control) + static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 support, + u32 *control, u32 cxl_support, + u32 *cxl_control) { acpi_status status; - u32 result, capbuf[3]; + u32 pci_result, cxl_result, capbuf[OSC_CXL_CAPABILITY_DWORDS]; support |= root->osc_support_set; @@@ -202,10 -253,28 +253,28 @@@ capbuf[OSC_SUPPORT_DWORD] = support; capbuf[OSC_CONTROL_DWORD] = *control | root->osc_control_set; - status = acpi_pci_run_osc(root->device->handle, capbuf, &result); + if (is_cxl(root)) { + cxl_support |= root->osc_ext_support_set; + capbuf[OSC_EXT_SUPPORT_DWORD] = cxl_support; + capbuf[OSC_EXT_CONTROL_DWORD] = 
*cxl_control | root->osc_ext_control_set; + } + + retry: + status = acpi_pci_run_osc(root, capbuf, &pci_result, &cxl_result); if (ACPI_SUCCESS(status)) { root->osc_support_set = support; - *control = result; + *control = pci_result; + if (is_cxl(root)) { + root->osc_ext_support_set = cxl_support; + *cxl_control = cxl_result; + } + } else if (is_cxl(root)) { + /* + * CXL _OSC is optional on CXL 1.1 hosts. Fall back to PCIe _OSC + * upon any failure using CXL _OSC. + */ + root->bridge_type = ACPI_BRIDGE_TYPE_PCIE; + goto retry; } return status; } @@@ -321,6 -390,8 +390,8 @@@ EXPORT_SYMBOL_GPL(acpi_get_pci_dev) * @handle: ACPI handle of a PCI root bridge (or PCIe Root Complex). * @mask: Mask of _OSC bits to request control of, place to store control mask. * @support: _OSC supported capability. + * @cxl_mask: Mask of CXL _OSC control bits, place to store control mask. + * @cxl_support: CXL _OSC supported capability. * * Run _OSC query for @mask and if that is successful, compare the returned * mask of control bits with @req. If all of the @req bits are set in the @@@ -331,12 -402,14 +402,14 @@@ * _OSC bits the BIOS has granted control of, but its contents are meaningless * on failure. **/ - static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 support) + static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, + u32 support, u32 *cxl_mask, + u32 cxl_support) { u32 req = OSC_PCI_EXPRESS_CAPABILITY_CONTROL; struct acpi_pci_root *root; acpi_status status; - u32 ctrl, capbuf[3]; + u32 ctrl, cxl_ctrl = 0, capbuf[OSC_CXL_CAPABILITY_DWORDS]; if (!mask) return AE_BAD_PARAMETER; @@@ -348,20 -421,42 +421,42 @@@ ctrl = *mask; *mask |= root->osc_control_set; + if (is_cxl(root)) { + cxl_ctrl = *cxl_mask; + *cxl_mask |= root->osc_ext_control_set; + } + /* Need to check the available controls bits before requesting them. */ do { - status = acpi_pci_query_osc(root, support, mask); + u32 pci_missing = 0, cxl_missing = 0; + + status = acpi_pci_query_osc(root, support, mask, cxl_support, + cxl_mask); if (ACPI_FAILURE(status)) return status; - if (ctrl == *mask) - break; - decode_osc_control(root, "platform does not support", - ctrl & ~(*mask)); + if (is_cxl(root)) { + if (ctrl == *mask && cxl_ctrl == *cxl_mask) + break; + pci_missing = ctrl & ~(*mask); + cxl_missing = cxl_ctrl & ~(*cxl_mask); + } else { + if (ctrl == *mask) + break; + pci_missing = ctrl & ~(*mask); + } + if (pci_missing) + decode_osc_control(root, "platform does not support", + pci_missing); + if (cxl_missing) + decode_cxl_osc_control(root, "CXL platform does not support", + cxl_missing); ctrl = *mask; - } while (*mask); + cxl_ctrl = *cxl_mask; + } while (*mask || *cxl_mask); /* No need to request _OSC if the control was already granted. 
*/ - if ((root->osc_control_set & ctrl) == ctrl + if ((root->osc_control_set & ctrl) == ctrl && + (root->osc_ext_control_set & cxl_ctrl) == cxl_ctrl) return AE_OK; if ((ctrl & req) != req) { @@@ -373,11 -468,17 +468,17 @@@ capbuf[OSC_QUERY_DWORD] = 0; capbuf[OSC_SUPPORT_DWORD] = root->osc_support_set; capbuf[OSC_CONTROL_DWORD] = ctrl; - status = acpi_pci_run_osc(handle, capbuf, mask); + if (is_cxl(root)) { + capbuf[OSC_EXT_SUPPORT_DWORD] = root->osc_ext_support_set; + capbuf[OSC_EXT_CONTROL_DWORD] = cxl_ctrl; + } + + status = acpi_pci_run_osc(root, capbuf, mask, cxl_mask); if (ACPI_FAILURE(status)) return status; root->osc_control_set = *mask; + root->osc_ext_control_set = *cxl_mask; return AE_OK; } @@@ -403,6 -504,53 +504,53 @@@ static u32 calculate_support(void return support; } + /* + * Background on hotplug support, and making it depend on only + * CONFIG_HOTPLUG_PCI_PCIE vs. also considering CONFIG_MEMORY_HOTPLUG: + * + * CONFIG_ACPI_HOTPLUG_MEMORY does depend on CONFIG_MEMORY_HOTPLUG, but + * there is no existing _OSC for memory hotplug support. The reason is that + * ACPI memory hotplug requires the OS to acknowledge / coordinate with + * memory plug events via a scan handler. On the CXL side the equivalent + * would be if Linux supported the Mechanical Retention Lock [1], or + * otherwise had some coordination for the driver of a PCI device + * undergoing hotplug to be consulted on whether the hotplug should + * proceed or not. + * + * The concern is that if Linux says no to supporting CXL hotplug then + * the BIOS may say no to giving the OS hotplug control of any other PCIe + * device. So the question here is not whether hotplug is enabled, it's + * whether it is handled natively by the OS at all, and if + * CONFIG_HOTPLUG_PCI_PCIE is enabled then the answer is "yes". + * + * Otherwise, the plan for CXL coordinated remove, since the kernel does + * not support blocking hotplug, is to require the memory device to be + * disabled before hotplug is attempted. When CONFIG_MEMORY_HOTPLUG is + * disabled that step will fail and the remove attempt must be cancelled + * by the user. If that is not honored and the card is removed anyway then it + * does not matter if CONFIG_MEMORY_HOTPLUG is enabled or not, it will + * cause a crash and other badness. + * + * Therefore, just say yes to CXL hotplug and require removal to + * be coordinated by userspace unless and until the kernel grows better + * mechanisms for doing "managed" removal of devices in consultation with + * the driver.
+ * + * [1]: https://lore.kernel.org/all/20201122014203.4706-1-ashok.raj@intel.com/ + */ + static u32 calculate_cxl_support(void) + { + u32 support; + + support = OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT; + if (pci_aer_available()) + support |= OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT; + if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) + support |= OSC_CXL_NATIVE_HP_SUPPORT; + + return support; + } + static u32 calculate_control(void) { u32 control; @@@ -434,6 -582,16 +582,16 @@@ return control; } + static u32 calculate_cxl_control(void) + { + u32 control = 0; + + if (IS_ENABLED(CONFIG_MEMORY_FAILURE)) + control |= OSC_CXL_ERROR_REPORTING_CONTROL; + + return control; + } + static bool os_control_query_checks(struct acpi_pci_root *root, u32 support) { struct acpi_device *device = root->device; @@@ -452,10 -610,10 +610,10 @@@ return true; } - static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm, - bool is_pcie) + static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) { u32 support, control = 0, requested = 0; + u32 cxl_support = 0, cxl_control = 0, cxl_requested = 0; acpi_status status; struct acpi_device *device = root->device; acpi_handle handle = device->handle; @@@ -479,10 -637,20 +637,20 @@@ if (os_control_query_checks(root, support)) requested = control = calculate_control(); - status = acpi_pci_osc_control_set(handle, &control, support); + if (is_cxl(root)) { + cxl_support = calculate_cxl_support(); + decode_cxl_osc_support(root, "OS supports", cxl_support); + cxl_requested = cxl_control = calculate_cxl_control(); + } + + status = acpi_pci_osc_control_set(handle, &control, support, + &cxl_control, cxl_support); if (ACPI_SUCCESS(status)) { if (control) decode_osc_control(root, "OS now controls", control); + if (cxl_control) + decode_cxl_osc_control(root, "OS now controls", + cxl_control); if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) { /* @@@ -504,13 -672,18 +672,18 @@@ *no_aspm = 1; /* _OSC is optional for PCI host bridges */ - if ((status == AE_NOT_FOUND) && !is_pcie) + if (status == AE_NOT_FOUND && !is_pcie(root)) return; if (control) { decode_osc_control(root, "OS requested", requested); decode_osc_control(root, "platform willing to grant", control); } + if (cxl_control) { + decode_cxl_osc_control(root, "OS requested", cxl_requested); + decode_cxl_osc_control(root, "platform willing to grant", + cxl_control); + } dev_info(&device->dev, "_OSC: platform retains control of PCIe features (%s)\n", acpi_format_exception(status)); @@@ -527,7 -700,7 +700,7 @@@ static int acpi_pci_root_add(struct acp acpi_handle handle = device->handle; int no_aspm = 0; bool hotadd = system_state == SYSTEM_RUNNING; - bool is_pcie; + const char *acpi_hid; root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL); if (!root) @@@ -585,8 -758,15 +758,15 @@@ root->mcfg_addr = acpi_pci_root_get_mcfg_addr(handle); - is_pcie = strcmp(acpi_device_hid(device), "PNP0A08") == 0; - negotiate_os_control(root, &no_aspm, is_pcie); + acpi_hid = acpi_device_hid(root->device); + if (strcmp(acpi_hid, "PNP0A08") == 0) + root->bridge_type = ACPI_BRIDGE_TYPE_PCIE; + else if (strcmp(acpi_hid, "ACPI0016") == 0) + root->bridge_type = ACPI_BRIDGE_TYPE_CXL; + else + dev_dbg(&device->dev, "Assuming non-PCIe host bridge\n"); + + negotiate_os_control(root, &no_aspm); /* * TBD: Need PCI interface for enumeration/configuration of roots. 
@@@ -927,8 -1107,6 +1107,8 @@@ struct pci_bus *acpi_pci_root_create(st host_bridge->preserve_config = 1; ACPI_FREE(obj); + acpi_dev_power_up_children_with_adr(device); + pci_scan_child_bus(bus); pci_set_host_bridge_release(host_bridge, acpi_pci_root_release_info, info); diff --combined drivers/nvdimm/pmem.c index 6b24ecada695,3992521c151f..629d10fcf53b --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@@ -45,25 -45,9 +45,25 @@@ static struct nd_region *to_region(stru return to_nd_region(to_dev(pmem)->parent); } -static void hwpoison_clear(struct pmem_device *pmem, - phys_addr_t phys, unsigned int len) +static phys_addr_t to_phys(struct pmem_device *pmem, phys_addr_t offset) { + return pmem->phys_addr + offset; +} + +static sector_t to_sect(struct pmem_device *pmem, phys_addr_t offset) +{ + return (offset - pmem->data_offset) >> SECTOR_SHIFT; +} + +static phys_addr_t to_offset(struct pmem_device *pmem, sector_t sector) +{ + return (sector << SECTOR_SHIFT) + pmem->data_offset; +} + +static void pmem_mkpage_present(struct pmem_device *pmem, phys_addr_t offset, + unsigned int len) +{ + phys_addr_t phys = to_phys(pmem, offset); unsigned long pfn_start, pfn_end, pfn; /* only pmem in the linear map supports HWPoison */ @@@ -85,40 -69,33 +85,40 @@@ } } -static blk_status_t pmem_clear_poison(struct pmem_device *pmem, - phys_addr_t offset, unsigned int len) +static void pmem_clear_bb(struct pmem_device *pmem, sector_t sector, long blks) { - struct device *dev = to_dev(pmem); - sector_t sector; - long cleared; - blk_status_t rc = BLK_STS_OK; + if (blks == 0) + return; + badblocks_clear(&pmem->bb, sector, blks); + if (pmem->bb_state) + sysfs_notify_dirent(pmem->bb_state); +} - sector = (offset - pmem->data_offset) / 512; +static long __pmem_clear_poison(struct pmem_device *pmem, + phys_addr_t offset, unsigned int len) +{ + phys_addr_t phys = to_phys(pmem, offset); + long cleared = nvdimm_clear_poison(to_dev(pmem), phys, len); - cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len); - if (cleared < len) - rc = BLK_STS_IOERR; - if (cleared > 0 && cleared / 512) { - hwpoison_clear(pmem, pmem->phys_addr + offset, cleared); - cleared /= 512; - dev_dbg(dev, "%#llx clear %ld sector%s\n", - (unsigned long long) sector, cleared, - cleared > 1 ? 
"s" : ""); - badblocks_clear(&pmem->bb, sector, cleared); - if (pmem->bb_state) - sysfs_notify_dirent(pmem->bb_state); + if (cleared > 0) { + pmem_mkpage_present(pmem, offset, cleared); + arch_invalidate_pmem(pmem->virt_addr + offset, len); } + return cleared; +} - arch_invalidate_pmem(pmem->virt_addr + offset, len); +static blk_status_t pmem_clear_poison(struct pmem_device *pmem, + phys_addr_t offset, unsigned int len) +{ + long cleared = __pmem_clear_poison(pmem, offset, len); - return rc; + if (cleared < 0) + return BLK_STS_IOERR; + + pmem_clear_bb(pmem, to_sect(pmem, offset), cleared >> SECTOR_SHIFT); + if (cleared < len) + return BLK_STS_IOERR; + return BLK_STS_OK; } static void write_pmem(void *pmem_addr, struct page *page, @@@ -166,7 -143,7 +166,7 @@@ static blk_status_t pmem_do_read(struc sector_t sector, unsigned int len) { blk_status_t rc; - phys_addr_t pmem_off = sector * 512 + pmem->data_offset; + phys_addr_t pmem_off = to_offset(pmem, sector); void *pmem_addr = pmem->virt_addr + pmem_off; if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) @@@ -181,20 -158,36 +181,20 @@@ static blk_status_t pmem_do_write(struc struct page *page, unsigned int page_off, sector_t sector, unsigned int len) { - blk_status_t rc = BLK_STS_OK; - bool bad_pmem = false; - phys_addr_t pmem_off = sector * 512 + pmem->data_offset; + phys_addr_t pmem_off = to_offset(pmem, sector); void *pmem_addr = pmem->virt_addr + pmem_off; - if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) - bad_pmem = true; + if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) { + blk_status_t rc = pmem_clear_poison(pmem, pmem_off, len); + + if (rc != BLK_STS_OK) + return rc; + } - /* - * Note that we write the data both before and after - * clearing poison. The write before clear poison - * handles situations where the latest written data is - * preserved and the clear poison operation simply marks - * the address range as valid without changing the data. - * In this case application software can assume that an - * interrupted write will either return the new good - * data or an error. - * - * However, if pmem_clear_poison() leaves the data in an - * indeterminate state we need to perform the write - * after clear poison. 
- */ flush_dcache_page(page); write_pmem(pmem_addr, page, page_off, len); - if (unlikely(bad_pmem)) { - rc = pmem_clear_poison(pmem, pmem_off, len); - write_pmem(pmem_addr, page, page_off, len); - } - return rc; + return BLK_STS_OK; } static void pmem_submit_bio(struct bio *bio) @@@ -262,47 -255,24 +262,47 @@@ static int pmem_rw_page(struct block_de /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, enum dax_access_mode mode, void **kaddr, + pfn_t *pfn) { resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset; - - if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512, - PFN_PHYS(nr_pages)))) - return -EIO; + sector_t sector = PFN_PHYS(pgoff) >> SECTOR_SHIFT; + unsigned int num = PFN_PHYS(nr_pages) >> SECTOR_SHIFT; + struct badblocks *bb = &pmem->bb; + sector_t first_bad; + int num_bad; if (kaddr) *kaddr = pmem->virt_addr + offset; if (pfn) *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); + if (bb->count && + badblocks_check(bb, sector, num, &first_bad, &num_bad)) { + long actual_nr; + + if (mode != DAX_RECOVERY_WRITE) + return -EIO; + + /* + * The recovery stride is set to the kernel page size because + * the underlying driver and firmware clear-poison functions + * don't appear to handle large chunks (such as 2MiB) reliably. + */ + actual_nr = PHYS_PFN( + PAGE_ALIGN((first_bad - sector) << SECTOR_SHIFT)); + dev_dbg(pmem->bb.dev, "start sector(%llu), nr_pages(%ld), first_bad(%llu), actual_nr(%ld)\n", + sector, nr_pages, first_bad, actual_nr); + if (actual_nr) + return actual_nr; + return 1; + } + /* - * If badblocks are present, limit known good range to the - * requested range. + * If badblocks are present but not in the range, limit known good range + * to the requested range. */ - if (unlikely(pmem->bb.count)) + if (bb->count) return nr_pages; return PHYS_PFN(pmem->size - pmem->pfn_pad - offset); } @@@ -324,73 -294,16 +324,73 @@@ static int pmem_dax_zero_page_range(str } static long pmem_dax_direct_access(struct dax_device *dax_dev, - pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) + pgoff_t pgoff, long nr_pages, enum dax_access_mode mode, + void **kaddr, pfn_t *pfn) +{ + struct pmem_device *pmem = dax_get_private(dax_dev); + + return __pmem_direct_access(pmem, pgoff, nr_pages, mode, kaddr, pfn); +} + +/* + * A recovery write thread starts out as a normal pwrite thread; when + * the filesystem is told about a potential media error in the range, + * it turns the normal pwrite into a dax_recovery_write. + * + * The recovery write consists of clearing the media poison, clearing the + * page's HWPoison bit, re-enabling page-wide read-write permission, + * flushing the caches, and finally writing. A competing pread thread + * will be held off during the recovery process since data read back + * might not be valid; this is achieved by clearing the badblock records + * only after the recovery write is complete. Competing recovery write + * threads are already serialized by the writer lock held by + * dax_iomap_rw().
+ */ +static size_t pmem_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) { struct pmem_device *pmem = dax_get_private(dax_dev); + size_t olen, len, off; + phys_addr_t pmem_off; + struct device *dev = pmem->bb.dev; + long cleared; + + off = offset_in_page(addr); + len = PFN_PHYS(PFN_UP(off + bytes)); + if (!is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) >> SECTOR_SHIFT, len)) + return _copy_from_iter_flushcache(addr, bytes, i); + + /* + * A range that is not page-aligned cannot be recovered. This should + * not happen unless something else went wrong. + */ + if (off || !PAGE_ALIGNED(bytes)) { + dev_dbg(dev, "Found poison, but addr(%p) or bytes(%#zx) not page aligned\n", + addr, bytes); + return 0; + } + + pmem_off = PFN_PHYS(pgoff) + pmem->data_offset; + cleared = __pmem_clear_poison(pmem, pmem_off, len); + if (cleared > 0 && cleared < len) { + dev_dbg(dev, "poison cleared only %ld out of %zu bytes\n", + cleared, len); + return 0; + } + if (cleared < 0) { + dev_dbg(dev, "poison clear failed: %ld\n", cleared); + return 0; + } + + olen = _copy_from_iter_flushcache(addr, bytes, i); + pmem_clear_bb(pmem, to_sect(pmem, pmem_off), cleared >> SECTOR_SHIFT); - return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn); + return olen; } static const struct dax_operations pmem_dax_ops = { .direct_access = pmem_dax_direct_access, .zero_page_range = pmem_dax_zero_page_range, + .recovery_write = pmem_recovery_write, }; static ssize_t write_cache_show(struct device *dev, @@@ -660,7 -573,7 +660,7 @@@ static void nd_pmem_remove(struct devic nvdimm_namespace_detach_btt(to_nd_btt(dev)); else { /* - * Note, this assumes nd_device_lock() context to not + * Note, this assumes device_lock() context to not * race nd_pmem_notify() */ sysfs_put(pmem->bb_state); diff --combined include/acpi/acpi_bus.h index 772590e2eddb,4c463ae2777b..0dc1ea0b52f5 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@@ -481,8 -481,6 +481,8 @@@ void acpi_initialize_hp_context(struct extern struct bus_type acpi_bus_type; int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data); +int acpi_dev_for_each_child(struct acpi_device *adev, + int (*fn)(struct acpi_device *, void *), void *data); /* * Events @@@ -524,7 -522,6 +524,7 @@@ int acpi_device_fix_up_power(struct acp int acpi_bus_update_power(acpi_handle handle, int *state_p); int acpi_device_update_power(struct acpi_device *device, int *state_p); bool acpi_bus_power_manageable(acpi_handle handle); +void acpi_dev_power_up_children_with_adr(struct acpi_device *adev); int acpi_device_power_add_dependent(struct acpi_device *adev, struct device *dev); void acpi_device_power_remove_dependent(struct acpi_device *adev, @@@ -585,14 -582,22 +585,22 @@@ int unregister_acpi_bus_type(struct acp int acpi_bind_one(struct device *dev, struct acpi_device *adev); int acpi_unbind_one(struct device *dev); + enum acpi_bridge_type { + ACPI_BRIDGE_TYPE_PCIE = 1, + ACPI_BRIDGE_TYPE_CXL, + }; + struct acpi_pci_root { struct acpi_device * device; struct pci_bus *bus; u16 segment; + int bridge_type; struct resource secondary; /* downstream bus range */ - u32 osc_support_set; /* _OSC state of support bits */ - u32 osc_control_set; /* _OSC state of control bits */ + u32 osc_support_set; /* _OSC state of support bits */ + u32 osc_control_set; /* _OSC state of control bits */ + u32 osc_ext_support_set; /* _OSC state of extended support bits */ + u32 osc_ext_control_set; /* _OSC state of extended control bits */ phys_addr_t mcfg_addr; }; diff
--combined include/linux/acpi.h index 03465db16b68,378a431666b3..f3fdfd784a6c --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@@ -550,10 -550,16 +550,16 @@@ struct acpi_osc_context acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); - /* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */ + /* Number of _OSC capability DWORDS depends on bridge type */ + #define OSC_PCI_CAPABILITY_DWORDS 3 + #define OSC_CXL_CAPABILITY_DWORDS 5 + + /* Indexes into _OSC Capabilities Buffer (DWORDs 2 to 5 are device-specific) */ #define OSC_QUERY_DWORD 0 /* DWORD 1 */ #define OSC_SUPPORT_DWORD 1 /* DWORD 2 */ #define OSC_CONTROL_DWORD 2 /* DWORD 3 */ + #define OSC_EXT_SUPPORT_DWORD 3 /* DWORD 4 */ + #define OSC_EXT_CONTROL_DWORD 4 /* DWORD 5 */ /* _OSC Capabilities DWORD 1: Query/Control and Error Returns (generic) */ #define OSC_QUERY_ENABLE 0x00000001 /* input */ @@@ -574,7 -580,6 +580,7 @@@ #define OSC_SB_OSLPI_SUPPORT 0x00000100 #define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT 0x00001000 #define OSC_SB_GENERIC_INITIATOR_SUPPORT 0x00002000 +#define OSC_SB_CPC_FLEXIBLE_ADR_SPACE 0x00004000 #define OSC_SB_NATIVE_USB4_SUPPORT 0x00040000 #define OSC_SB_PRM_SUPPORT 0x00200000 @@@ -582,7 -587,6 +588,7 @@@ extern bool osc_sb_apei_support_acked extern bool osc_pc_lpi_support_confirmed; extern bool osc_sb_native_usb4_support_confirmed; extern bool osc_sb_cppc_not_supported; +extern bool osc_cpc_flexible_adr_space_confirmed; /* USB4 Capabilities */ #define OSC_USB_USB3_TUNNELING 0x00000001 @@@ -610,6 -614,29 +616,29 @@@ extern u32 osc_sb_native_usb4_control #define OSC_PCI_EXPRESS_LTR_CONTROL 0x00000020 #define OSC_PCI_EXPRESS_DPC_CONTROL 0x00000080 + /* CXL _OSC: Capabilities DWORD 4: Support Field */ + #define OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT 0x00000001 + #define OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT 0x00000002 + #define OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT 0x00000004 + #define OSC_CXL_NATIVE_HP_SUPPORT 0x00000008 + + /* CXL _OSC: Capabilities DWORD 5: Control Field */ + #define OSC_CXL_ERROR_REPORTING_CONTROL 0x00000001 + + static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) + { + u32 *ret = context->ret.pointer; + + return ret[OSC_CONTROL_DWORD]; + } + + static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context) + { + u32 *ret = context->ret.pointer; + + return ret[OSC_EXT_CONTROL_DWORD]; + } + #define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002 #define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004 #define ACPI_GSB_ACCESS_ATTRIB_BYTE 0x00000006 @@@ -1006,6 -1033,17 +1035,17 @@@ static inline int acpi_register_wakeup_ static inline void acpi_unregister_wakeup_handler( bool (*wakeup)(void *context), void *context) { } + struct acpi_osc_context; + static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) + { + return 0; + } + + static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context) + { + return 0; + } + #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC diff --combined include/linux/lockdep.h index 37951c17908e,43b0dc6a0b21..b6829b970093 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@@ -16,6 -16,10 +16,6 @@@ struct task_struct; -/* for sysctl */ -extern int prove_locking; -extern int lock_stat; - #ifdef CONFIG_LOCKDEP #include @@@ -286,6 -290,9 +286,9 @@@ extern void lock_set_class(struct lockd struct lock_class_key *key, unsigned int subclass, unsigned long ip); + #define lock_set_novalidate_class(l, n, i) \ + lock_set_class(l, n, 
&__lockdep_no_validate__, 0, i) + static inline void lock_set_subclass(struct lockdep_map *lock, unsigned int subclass, unsigned long ip) { @@@ -353,7 -360,8 +356,8 @@@ static inline void lockdep_set_selftest # define lock_acquire(l, s, t, r, c, n, i) do { } while (0) # define lock_release(l, i) do { } while (0) # define lock_downgrade(l, i) do { } while (0) - # define lock_set_class(l, n, k, s, i) do { } while (0) + # define lock_set_class(l, n, key, s, i) do { (void)(key); } while (0) + # define lock_set_novalidate_class(l, n, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_init() do { } while (0) # define lockdep_init_map_type(lock, name, key, sub, inner, outer, type) \ diff --combined include/linux/pm.h index ffe941958501,7911c4c9a7be..70ec69d8bafd --- a/include/linux/pm.h +++ b/include/linux/pm.h @@@ -36,6 -36,15 +36,15 @@@ static inline void pm_vt_switch_unregis } #endif /* CONFIG_VT_CONSOLE_SLEEP */ + #ifdef CONFIG_CXL_SUSPEND + bool cxl_mem_active(void); + #else + static inline bool cxl_mem_active(void) + { + return false; + } + #endif + /* * Device power management */ @@@ -368,13 -377,13 +377,13 @@@ const struct dev_pm_ops name = { #ifdef CONFIG_PM #define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ - runtime_resume_fn, idle_fn, sec) \ + runtime_resume_fn, idle_fn, sec, ns) \ _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ runtime_resume_fn, idle_fn); \ - _EXPORT_SYMBOL(name, sec) + __EXPORT_SYMBOL(name, sec, ns) #else #define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ - runtime_resume_fn, idle_fn, sec) \ + runtime_resume_fn, idle_fn, sec, ns) \ static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \ resume_fn, runtime_suspend_fn, \ runtime_resume_fn, idle_fn) @@@ -391,13 -400,9 +400,13 @@@ _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL) #define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "") + _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "", "") #define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl") + _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl", "") +#define EXPORT_NS_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ + _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "", #ns) +#define EXPORT_NS_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ + _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl", #ns) /* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */ #define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ diff --combined kernel/power/main.c index 5242bf2ee469,3e6be1c33e0b..e3694034b753 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@@ -127,7 -127,9 +127,9 @@@ static ssize_t mem_sleep_show(struct ko char *s = buf; suspend_state_t i; - for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) + for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) { + if (i >= PM_SUSPEND_MEM && cxl_mem_active()) + continue; if (mem_sleep_states[i]) { const char *label = mem_sleep_states[i]; @@@ -136,6 -138,7 +138,7 @@@ else s += sprintf(s, "%s ", label); } + } /* Convert the last space to a newline if needed. 
*/ if (s != buf) @@@ -545,6 -548,35 +548,6 @@@ static int __init pm_debug_messages_set } __setup("pm_debug_messages", pm_debug_messages_setup); -/** - * __pm_pr_dbg - Print a suspend debug message to the kernel log. - * @defer: Whether or not to use printk_deferred() to print the message. - * @fmt: Message format. - * - * The message will be emitted if enabled through the pm_debug_messages - * sysfs attribute. - */ -void __pm_pr_dbg(bool defer, const char *fmt, ...) -{ - struct va_format vaf; - va_list args; - - if (!pm_debug_messages_on) - return; - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - if (defer) - printk_deferred(KERN_DEBUG "PM: %pV", &vaf); - else - printk(KERN_DEBUG "PM: %pV", &vaf); - - va_end(args); -} - #else /* !CONFIG_PM_SLEEP_DEBUG */ static inline void pm_print_times_init(void) {} #endif /* CONFIG_PM_SLEEP_DEBUG */ diff --combined lib/Kconfig.debug index 4cea85a83321,cfe3b092c31d..2e24db4bff19 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@@ -485,25 -485,24 +485,25 @@@ config FRAME_POINTE larger and slower, but it gives very useful debugging information in case of kernel bugs. (precise oopses/stacktraces/warnings) +config OBJTOOL + bool + config STACK_VALIDATION bool "Compile-time stack metadata validation" - depends on HAVE_STACK_VALIDATION + depends on HAVE_STACK_VALIDATION && UNWINDER_FRAME_POINTER + select OBJTOOL default n help - Add compile-time checks to validate stack metadata, including frame - pointers (if CONFIG_FRAME_POINTER is enabled). This helps ensure - that runtime stack traces are more reliable. - - This is also a prerequisite for generation of ORC unwind data, which - is needed for CONFIG_UNWINDER_ORC. + Validate frame pointer rules at compile-time. This helps ensure that + runtime stack traces are more reliable. For more information, see tools/objtool/Documentation/stack-validation.txt. -config VMLINUX_VALIDATION +config NOINSTR_VALIDATION bool - depends on STACK_VALIDATION && DEBUG_ENTRY + depends on HAVE_NOINSTR_VALIDATION && DEBUG_ENTRY + select OBJTOOL default y config VMLINUX_MAP @@@ -699,6 -698,40 +699,6 @@@ config DEBUG_OBJECTS_ENABLE_DEFAUL help Debug objects boot parameter default value -config DEBUG_SLAB - bool "Debug slab memory allocations" - depends on DEBUG_KERNEL && SLAB - help - Say Y here to have the kernel do limited verification on memory - allocation as well as poisoning memory on free to catch use of freed - memory. This can make kmalloc/kfree-intensive workloads much slower. - -config SLUB_DEBUG_ON - bool "SLUB debugging on by default" - depends on SLUB && SLUB_DEBUG - default n - help - Boot with debugging on by default. SLUB boots by default with - the runtime debug capabilities switched off. Enabling this is - equivalent to specifying the "slub_debug" parameter on boot. - There is no support for more fine grained debug control like - possible with slub_debug=xxx. SLUB debugging may be switched - off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying - "slub_debug=-". - -config SLUB_STATS - default n - bool "Enable SLUB performance statistics" - depends on SLUB && SYSFS - help - SLUB statistics are useful to debug SLUBs allocation behavior in - order find ways to optimize the allocator. This should never be - enabled for production use since keeping statistics slows down - the allocator by a few percentage points. The slabinfo command - supports the determination of the most active slabs to figure - out which slabs are relevant to a particular load. 
- Try running: slabinfo -DA - config HAVE_DEBUG_KMEMLEAK bool @@@ -1038,6 -1071,13 +1038,6 @@@ config BOOTPARAM_SOFTLOCKUP_PANI Say N if unsure. -config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE - int - depends on SOFTLOCKUP_DETECTOR - range 0 1 - default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC - default 1 if BOOTPARAM_SOFTLOCKUP_PANIC - config HARDLOCKUP_DETECTOR_PERF bool select SOFTLOCKUP_DETECTOR @@@ -1079,6 -1119,13 +1079,6 @@@ config BOOTPARAM_HARDLOCKUP_PANI Say N if unsure. -config BOOTPARAM_HARDLOCKUP_PANIC_VALUE - int - depends on HARDLOCKUP_DETECTOR - range 0 1 - default 0 if !BOOTPARAM_HARDLOCKUP_PANIC - default 1 if BOOTPARAM_HARDLOCKUP_PANIC - config DETECT_HUNG_TASK bool "Detect Hung Tasks" depends on DEBUG_KERNEL @@@ -1126,6 -1173,13 +1126,6 @@@ config BOOTPARAM_HUNG_TASK_PANI Say N if unsure. -config BOOTPARAM_HUNG_TASK_PANIC_VALUE - int - depends on DETECT_HUNG_TASK - range 0 1 - default 0 if !BOOTPARAM_HUNG_TASK_PANIC - default 1 if BOOTPARAM_HUNG_TASK_PANIC - config WQ_WATCHDOG bool "Detect Workqueue Stalls" depends on DEBUG_KERNEL @@@ -1490,29 -1544,6 +1490,6 @@@ config CSD_LOCK_WAIT_DEBU include the IPI handler function currently executing (if any) and relevant stack traces. - choice - prompt "Lock debugging: prove subsystem device_lock() correctness" - depends on PROVE_LOCKING - help - For subsystems that have instrumented their usage of the device_lock() - with nested annotations, enable lock dependency checking. The locking - hierarchy 'subclass' identifiers are not compatible across - sub-systems, so only one can be enabled at a time. - - config PROVE_NVDIMM_LOCKING - bool "NVDIMM" - depends on LIBNVDIMM - help - Enable lockdep to validate nd_device_lock() usage. - - config PROVE_CXL_LOCKING - bool "CXL" - depends on CXL_BUS - help - Enable lockdep to validate cxl_device_lock() usage. - - endchoice - endmenu # lock debugging config TRACE_IRQFLAGS @@@ -1562,7 -1593,8 +1539,7 @@@ config WARN_ALL_UNSEEDED_RANDO so architecture maintainers really need to do what they can to get the CRNG seeded sooner after the system is booted. However, since users cannot do anything actionable to - address this, by default the kernel will issue only a single - warning for the first use of unseeded randomness. + address this, by default this option is disabled. Say Y here if you want to receive warnings for all uses of unseeded randomness. This will be of use primarily for @@@ -1980,11 -2012,10 +1957,11 @@@ config KCO bool "Code coverage for fuzzing" depends on ARCH_HAS_KCOV depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS - depends on !ARCH_WANTS_NO_INSTR || STACK_VALIDATION || \ + depends on !ARCH_WANTS_NO_INSTR || HAVE_NOINSTR_HACK || \ GCC_VERSION >= 120000 || CLANG_VERSION >= 130000 select DEBUG_FS select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC + select OBJTOOL if HAVE_NOINSTR_HACK help KCOV exposes kernel code coverage information in a form suitable for coverage-guided fuzzing (randomized testing). @@@ -2086,11 -2117,10 +2063,11 @@@ config TEST_DIV6 If unsure, say N. config KPROBES_SANITY_TEST - tristate "Kprobes sanity tests" + tristate "Kprobes sanity tests" if !KUNIT_ALL_TESTS depends on DEBUG_KERNEL depends on KPROBES depends on KUNIT + default KUNIT_ALL_TESTS help This option provides for testing basic kprobes functionality on boot. Samples of kprobe and kretprobe are inserted and @@@ -2364,9 -2394,8 +2341,9 @@@ config TEST_SYSCT If unsure, say N. 
config BITFIELD_KUNIT - tristate "KUnit test bitfield functions at runtime" + tristate "KUnit test bitfield functions at runtime" if !KUNIT_ALL_TESTS depends on KUNIT + default KUNIT_ALL_TESTS help Enable this option to test the bitfield functions at boot. @@@ -2400,9 -2429,8 +2377,9 @@@ config HASH_KUNIT_TES optimized versions. If unsure, say N. config RESOURCE_KUNIT_TEST - tristate "KUnit test for resource API" + tristate "KUnit test for resource API" if !KUNIT_ALL_TESTS depends on KUNIT + default KUNIT_ALL_TESTS help This builds the resource API unit test. Tests the logic of API provided by resource.c and ioport.h. @@@ -2455,9 -2483,8 +2432,9 @@@ config LINEAR_RANGES_TES If unsure, say N. config CMDLINE_KUNIT_TEST - tristate "KUnit test for cmdline API" + tristate "KUnit test for cmdline API" if !KUNIT_ALL_TESTS depends on KUNIT + default KUNIT_ALL_TESTS help This builds the cmdline API unit test. Tests the logic of API provided by cmdline.c. @@@ -2467,9 -2494,8 +2444,9 @@@ If unsure, say N. config BITS_TEST - tristate "KUnit test for bits.h" + tristate "KUnit test for bits.h" if !KUNIT_ALL_TESTS depends on KUNIT + default KUNIT_ALL_TESTS help This builds the bits unit test. Tests the logic of macros defined in bits.h.
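
A closing illustration of the suspend gating added by this merge: cxl_mem_active() (declared in the <linux/pm.h> hunk above, stubbed to return false when CONFIG_CXL_SUSPEND is disabled) is consulted before offering suspend-to-mem states, as mem_sleep_show() now does in kernel/power/main.c. The sketch below is a minimal, hedged example of that pattern; my_state_allowed() is a hypothetical helper, not code from this diff.

#include <linux/pm.h>
#include <linux/suspend.h>

/* Hypothetical policy check mirroring the mem_sleep_show() filter */
static bool my_state_allowed(suspend_state_t state)
{
	/*
	 * While CXL memory is active, the suspend image (and the PCI
	 * register state needed for resume) could itself live in CXL
	 * memory, so refuse anything at or beyond PM_SUSPEND_MEM.
	 */
	if (state >= PM_SUSPEND_MEM && cxl_mem_active())
		return false;
	return true;
}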