From: Linus Torvalds Date: Mon, 12 Dec 2022 23:48:36 +0000 (-0800) Subject: Merge tag 'cgroup-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup X-Git-Tag: v6.2-rc1~174 X-Git-Url: https://repo.jachan.dev/linux.git/commitdiff_plain/a312a8cc3c7fe96f5e54e69c676f5bd12995f44e?hp=-c Merge tag 'cgroup-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup Pull cgroup updates from Tejun Heo: "Nothing too interesting: - Add CONFIG_DEBUG_GROUP_REF which makes cgroup refcnt operations kprobable - A couple cpuset optimizations - Other misc changes including doc and test updates" * tag 'cgroup-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: cgroup: remove rcu_read_lock()/rcu_read_unlock() in critical section of spin_lock_irq() cgroup/cpuset: Improve cpuset_css_alloc() description kselftest/cgroup: Add cleanup() to test_cpuset_prs.sh cgroup/cpuset: Optimize cpuset_attach() on v2 cgroup/cpuset: Skip spread flags update on v2 kselftest/cgroup: Fix gathering number of CPUs cgroup: cgroup refcnt functions should be exported when CONFIG_DEBUG_CGROUP_REF cgroup: Implement DEBUG_CGROUP_REF --- a312a8cc3c7fe96f5e54e69c676f5bd12995f44e diff --combined include/linux/cgroup.h index 2b7d077de7ef,c8441090ca4c..3410aecffdb4 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@@ -68,7 -68,6 +68,7 @@@ struct css_task_iter struct list_head iters_node; /* css_set->task_iters */ }; +extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; @@@ -310,71 -309,24 +310,24 @@@ void css_task_iter_end(struct css_task_ * Inline functions. */ + #ifdef CONFIG_DEBUG_CGROUP_REF + void css_get(struct cgroup_subsys_state *css); + void css_get_many(struct cgroup_subsys_state *css, unsigned int n); + bool css_tryget(struct cgroup_subsys_state *css); + bool css_tryget_online(struct cgroup_subsys_state *css); + void css_put(struct cgroup_subsys_state *css); + void css_put_many(struct cgroup_subsys_state *css, unsigned int n); + #else + #define CGROUP_REF_FN_ATTRS static inline + #define CGROUP_REF_EXPORT(fn) + #include + #endif + static inline u64 cgroup_id(const struct cgroup *cgrp) { return cgrp->kn->id; } - /** - * css_get - obtain a reference on the specified css - * @css: target css - * - * The caller must already have a reference. - */ - static inline void css_get(struct cgroup_subsys_state *css) - { - if (!(css->flags & CSS_NO_REF)) - percpu_ref_get(&css->refcnt); - } - - /** - * css_get_many - obtain references on the specified css - * @css: target css - * @n: number of references to get - * - * The caller must already have a reference. - */ - static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n) - { - if (!(css->flags & CSS_NO_REF)) - percpu_ref_get_many(&css->refcnt, n); - } - - /** - * css_tryget - try to obtain a reference on the specified css - * @css: target css - * - * Obtain a reference on @css unless it already has reached zero and is - * being released. This function doesn't care whether @css is on or - * offline. The caller naturally needs to ensure that @css is accessible - * but doesn't have to be holding a reference on it - IOW, RCU protected - * access is good enough for this function. Returns %true if a reference - * count was successfully obtained; %false otherwise. - */ - static inline bool css_tryget(struct cgroup_subsys_state *css) - { - if (!(css->flags & CSS_NO_REF)) - return percpu_ref_tryget(&css->refcnt); - return true; - } - - /** - * css_tryget_online - try to obtain a reference on the specified css if online - * @css: target css - * - * Obtain a reference on @css if it's online. The caller naturally needs - * to ensure that @css is accessible but doesn't have to be holding a - * reference on it - IOW, RCU protected access is good enough for this - * function. Returns %true if a reference count was successfully obtained; - * %false otherwise. - */ - static inline bool css_tryget_online(struct cgroup_subsys_state *css) - { - if (!(css->flags & CSS_NO_REF)) - return percpu_ref_tryget_live(&css->refcnt); - return true; - } - /** * css_is_dying - test whether the specified css is dying * @css: target css @@@ -395,31 -347,6 +348,6 @@@ static inline bool css_is_dying(struct return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt); } - /** - * css_put - put a css reference - * @css: target css - * - * Put a reference obtained via css_get() and css_tryget_online(). - */ - static inline void css_put(struct cgroup_subsys_state *css) - { - if (!(css->flags & CSS_NO_REF)) - percpu_ref_put(&css->refcnt); - } - - /** - * css_put_many - put css references - * @css: target css - * @n: number of references to put - * - * Put references obtained via css_get() and css_tryget_online(). - */ - static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) - { - if (!(css->flags & CSS_NO_REF)) - percpu_ref_put_many(&css->refcnt, n); - } - static inline void cgroup_get(struct cgroup *cgrp) { css_get(&cgrp->self); @@@ -435,18 -362,6 +363,18 @@@ static inline void cgroup_put(struct cg css_put(&cgrp->self); } +extern struct mutex cgroup_mutex; + +static inline void cgroup_lock(void) +{ + mutex_lock(&cgroup_mutex); +} + +static inline void cgroup_unlock(void) +{ + mutex_unlock(&cgroup_mutex); +} + /** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for @@@ -461,6 -376,7 +389,6 @@@ * as locks used during the cgroup_subsys::attach() methods. */ #ifdef CONFIG_PROVE_RCU -extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ @@@ -684,6 -600,11 +612,6 @@@ static inline void pr_cont_cgroup_path( pr_cont_kernfs_path(cgrp->kn); } -static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) -{ - return cgrp->psi; -} - bool cgroup_psi_enabled(void); static inline void cgroup_init_kthreadd(void) @@@ -715,8 -636,6 +643,8 @@@ struct cgroup static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; } static inline void css_get(struct cgroup_subsys_state *css) {} static inline void css_put(struct cgroup_subsys_state *css) {} +static inline void cgroup_lock(void) {} +static inline void cgroup_unlock(void) {} static inline int cgroup_attach_task_all(struct task_struct *from, struct task_struct *t) { return 0; } static inline int cgroupstats_build(struct cgroupstats *stats, diff --combined kernel/cgroup/cgroup.c index 2319946715e0,3028f6bc7d11..15cc26513596 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@@ -248,6 -248,12 +248,12 @@@ static int cgroup_addrm_files(struct cg struct cgroup *cgrp, struct cftype cfts[], bool is_add); + #ifdef CONFIG_DEBUG_CGROUP_REF + #define CGROUP_REF_FN_ATTRS noinline + #define CGROUP_REF_EXPORT(fn) EXPORT_SYMBOL_GPL(fn); + #include + #endif + /** * cgroup_ssid_enabled - cgroup subsys enabled test by subsys ID * @ssid: subsys ID of interest @@@ -2860,14 -2866,12 +2866,12 @@@ int cgroup_migrate(struct task_struct * * take an rcu_read_lock. */ spin_lock_irq(&css_set_lock); - rcu_read_lock(); task = leader; do { cgroup_migrate_add_task(task, mgctx); if (!threadgroup) break; } while_each_thread(leader, task); - rcu_read_unlock(); spin_unlock_irq(&css_set_lock); return cgroup_migrate_execute(mgctx); @@@ -3716,27 -3720,27 +3720,27 @@@ static int cpu_stat_show(struct seq_fil static int cgroup_io_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi; + struct psi_group *psi = cgroup_psi(cgrp); return psi_show(seq, psi, PSI_IO); } static int cgroup_memory_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi; + struct psi_group *psi = cgroup_psi(cgrp); return psi_show(seq, psi, PSI_MEM); } static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi; + struct psi_group *psi = cgroup_psi(cgrp); return psi_show(seq, psi, PSI_CPU); } -static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, - size_t nbytes, enum psi_res res) +static ssize_t pressure_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, enum psi_res res) { struct cgroup_file_ctx *ctx = of->priv; struct psi_trigger *new; @@@ -3756,7 -3760,7 +3760,7 @@@ return -EBUSY; } - psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi; + psi = cgroup_psi(cgrp); new = psi_trigger_create(psi, buf, res); if (IS_ERR(new)) { cgroup_put(cgrp); @@@ -3773,86 -3777,21 +3777,86 @@@ static ssize_t cgroup_io_pressure_write char *buf, size_t nbytes, loff_t off) { - return cgroup_pressure_write(of, buf, nbytes, PSI_IO); + return pressure_write(of, buf, nbytes, PSI_IO); } static ssize_t cgroup_memory_pressure_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - return cgroup_pressure_write(of, buf, nbytes, PSI_MEM); + return pressure_write(of, buf, nbytes, PSI_MEM); } static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - return cgroup_pressure_write(of, buf, nbytes, PSI_CPU); + return pressure_write(of, buf, nbytes, PSI_CPU); +} + +#ifdef CONFIG_IRQ_TIME_ACCOUNTING +static int cgroup_irq_pressure_show(struct seq_file *seq, void *v) +{ + struct cgroup *cgrp = seq_css(seq)->cgroup; + struct psi_group *psi = cgroup_psi(cgrp); + + return psi_show(seq, psi, PSI_IRQ); +} + +static ssize_t cgroup_irq_pressure_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) +{ + return pressure_write(of, buf, nbytes, PSI_IRQ); +} +#endif + +static int cgroup_pressure_show(struct seq_file *seq, void *v) +{ + struct cgroup *cgrp = seq_css(seq)->cgroup; + struct psi_group *psi = cgroup_psi(cgrp); + + seq_printf(seq, "%d\n", psi->enabled); + + return 0; +} + +static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) +{ + ssize_t ret; + int enable; + struct cgroup *cgrp; + struct psi_group *psi; + + ret = kstrtoint(strstrip(buf), 0, &enable); + if (ret) + return ret; + + if (enable < 0 || enable > 1) + return -ERANGE; + + cgrp = cgroup_kn_lock_live(of->kn, false); + if (!cgrp) + return -ENOENT; + + psi = cgroup_psi(cgrp); + if (psi->enabled != enable) { + int i; + + /* show or hide {cpu,memory,io,irq}.pressure files */ + for (i = 0; i < NR_PSI_RESOURCES; i++) + cgroup_file_show(&cgrp->psi_files[i], enable); + + psi->enabled = enable; + if (enable) + psi_cgroup_restart(psi); + } + + cgroup_kn_unlock(of->kn); + + return nbytes; } static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, @@@ -3872,9 -3811,6 +3876,9 @@@ static void cgroup_pressure_release(str bool cgroup_psi_enabled(void) { + if (static_branch_likely(&psi_disabled)) + return false; + return (cgroup_feature_disable_mask & (1 << OPT_FEATURE_PRESSURE)) == 0; } @@@ -5261,7 -5197,6 +5265,7 @@@ static struct cftype cgroup_psi_files[ #ifdef CONFIG_PSI { .name = "io.pressure", + .file_offset = offsetof(struct cgroup, psi_files[PSI_IO]), .seq_show = cgroup_io_pressure_show, .write = cgroup_io_pressure_write, .poll = cgroup_pressure_poll, @@@ -5269,7 -5204,6 +5273,7 @@@ }, { .name = "memory.pressure", + .file_offset = offsetof(struct cgroup, psi_files[PSI_MEM]), .seq_show = cgroup_memory_pressure_show, .write = cgroup_memory_pressure_write, .poll = cgroup_pressure_poll, @@@ -5277,27 -5211,11 +5281,27 @@@ }, { .name = "cpu.pressure", + .file_offset = offsetof(struct cgroup, psi_files[PSI_CPU]), .seq_show = cgroup_cpu_pressure_show, .write = cgroup_cpu_pressure_write, .poll = cgroup_pressure_poll, .release = cgroup_pressure_release, }, +#ifdef CONFIG_IRQ_TIME_ACCOUNTING + { + .name = "irq.pressure", + .file_offset = offsetof(struct cgroup, psi_files[PSI_IRQ]), + .seq_show = cgroup_irq_pressure_show, + .write = cgroup_irq_pressure_write, + .poll = cgroup_pressure_poll, + .release = cgroup_pressure_release, + }, +#endif + { + .name = "cgroup.pressure", + .seq_show = cgroup_pressure_show, + .write = cgroup_pressure_write, + }, #endif /* CONFIG_PSI */ { } /* terminate */ }; diff --combined lib/Kconfig.debug index 3638b3424be5,b620a340d7df..13a1046a7d6f --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@@ -231,11 -231,6 +231,11 @@@ config DEBUG_INF in the "Debug information" choice below, indicating that debug information will be generated for build targets. +# Clang is known to generate .{s,u}leb128 with symbol deltas with DWARF5, which +# some targets may not support: https://sourceware.org/bugzilla/show_bug.cgi?id=27215 +config AS_HAS_NON_CONST_LEB128 + def_bool $(as-instr,.uleb128 .Lexpr_end4 - .Lexpr_start3\n.Lexpr_start3:\n.Lexpr_end4:) + choice prompt "Debug information" depends on DEBUG_KERNEL @@@ -258,7 -253,6 +258,7 @@@ config DEBUG_INFO_NON config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT bool "Rely on the toolchain's implicit default DWARF version" select DEBUG_INFO + depends on !CC_IS_CLANG || AS_IS_LLVM || CLANG_VERSION < 140000 || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128) help The implicit default version of DWARF debug info produced by a toolchain changes over time. @@@ -270,7 -264,7 +270,7 @@@ config DEBUG_INFO_DWARF4 bool "Generate DWARF Version 4 debuginfo" select DEBUG_INFO - depends on !CC_IS_CLANG || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502))) + depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502) help Generate DWARF v4 debug info. This requires gcc 4.5+, binutils 2.35.2 if using clang without clang's integrated assembler, and gdb 7.0+. @@@ -282,7 -276,7 +282,7 @@@ config DEBUG_INFO_DWARF5 bool "Generate DWARF Version 5 debuginfo" select DEBUG_INFO - depends on !CC_IS_CLANG || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502))) + depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128) help Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc 5.0+ accepts the -gdwarf-5 flag but only had partial support for some @@@ -395,15 -389,13 +395,15 @@@ endif # DEBUG_INF config FRAME_WARN int "Warn for stack frames larger than" range 0 8192 + default 0 if KMSAN default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 2048 if PARISC default 1536 if (!64BIT && XTENSA) + default 1280 if KASAN && !64BIT default 1024 if !64BIT default 2048 if 64BIT help - Tell gcc to warn at build time for stack frames larger than this. + Tell the compiler to warn at build time for stack frames larger than this. Setting this too low will cause a lot of warnings. Setting it to 0 disables the warning. @@@ -825,12 -817,13 +825,12 @@@ config DEBUG_V If unsure, say N. -config DEBUG_VM_VMACACHE - bool "Debug VMA caching" +config DEBUG_VM_MAPLE_TREE + bool "Debug VM maple trees" depends on DEBUG_VM + select DEBUG_MAPLE_TREE help - Enable this to turn on VMA caching debug information. Doing so - can cause significant overhead, so only enable it in non-production - environments. + Enable VM maple tree debugging information and extra validations. If unsure, say N. @@@ -983,7 -976,6 +983,7 @@@ config DEBUG_STACKOVERFLO source "lib/Kconfig.kasan" source "lib/Kconfig.kfence" +source "lib/Kconfig.kmsan" endmenu # "Memory Debugging" @@@ -1648,14 -1640,6 +1648,14 @@@ config BUG_ON_DATA_CORRUPTIO If unsure, say N. +config DEBUG_MAPLE_TREE + bool "Debug maple trees" + depends on DEBUG_KERNEL + help + Enable maple tree debugging information and extra validations. + + If unsure, say N. + endmenu config DEBUG_CREDENTIALS @@@ -1717,6 -1701,16 +1717,16 @@@ config LATENCYTO Enable this option if you want to use the LatencyTOP tool to find out which userspace is blocking on what kernel operations. + config DEBUG_CGROUP_REF + bool "Disable inlining of cgroup css reference count functions" + depends on DEBUG_KERNEL + depends on CGROUPS + depends on KPROBES + default n + help + Force cgroup css reference count functions to not be inlined so + that they can be kprobed for debugging. + source "kernel/trace/Kconfig" config PROVIDE_OHCI1394_DMA_INIT @@@ -1875,14 -1869,8 +1885,14 @@@ config NETDEV_NOTIFIER_ERROR_INJEC If unsure, say N. config FUNCTION_ERROR_INJECTION - def_bool y + bool "Fault-injections of functions" depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES + help + Add fault injections into various functions that are annotated with + ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return + value of theses functions. This is useful to test error paths of code. + + If unsure, say N config FAULT_INJECTION bool "Fault-injection framework" @@@ -2114,7 -2102,6 +2124,7 @@@ config KPROBES_SANITY_TES depends on DEBUG_KERNEL depends on KPROBES depends on KUNIT + select STACKTRACE if ARCH_CORRECT_STACKTRACE_ON_KRETPROBE default KUNIT_ALL_TESTS help This option provides for testing basic kprobes functionality on @@@ -2249,10 -2236,6 +2259,10 @@@ config TEST_UUI config TEST_XARRAY tristate "Test the XArray code at runtime" +config TEST_MAPLE_TREE + select DEBUG_MAPLE_TREE + tristate "Test the Maple Tree code at runtime" + config TEST_RHASHTABLE tristate "Perform selftest on resizable hash table" help