Merge patch series "riscv: Use Kconfig to set unaligned access speed"
author    Palmer Dabbelt <[email protected]>  Wed, 13 Mar 2024 14:30:33 +0000 (07:30 -0700)
committer Palmer Dabbelt <[email protected]>  Fri, 15 Mar 2024 17:17:14 +0000 (10:17 -0700)
Charlie Jenkins <[email protected]> says:

If the hardware unaligned access speed is known at compile time, it is
possible to avoid running the unaligned access speed probe to speed up
boot time.

* b4-shazam-merge:
  riscv: Set unaligned access speed at compile time
  riscv: Decouple emulated unaligned accesses from access speed
  riscv: Only check online cpus for emulated accesses
  riscv: lib: Introduce has_fast_unaligned_access()

Link: https://lore.kernel.org/r/20240308-disable_misaligned_probe_config-v9-0-a388770ba0ce@rivosinc.com
Signed-off-by: Palmer Dabbelt <[email protected]>
arch/riscv/Kconfig
arch/riscv/kernel/cpufeature.c

diff --combined arch/riscv/Kconfig
index 0bfcfec67ed57291f63b97826829ce1225475382,51481bf9364e79e1a38f3031b6a7c0613b6a6d91..8ebafe337eac9880970ff877065c3f7161375313
@@@ -27,18 -27,14 +27,18 @@@ config RISC
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_GIGANTIC_PAGE
        select ARCH_HAS_KCOV
 +      select ARCH_HAS_MEMBARRIER_CALLBACKS
 +      select ARCH_HAS_MEMBARRIER_SYNC_CORE
        select ARCH_HAS_MMIOWB
        select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
        select ARCH_HAS_PMEM_API
 +      select ARCH_HAS_PREPARE_SYNC_CORE_CMD
        select ARCH_HAS_PTE_SPECIAL
        select ARCH_HAS_SET_DIRECT_MAP if MMU
        select ARCH_HAS_SET_MEMORY if MMU
        select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
        select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
 +      select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
        select ARCH_HAS_SYSCALL_WRAPPER
        select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
        select ARCH_HAS_UBSAN_SANITIZE_ALL
@@@ -51,9 -47,6 +51,9 @@@
        select ARCH_SUPPORTS_CFI_CLANG
        select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
        select ARCH_SUPPORTS_HUGETLBFS if MMU
 +      # LLD >= 14: https://github.com/llvm/llvm-project/issues/50505
 +      select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000
 +      select ARCH_SUPPORTS_LTO_CLANG_THIN if LLD_VERSION >= 140000
        select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
        select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
        select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
        select HAVE_ARCH_KGDB_QXFER_PKT
        select HAVE_ARCH_MMAP_RND_BITS if MMU
        select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
 +      select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_THREAD_STRUCT_WHITELIST
        select HAVE_ARCH_TRACEHOOK
        select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION
        select HAVE_EBPF_JIT if MMU
 +      select HAVE_FAST_GUP if MMU
        select HAVE_FUNCTION_ARG_ACCESS_API
        select HAVE_FUNCTION_ERROR_INJECTION
        select HAVE_GCC_PLUGINS
        select IRQ_FORCED_THREADING
        select KASAN_VMALLOC if KASAN
        select LOCK_MM_AND_FIND_VMA
 +      select MMU_GATHER_RCU_TABLE_FREE if SMP && MMU
        select MODULES_USE_ELF_RELA if MODULES
        select MODULE_SECTIONS if MODULES
        select OF
@@@ -325,6 -315,7 +325,6 @@@ config AS_HAS_OPTION_ARC
        # https://reviews.llvm.org/D123515
        def_bool y
        depends on $(as-instr, .option arch$(comma) +m)
 -      depends on !$(as-instr, .option arch$(comma) -i)
  
  source "arch/riscv/Kconfig.socs"
  source "arch/riscv/Kconfig.errata"
@@@ -587,13 -578,6 +587,13 @@@ config TOOLCHAIN_HAS_ZB
        depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
        depends on AS_HAS_OPTION_ARCH
  
 +# This symbol indicates that the toolchain supports all v1.0 vector crypto
 +# extensions, including Zvk*, Zvbb, and Zvbc.  LLVM added all of these at once.
 +# binutils added all except Zvkb, then added Zvkb.  So we just check for Zvkb.
 +config TOOLCHAIN_HAS_VECTOR_CRYPTO
 +      def_bool $(as-instr, .option arch$(comma) +v$(comma) +zvkb)
 +      depends on AS_HAS_OPTION_ARCH
 +
  config RISCV_ISA_ZBB
        bool "Zbb extension support for bit manipulation instructions"
        depends on TOOLCHAIN_HAS_ZBB
@@@ -704,27 -688,61 +704,61 @@@ config THREAD_SIZE_ORDE
          affects irq stack size, which is equal to thread stack size.
  
  config RISCV_MISALIGNED
-       bool "Support misaligned load/store traps for kernel and userspace"
+       bool
        select SYSCTL_ARCH_UNALIGN_ALLOW
-       default y
        help
-         Say Y here if you want the kernel to embed support for misaligned
-         load/store for both kernel and userspace. When disable, misaligned
-         accesses will generate SIGBUS in userspace and panic in kernel.
+         Embed support for emulating misaligned loads and stores.
+ 
+ choice
+       prompt "Unaligned Accesses Support"
+       default RISCV_PROBE_UNALIGNED_ACCESS
+       help
+         This determines the level of support for unaligned accesses. This
+         information is used by the kernel to perform optimizations. It is also
+         exposed to user space via the hwprobe syscall. The hardware will be
+         probed at boot by default.
+ 
+ config RISCV_PROBE_UNALIGNED_ACCESS
+       bool "Probe for hardware unaligned access support"
+       select RISCV_MISALIGNED
+       help
+         During boot, the kernel will run a series of tests to determine the
+         speed of unaligned accesses. This probing will dynamically determine
+         the speed of unaligned accesses on the underlying system. If unaligned
+         memory accesses trap into the kernel as they are not supported by the
+         system, the kernel will emulate the unaligned accesses to preserve the
+         UABI.
+ 
+ config RISCV_EMULATED_UNALIGNED_ACCESS
+       bool "Emulate unaligned access where system support is missing"
+       select RISCV_MISALIGNED
+       help
+         If unaligned memory accesses trap into the kernel as they are not
+         supported by the system, the kernel will emulate the unaligned
+         accesses to preserve the UABI. When the underlying system does support
+         unaligned accesses, the unaligned accesses are assumed to be slow.
+ 
+ config RISCV_SLOW_UNALIGNED_ACCESS
+       bool "Assume the system supports slow unaligned memory accesses"
+       depends on NONPORTABLE
+       help
+         Assume that the system supports slow unaligned memory accesses. The
+         kernel and userspace programs may not be able to run at all on systems
+         that do not support unaligned memory accesses.
  
  config RISCV_EFFICIENT_UNALIGNED_ACCESS
-       bool "Assume the CPU supports fast unaligned memory accesses"
+       bool "Assume the system supports fast unaligned memory accesses"
        depends on NONPORTABLE
        select DCACHE_WORD_ACCESS if MMU
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
        help
-         Say Y here if you want the kernel to assume that the CPU supports
-         efficient unaligned memory accesses.  When enabled, this option
-         improves the performance of the kernel on such CPUs.  However, the
-         kernel will run much more slowly, or will not be able to run at all,
-         on CPUs that do not support efficient unaligned memory accesses.
+         Assume that the system supports fast unaligned memory accesses. When
+         enabled, this option improves the performance of the kernel on such
+         systems. However, the kernel and userspace programs will run much more
+         slowly, or will not be able to run at all, on systems that do not
+         support efficient unaligned memory accesses.
  
-         If unsure what to do here, say N.
+ endchoice
  
  endmenu # "Platform type"
  
@@@ -1017,8 -1035,11 +1051,8 @@@ menu "Power management options
  
  source "kernel/power/Kconfig"
  
 -# Hibernation is only possible on systems where the SBI implementation has
 -# marked its reserved memory as not accessible from, or does not run
 -# from the same memory as, Linux
  config ARCH_HIBERNATION_POSSIBLE
 -      def_bool NONPORTABLE
 +      def_bool y
  
  config ARCH_HIBERNATION_HEADER
        def_bool HIBERNATION
diff --combined arch/riscv/kernel/cpufeature.c
index 0c7688fa83766fe4602ebe4ea837b69961353127,319670af57044cd99ba1c083a1c22f11b5092154..afeae3ff43dc1f880594708444121b823ba8d7ce
@@@ -11,7 -11,6 +11,6 @@@
  #include <linux/cpu.h>
  #include <linux/cpuhotplug.h>
  #include <linux/ctype.h>
- #include <linux/jump_label.h>
  #include <linux/log2.h>
  #include <linux/memory.h>
  #include <linux/module.h>
  #include <asm/cacheflush.h>
  #include <asm/cpufeature.h>
  #include <asm/hwcap.h>
- #include <asm/hwprobe.h>
  #include <asm/patch.h>
  #include <asm/processor.h>
  #include <asm/vector.h>
  
- #include "copy-unaligned.h"
  #define NUM_ALPHA_EXTS ('z' - 'a' + 1)
  
- #define MISALIGNED_ACCESS_JIFFIES_LG2 1
- #define MISALIGNED_BUFFER_SIZE 0x4000
- #define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
- #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
  unsigned long elf_hwcap __read_mostly;
  
  /* Host ISA bitmap */
@@@ -43,11 -34,6 +34,6 @@@ static DECLARE_BITMAP(riscv_isa, RISCV_
  /* Per-cpu ISA extensions. */
  struct riscv_isainfo hart_isa[NR_CPUS];
  
- /* Performance information */
- DEFINE_PER_CPU(long, misaligned_access_speed);
- static cpumask_t fast_misaligned_access;
  /**
   * riscv_isa_extension_base() - Get base extension word
   *
@@@ -307,7 -293,6 +293,7 @@@ const struct riscv_isa_ext_data riscv_i
        __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
        __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
        __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
 +      __RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_EXT_XANDESPMU),
  };
  
  const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext);
@@@ -707,247 -692,6 +693,6 @@@ unsigned long riscv_get_elf_hwcap(void
        return hwcap;
  }
  
- static int check_unaligned_access(void *param)
- {
-       int cpu = smp_processor_id();
-       u64 start_cycles, end_cycles;
-       u64 word_cycles;
-       u64 byte_cycles;
-       int ratio;
-       unsigned long start_jiffies, now;
-       struct page *page = param;
-       void *dst;
-       void *src;
-       long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
-       if (check_unaligned_access_emulated(cpu))
-               return 0;
-       /* Make an unaligned destination buffer. */
-       dst = (void *)((unsigned long)page_address(page) | 0x1);
-       /* Unalign src as well, but differently (off by 1 + 2 = 3). */
-       src = dst + (MISALIGNED_BUFFER_SIZE / 2);
-       src += 2;
-       word_cycles = -1ULL;
-       /* Do a warmup. */
-       __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
-       preempt_disable();
-       start_jiffies = jiffies;
-       while ((now = jiffies) == start_jiffies)
-               cpu_relax();
-       /*
-        * For a fixed amount of time, repeatedly try the function, and take
-        * the best time in cycles as the measurement.
-        */
-       while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
-               start_cycles = get_cycles64();
-               /* Ensure the CSR read can't reorder WRT to the copy. */
-               mb();
-               __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
-               /* Ensure the copy ends before the end time is snapped. */
-               mb();
-               end_cycles = get_cycles64();
-               if ((end_cycles - start_cycles) < word_cycles)
-                       word_cycles = end_cycles - start_cycles;
-       }
-       byte_cycles = -1ULL;
-       __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
-       start_jiffies = jiffies;
-       while ((now = jiffies) == start_jiffies)
-               cpu_relax();
-       while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
-               start_cycles = get_cycles64();
-               mb();
-               __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
-               mb();
-               end_cycles = get_cycles64();
-               if ((end_cycles - start_cycles) < byte_cycles)
-                       byte_cycles = end_cycles - start_cycles;
-       }
-       preempt_enable();
-       /* Don't divide by zero. */
-       if (!word_cycles || !byte_cycles) {
-               pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
-                       cpu);
-               return 0;
-       }
-       if (word_cycles < byte_cycles)
-               speed = RISCV_HWPROBE_MISALIGNED_FAST;
-       ratio = div_u64((byte_cycles * 100), word_cycles);
-       pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
-               cpu,
-               ratio / 100,
-               ratio % 100,
-               (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
-       per_cpu(misaligned_access_speed, cpu) = speed;
-       /*
-        * Set the value of fast_misaligned_access of a CPU. These operations
-        * are atomic to avoid race conditions.
-        */
-       if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
-               cpumask_set_cpu(cpu, &fast_misaligned_access);
-       else
-               cpumask_clear_cpu(cpu, &fast_misaligned_access);
-       return 0;
- }
- 
- static void check_unaligned_access_nonboot_cpu(void *param)
- {
-       unsigned int cpu = smp_processor_id();
-       struct page **pages = param;
-       if (smp_processor_id() != 0)
-               check_unaligned_access(pages[cpu]);
- }
- 
- DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
- 
- static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
- {
-       if (cpumask_weight(mask) == weight)
-               static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
-       else
-               static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
- }
- 
- static void set_unaligned_access_static_branches_except_cpu(int cpu)
- {
-       /*
-        * Same as set_unaligned_access_static_branches, except excludes the
-        * given CPU from the result. When a CPU is hotplugged into an offline
-        * state, this function is called before the CPU is set to offline in
-        * the cpumask, and thus the CPU needs to be explicitly excluded.
-        */
-       cpumask_t fast_except_me;
-       cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
-       cpumask_clear_cpu(cpu, &fast_except_me);
-       modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
- }
- 
- static void set_unaligned_access_static_branches(void)
- {
-       /*
-        * This will be called after check_unaligned_access_all_cpus so the
-        * result of unaligned access speed for all CPUs will be available.
-        *
-        * To avoid the number of online cpus changing between reading
-        * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
-        * held before calling this function.
-        */
-       cpumask_t fast_and_online;
-       cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
-       modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
- }
- 
- static int lock_and_set_unaligned_access_static_branch(void)
- {
-       cpus_read_lock();
-       set_unaligned_access_static_branches();
-       cpus_read_unlock();
-       return 0;
- }
- 
- arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
- 
- static int riscv_online_cpu(unsigned int cpu)
- {
-       static struct page *buf;
-       /* We are already set since the last check */
-       if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
-               goto exit;
-       buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
-       if (!buf) {
-               pr_warn("Allocation failure, not measuring misaligned performance\n");
-               return -ENOMEM;
-       }
-       check_unaligned_access(buf);
-       __free_pages(buf, MISALIGNED_BUFFER_ORDER);
- exit:
-       set_unaligned_access_static_branches();
-       return 0;
- }
- 
- static int riscv_offline_cpu(unsigned int cpu)
- {
-       set_unaligned_access_static_branches_except_cpu(cpu);
-       return 0;
- }
- 
- /* Measure unaligned access on all CPUs present at boot in parallel. */
- static int check_unaligned_access_all_cpus(void)
- {
-       unsigned int cpu;
-       unsigned int cpu_count = num_possible_cpus();
-       struct page **bufs = kzalloc(cpu_count * sizeof(struct page *),
-                                    GFP_KERNEL);
-       if (!bufs) {
-               pr_warn("Allocation failure, not measuring misaligned performance\n");
-               return 0;
-       }
-       /*
-        * Allocate separate buffers for each CPU so there's no fighting over
-        * cache lines.
-        */
-       for_each_cpu(cpu, cpu_online_mask) {
-               bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
-               if (!bufs[cpu]) {
-                       pr_warn("Allocation failure, not measuring misaligned performance\n");
-                       goto out;
-               }
-       }
-       /* Check everybody except 0, who stays behind to tend jiffies. */
-       on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
-       /* Check core 0. */
-       smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
-       /*
-        * Setup hotplug callbacks for any new CPUs that come online or go
-        * offline.
-        */
-       cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
-                                 riscv_online_cpu, riscv_offline_cpu);
- out:
-       unaligned_emulation_finish();
-       for_each_cpu(cpu, cpu_online_mask) {
-               if (bufs[cpu])
-                       __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
-       }
-       kfree(bufs);
-       return 0;
- }
- 
- arch_initcall(check_unaligned_access_all_cpus);
- 
  void riscv_user_isa_enable(void)
  {
        if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ))