Merge patch series "riscv: enable lockless lockref implementation"
author	Palmer Dabbelt <[email protected]>
	Wed, 24 Apr 2024 19:57:51 +0000 (12:57 -0700)
committer	Palmer Dabbelt <[email protected]>
	Tue, 30 Apr 2024 17:35:46 +0000 (10:35 -0700)
Jisheng Zhang <[email protected]> says:

This series selects ARCH_USE_CMPXCHG_LOCKREF to enable the
cmpxchg-based lockless lockref implementation for riscv, and then
implements arch_cmpxchg64_{relaxed|acquire|release}.
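
For context, lockref packs the spinlock and the reference count into a
single 64-bit word; with ARCH_USE_CMPXCHG_LOCKREF, lib/lockref.c bumps
the count with one cmpxchg64 instead of a lock/unlock pair. A condensed
sketch of that fast path (simplified from the kernel's CMPXCHG_LOOP
macro, not verbatim):

static void lockref_get_sketch(struct lockref *lockref)
{
	int retry = 100;
	struct lockref old;

	old.lock_count = READ_ONCE(lockref->lock_count);
	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {
		struct lockref new = old;

		new.count++;
		/* One 64-bit cmpxchg replaces a spin_lock/spin_unlock pair. */
		if (likely(try_cmpxchg64_relaxed(&lockref->lock_count,
						 &old.lock_count,
						 new.lock_count)))
			return;
		/* try_cmpxchg updated 'old'; retry a bounded number of times. */
		if (!--retry)
			break;
	}

	/* Contended or out of retries: fall back to the spinlock. */
	spin_lock(&lockref->lock);
	lockref->count++;
	spin_unlock(&lockref->lock);
}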

After patch 1:
Using Linus' test case [1] on the TH1520 platform, I see an 11.2%
improvement. On the JH7110 platform, I see a 12.0% improvement.

After patch 2:
On both the TH1520 and JH7110 platforms, I didn't see an obvious
performance improvement with Linus' test case [1]. IMHO, this may be
related to how these parts implement the fence and lr.d/sc.d
instructions. In theory, lr/sc without a fence can outperform lr/sc
plus a fence, so the code is added here to leave room for performance
gains on newer HW platforms.
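
To illustrate the difference, these are the sequences the reworked
macros below emit for a 64-bit cmpxchg in the relaxed and fully-ordered
flavors (register names are illustrative, taken from expanding the
macros in the diff):

	/* arch_cmpxchg64_relaxed(): plain lr/sc, no fences */
	0:	lr.d	a0, (a2)
		bne	a0, a3, 1f
		sc.d	a1, a4, (a2)
		bnez	a1, 0b
	1:

	/* arch_cmpxchg(): sc.d.rl plus a trailing full fence */
	0:	lr.d	a0, (a2)
		bne	a0, a3, 1f
		sc.d.rl	a1, a4, (a2)
		bnez	a1, 0b
		fence	rw, rw
	1: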

* b4-shazam-merge:
  riscv: cmpxchg: implement arch_cmpxchg64_{relaxed|acquire|release}
  riscv: select ARCH_USE_CMPXCHG_LOCKREF

Link: http://marc.info/?l=linux-fsdevel&m=137782380714721&w=4
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Palmer Dabbelt <[email protected]>
arch/riscv/Kconfig
arch/riscv/include/asm/cmpxchg.h

diff --combined arch/riscv/Kconfig
index d77d416dcd8f47046f104f36f89399fe7a174066,b8b96baad0587a48de7906066e0d551c9f3cc14d..2dac484ea64559fc1a79f3b679accf0cb04bafb6
@@@ -27,21 -27,17 +27,21 @@@ config RISCV
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_GIGANTIC_PAGE
        select ARCH_HAS_KCOV
 +      select ARCH_HAS_MEMBARRIER_CALLBACKS
 +      select ARCH_HAS_MEMBARRIER_SYNC_CORE
        select ARCH_HAS_MMIOWB
        select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
        select ARCH_HAS_PMEM_API
 +      select ARCH_HAS_PREPARE_SYNC_CORE_CMD
        select ARCH_HAS_PTE_SPECIAL
        select ARCH_HAS_SET_DIRECT_MAP if MMU
        select ARCH_HAS_SET_MEMORY if MMU
        select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
        select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
 +      select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
        select ARCH_HAS_SYSCALL_WRAPPER
        select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 -      select ARCH_HAS_UBSAN_SANITIZE_ALL
 +      select ARCH_HAS_UBSAN
        select ARCH_HAS_VDSO_DATA
        select ARCH_KEEP_MEMBLOCK if ACPI
        select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
        select ARCH_SUPPORTS_CFI_CLANG
        select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
        select ARCH_SUPPORTS_HUGETLBFS if MMU
 +      # LLD >= 14: https://github.com/llvm/llvm-project/issues/50505
 +      select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000
 +      select ARCH_SUPPORTS_LTO_CLANG_THIN if LLD_VERSION >= 140000
        select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
        select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
        select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
+       select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
        select ARCH_USE_MEMTEST
        select ARCH_USE_QUEUED_RWLOCKS
        select ARCH_USES_CFI_TRAPS if CFI_CLANG
@@@ -71,7 -65,7 +72,7 @@@
        select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
        select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
        select BUILDTIME_TABLE_SORT if MMU
 -      select CLINT_TIMER if !MMU
 +      select CLINT_TIMER if RISCV_M_MODE
        select CLONE_BACKWARDS
        select COMMON_CLK
        select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND
        select HAVE_ARCH_KGDB_QXFER_PKT
        select HAVE_ARCH_MMAP_RND_BITS if MMU
        select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
 +      select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_THREAD_STRUCT_WHITELIST
        select HAVE_ARCH_TRACEHOOK
        select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION
        select HAVE_EBPF_JIT if MMU
 +      select HAVE_FAST_GUP if MMU
        select HAVE_FUNCTION_ARG_ACCESS_API
        select HAVE_FUNCTION_ERROR_INJECTION
        select HAVE_GCC_PLUGINS
        select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD
        select HAVE_MOVE_PMD
        select HAVE_MOVE_PUD
 +      select HAVE_PAGE_SIZE_4KB
        select HAVE_PCI
        select HAVE_PERF_EVENTS
        select HAVE_PERF_REGS
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_RETHOOK if !XIP_KERNEL
        select HAVE_RSEQ
 +      select HAVE_RUST if 64BIT
        select HAVE_SAMPLE_FTRACE_DIRECT
        select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
        select HAVE_STACKPROTECTOR
        select IRQ_FORCED_THREADING
        select KASAN_VMALLOC if KASAN
        select LOCK_MM_AND_FIND_VMA
 +      select MMU_GATHER_RCU_TABLE_FREE if SMP && MMU
        select MODULES_USE_ELF_RELA if MODULES
        select MODULE_SECTIONS if MODULES
        select OF
  
  config CLANG_SUPPORTS_DYNAMIC_FTRACE
        def_bool CC_IS_CLANG
 -      # https://github.com/llvm/llvm-project/commit/6ab8927931851bb42b2c93a00801dc499d7d9b1e
 -      depends on CLANG_VERSION >= 130000
        # https://github.com/ClangBuiltLinux/linux/issues/1817
        depends on AS_IS_GNU || (AS_IS_LLVM && (LD_IS_LLD || LD_VERSION >= 23600))
  
@@@ -230,12 -221,8 +231,12 @@@ config ARCH_MMAP_RND_COMPAT_BITS_MAX
  
  # set if we run in machine mode, cleared if we run in supervisor mode
  config RISCV_M_MODE
 -      bool
 -      default !MMU
 +      bool "Build a kernel that runs in machine mode"
 +      depends on !MMU
 +      default y
 +      help
 +        Select this option if you want to run the kernel in M-mode,
 +        without the assistance of any other firmware.
  
  # set if we are running in S-mode and can use SBI calls
  config RISCV_SBI
@@@ -252,9 -239,8 +253,9 @@@ config MMU
  
  config PAGE_OFFSET
        hex
 -      default 0xC0000000 if 32BIT && MMU
 -      default 0x80000000 if !MMU
 +      default 0x80000000 if !MMU && RISCV_M_MODE
 +      default 0x80200000 if !MMU
 +      default 0xc0000000 if 32BIT
        default 0xff60000000000000 if 64BIT
  
  config KASAN_SHADOW_OFFSET
@@@ -327,9 -313,10 +328,9 @@@ config AS_HAS_INSN
        def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero)
  
  config AS_HAS_OPTION_ARCH
 -      # https://reviews.llvm.org/D123515
 +      # https://github.com/llvm/llvm-project/commit/9e8ed3403c191ab9c4903e8eeb8f732ff8a43cb4
        def_bool y
        depends on $(as-instr, .option arch$(comma) +m)
 -      depends on !$(as-instr, .option arch$(comma) -i)
  
  source "arch/riscv/Kconfig.socs"
  source "arch/riscv/Kconfig.errata"
@@@ -592,16 -579,10 +593,16 @@@ config TOOLCHAIN_HAS_ZBB
        depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
        depends on AS_HAS_OPTION_ARCH
  
 +# This symbol indicates that the toolchain supports all v1.0 vector crypto
 +# extensions, including Zvk*, Zvbb, and Zvbc.  LLVM added all of these at once.
 +# binutils added all except Zvkb, then added Zvkb.  So we just check for Zvkb.
 +config TOOLCHAIN_HAS_VECTOR_CRYPTO
 +      def_bool $(as-instr, .option arch$(comma) +v$(comma) +zvkb)
 +      depends on AS_HAS_OPTION_ARCH
 +
  config RISCV_ISA_ZBB
        bool "Zbb extension support for bit manipulation instructions"
        depends on TOOLCHAIN_HAS_ZBB
 -      depends on MMU
        depends on RISCV_ALTERNATIVE
        default y
        help
@@@ -633,6 -614,7 +634,6 @@@ config RISCV_ISA_ZICBOM
  
  config RISCV_ISA_ZICBOZ
        bool "Zicboz extension support for faster zeroing of memory"
 -      depends on MMU
        depends on RISCV_ALTERNATIVE
        default y
        help
@@@ -707,61 -689,27 +708,61 @@@ config THREAD_SIZE_ORDER
          affects irq stack size, which is equal to thread stack size.
  
  config RISCV_MISALIGNED
 -      bool "Support misaligned load/store traps for kernel and userspace"
 +      bool
        select SYSCTL_ARCH_UNALIGN_ALLOW
 -      default y
        help
 -        Say Y here if you want the kernel to embed support for misaligned
 -        load/store for both kernel and userspace. When disable, misaligned
 -        accesses will generate SIGBUS in userspace and panic in kernel.
 +        Embed support for emulating misaligned loads and stores.
 +
 +choice
 +      prompt "Unaligned Accesses Support"
 +      default RISCV_PROBE_UNALIGNED_ACCESS
 +      help
 +        This determines the level of support for unaligned accesses. This
 +        information is used by the kernel to perform optimizations. It is also
 +        exposed to user space via the hwprobe syscall. The hardware will be
 +        probed at boot by default.
 +
 +config RISCV_PROBE_UNALIGNED_ACCESS
 +      bool "Probe for hardware unaligned access support"
 +      select RISCV_MISALIGNED
 +      help
 +        During boot, the kernel will run a series of tests to determine the
 +        speed of unaligned accesses. This probing will dynamically determine
 +        the speed of unaligned accesses on the underlying system. If unaligned
 +        memory accesses trap into the kernel as they are not supported by the
 +        system, the kernel will emulate the unaligned accesses to preserve the
 +        UABI.
 +
 +config RISCV_EMULATED_UNALIGNED_ACCESS
 +      bool "Emulate unaligned access where system support is missing"
 +      select RISCV_MISALIGNED
 +      help
 +        If unaligned memory accesses trap into the kernel as they are not
 +        supported by the system, the kernel will emulate the unaligned
 +        accesses to preserve the UABI. When the underlying system does support
 +        unaligned accesses, the unaligned accesses are assumed to be slow.
 +
 +config RISCV_SLOW_UNALIGNED_ACCESS
 +      bool "Assume the system supports slow unaligned memory accesses"
 +      depends on NONPORTABLE
 +      help
 +        Assume that the system supports slow unaligned memory accesses. The
 +        kernel and userspace programs may not be able to run at all on systems
 +        that do not support unaligned memory accesses.
  
  config RISCV_EFFICIENT_UNALIGNED_ACCESS
 -      bool "Assume the CPU supports fast unaligned memory accesses"
 +      bool "Assume the system supports fast unaligned memory accesses"
        depends on NONPORTABLE
        select DCACHE_WORD_ACCESS if MMU
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
        help
 -        Say Y here if you want the kernel to assume that the CPU supports
 -        efficient unaligned memory accesses.  When enabled, this option
 -        improves the performance of the kernel on such CPUs.  However, the
 -        kernel will run much more slowly, or will not be able to run at all,
 -        on CPUs that do not support efficient unaligned memory accesses.
 +        Assume that the system supports fast unaligned memory accesses. When
 +        enabled, this option improves the performance of the kernel on such
 +        systems. However, the kernel and userspace programs will run much more
 +        slowly, or will not be able to run at all, on systems that do not
 +        support efficient unaligned memory accesses.
  
 -        If unsure what to do here, say N.
 +endchoice
  
  endmenu # "Platform type"
  
@@@ -820,7 -768,7 +821,7 @@@ config ARCH_SUPPORTS_CRASH_DUMP
        def_bool y
  
  config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
 -      def_bool CRASH_CORE
 +      def_bool CRASH_RESERVE
  
  config COMPAT
        bool "Kernel support for 32-bit U-mode"
@@@ -1036,19 -984,7 +1037,19 @@@ config RISCV_ISA_FALLBACK
  config BUILTIN_DTB
        bool "Built-in device tree"
        depends on OF && NONPORTABLE
 -      default y if XIP_KERNEL
 +      help
 +        Build a device tree into the Linux image.
 +        This option should be selected if no bootloader is being used.
 +        If unsure, say N.
 +
 +
 +config BUILTIN_DTB_SOURCE
 +      string "Built-in device tree source"
 +      depends on BUILTIN_DTB
 +      help
 +        DTS file path (without suffix, relative to arch/riscv/boot/dts)
 +        for the DTS file that will be used to produce the DTB linked into the
 +        kernel.
  
  endmenu # "Boot options"
  
@@@ -1066,8 -1002,11 +1067,8 @@@ menu "Power management options"
  
  source "kernel/power/Kconfig"
  
 -# Hibernation is only possible on systems where the SBI implementation has
 -# marked its reserved memory as not accessible from, or does not run
 -# from the same memory as, Linux
  config ARCH_HIBERNATION_POSSIBLE
 -      def_bool NONPORTABLE
 +      def_bool y
  
  config ARCH_HIBERNATION_HEADER
        def_bool HIBERNATION
diff --combined arch/riscv/include/asm/cmpxchg.h
index 4d23f0c35b94970da5802a1ac807c0db7186f4e1,6318187f426f65a75624e5e52f2f3ed4030f8fc0..ddb002ed89dea0dbf4956a5161a0f426bcba557f
  
  #include <linux/bug.h>
  
 -#include <asm/barrier.h>
  #include <asm/fence.h>
  
 -#define __xchg_relaxed(ptr, new, size)                                        \
 +#define __arch_xchg_masked(prepend, append, r, p, n)                  \
 +({                                                                    \
 +      u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);                     \
 +      ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;  \
 +      ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)   \
 +                      << __s;                                         \
 +      ulong __newx = (ulong)(n) << __s;                               \
 +      ulong __retx;                                                   \
 +      ulong __rc;                                                     \
 +                                                                      \
 +      __asm__ __volatile__ (                                          \
 +             prepend                                                  \
 +             "0:      lr.w %0, %2\n"                                  \
 +             "        and  %1, %0, %z4\n"                             \
 +             "        or   %1, %1, %z3\n"                             \
 +             "        sc.w %1, %1, %2\n"                              \
 +             "        bnez %1, 0b\n"                                  \
 +             append                                                   \
 +             : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))       \
 +             : "rJ" (__newx), "rJ" (~__mask)                          \
 +             : "memory");                                             \
 +                                                                      \
 +      r = (__typeof__(*(p)))((__retx & __mask) >> __s);               \
 +})
 +
 +#define __arch_xchg(sfx, prepend, append, r, p, n)                    \
 +({                                                                    \
 +      __asm__ __volatile__ (                                          \
 +              prepend                                                 \
 +              "       amoswap" sfx " %0, %2, %1\n"                    \
 +              append                                                  \
 +              : "=r" (r), "+A" (*(p))                                 \
 +              : "r" (n)                                               \
 +              : "memory");                                            \
 +})
 +
 +#define _arch_xchg(ptr, new, sfx, prepend, append)                    \
  ({                                                                    \
        __typeof__(ptr) __ptr = (ptr);                                  \
 -      __typeof__(new) __new = (new);                                  \
 -      __typeof__(*(ptr)) __ret;                                       \
 -      switch (size) {                                                 \
 +      __typeof__(*(__ptr)) __new = (new);                             \
 +      __typeof__(*(__ptr)) __ret;                                     \
 +                                                                      \
 +      switch (sizeof(*__ptr)) {                                       \
 +      case 1:                                                         \
 +      case 2:                                                         \
 +              __arch_xchg_masked(prepend, append,                     \
 +                                 __ret, __ptr, __new);                \
 +              break;                                                  \
        case 4:                                                         \
 -              __asm__ __volatile__ (                                  \
 -                      "       amoswap.w %0, %2, %1\n"                 \
 -                      : "=r" (__ret), "+A" (*__ptr)                   \
 -                      : "r" (__new)                                   \
 -                      : "memory");                                    \
 +              __arch_xchg(".w" sfx, prepend, append,                  \
 +                            __ret, __ptr, __new);                     \
                break;                                                  \
        case 8:                                                         \
 -              __asm__ __volatile__ (                                  \
 -                      "       amoswap.d %0, %2, %1\n"                 \
 -                      : "=r" (__ret), "+A" (*__ptr)                   \
 -                      : "r" (__new)                                   \
 -                      : "memory");                                    \
 +              __arch_xchg(".d" sfx, prepend, append,                  \
 +                            __ret, __ptr, __new);                     \
                break;                                                  \
        default:                                                        \
                BUILD_BUG();                                            \
        }                                                               \
 -      __ret;                                                          \
 +      (__typeof__(*(__ptr)))__ret;                                    \
  })
  
  #define arch_xchg_relaxed(ptr, x)                                     \
 -({                                                                    \
 -      __typeof__(*(ptr)) _x_ = (x);                                   \
 -      (__typeof__(*(ptr))) __xchg_relaxed((ptr),                      \
 -                                          _x_, sizeof(*(ptr)));       \
 -})
 -
 -#define __xchg_acquire(ptr, new, size)                                        \
 -({                                                                    \
 -      __typeof__(ptr) __ptr = (ptr);                                  \
 -      __typeof__(new) __new = (new);                                  \
 -      __typeof__(*(ptr)) __ret;                                       \
 -      switch (size) {                                                 \
 -      case 4:                                                         \
 -              __asm__ __volatile__ (                                  \
 -                      "       amoswap.w %0, %2, %1\n"                 \
 -                      RISCV_ACQUIRE_BARRIER                           \
 -                      : "=r" (__ret), "+A" (*__ptr)                   \
 -                      : "r" (__new)                                   \
 -                      : "memory");                                    \
 -              break;                                                  \
 -      case 8:                                                         \
 -              __asm__ __volatile__ (                                  \
 -                      "       amoswap.d %0, %2, %1\n"                 \
 -                      RISCV_ACQUIRE_BARRIER                           \
 -                      : "=r" (__ret), "+A" (*__ptr)                   \
 -                      : "r" (__new)                                   \
 -                      : "memory");                                    \
 -              break;                                                  \
 -      default:                                                        \
 -              BUILD_BUG();                                            \
 -      }                                                               \
 -      __ret;                                                          \
 -})
 +      _arch_xchg(ptr, x, "", "", "")
  
  #define arch_xchg_acquire(ptr, x)                                     \
 -({                                                                    \
 -      __typeof__(*(ptr)) _x_ = (x);                                   \
 -      (__typeof__(*(ptr))) __xchg_acquire((ptr),                      \
 -                                          _x_, sizeof(*(ptr)));       \
 -})
 -
 -#define __xchg_release(ptr, new, size)                                        \
 -({                                                                    \
 -      __typeof__(ptr) __ptr = (ptr);                                  \
 -      __typeof__(new) __new = (new);                                  \
 -      __typeof__(*(ptr)) __ret;                                       \
 -      switch (size) {                                                 \
 -      case 4:                                                         \
 -              __asm__ __volatile__ (                                  \
 -                      RISCV_RELEASE_BARRIER                           \
 -                      "       amoswap.w %0, %2, %1\n"                 \
 -                      : "=r" (__ret), "+A" (*__ptr)                   \
 -                      : "r" (__new)                                   \
 -                      : "memory");                                    \
 -              break;                                                  \
 -      case 8:                                                         \
 -              __asm__ __volatile__ (                                  \
 -                      RISCV_RELEASE_BARRIER                           \
 -                      "       amoswap.d %0, %2, %1\n"                 \
 -                      : "=r" (__ret), "+A" (*__ptr)                   \
 -                      : "r" (__new)                                   \
 -                      : "memory");                                    \
 -              break;                                                  \
 -      default:                                                        \
 -              BUILD_BUG();                                            \
 -      }                                                               \
 -      __ret;                                                          \
 -})
 +      _arch_xchg(ptr, x, "", "", RISCV_ACQUIRE_BARRIER)
  
  #define arch_xchg_release(ptr, x)                                     \
 -({                                                                    \
 -      __typeof__(*(ptr)) _x_ = (x);                                   \
 -      (__typeof__(*(ptr))) __xchg_release((ptr),                      \
 -                                          _x_, sizeof(*(ptr)));       \
 -})
 -
 -#define __arch_xchg(ptr, new, size)                                   \
 -({                                                                    \
 -      __typeof__(ptr) __ptr = (ptr);                                  \
 -      __typeof__(new) __new = (new);                                  \
 -      __typeof__(*(ptr)) __ret;                                       \
 -      switch (size) {                                                 \
 -      case 4:                                                         \
 -              __asm__ __volatile__ (                                  \
 -                      "       amoswap.w.aqrl %0, %2, %1\n"            \
 -                      : "=r" (__ret), "+A" (*__ptr)                   \
 -                      : "r" (__new)                                   \
 -                      : "memory");                                    \
 -              break;                                                  \
 -      case 8:                                                         \
 -              __asm__ __volatile__ (                                  \
 -                      "       amoswap.d.aqrl %0, %2, %1\n"            \
 -                      : "=r" (__ret), "+A" (*__ptr)                   \
 -                      : "r" (__new)                                   \
 -                      : "memory");                                    \
 -              break;                                                  \
 -      default:                                                        \
 -              BUILD_BUG();                                            \
 -      }                                                               \
 -      __ret;                                                          \
 -})
 +      _arch_xchg(ptr, x, "", RISCV_RELEASE_BARRIER, "")
  
  #define arch_xchg(ptr, x)                                             \
 -({                                                                    \
 -      __typeof__(*(ptr)) _x_ = (x);                                   \
 -      (__typeof__(*(ptr))) __arch_xchg((ptr), _x_, sizeof(*(ptr)));   \
 -})
 +      _arch_xchg(ptr, x, ".aqrl", "", "")
  
  #define xchg32(ptr, x)                                                        \
  ({                                                                    \
   * store NEW in MEM.  Return the initial value in MEM.  Success is
   * indicated by comparing RETURN with OLD.
   */
 -#define __cmpxchg_relaxed(ptr, old, new, size)                                \
 -({                                                                    \
 -      __typeof__(ptr) __ptr = (ptr);                                  \
 -      __typeof__(*(ptr)) __old = (old);                               \
 -      __typeof__(*(ptr)) __new = (new);                               \
 -      __typeof__(*(ptr)) __ret;                                       \
 -      register unsigned int __rc;                                     \
 -      switch (size) {                                                 \
 -      case 4:                                                         \
 -              __asm__ __volatile__ (                                  \
 -                      "0:     lr.w %0, %2\n"                          \
 -                      "       bne  %0, %z3, 1f\n"                     \
 -                      "       sc.w %1, %z4, %2\n"                     \
 -                      "       bnez %1, 0b\n"                          \
 -                      "1:\n"                                          \
 -                      : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)    \
 -                      : "rJ" ((long)__old), "rJ" (__new)              \
 -                      : "memory");                                    \
 -              break;                                                  \
 -      case 8:                                                         \
 -              __asm__ __volatile__ (                                  \
 -                      "0:     lr.d %0, %2\n"                          \
 -                      "       bne %0, %z3, 1f\n"                      \
 -                      "       sc.d %1, %z4, %2\n"                     \
 -                      "       bnez %1, 0b\n"                          \
 -                      "1:\n"                                          \
 -                      : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)    \
 -                      : "rJ" (__old), "rJ" (__new)                    \
 -                      : "memory");                                    \
 -              break;                                                  \
 -      default:                                                        \
 -              BUILD_BUG();                                            \
 -      }                                                               \
 -      __ret;                                                          \
 -})
  
 -#define arch_cmpxchg_relaxed(ptr, o, n)                                       \
 +#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n)    \
 +({                                                                    \
 +      u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);                     \
 +      ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;  \
 +      ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)   \
 +                      << __s;                                         \
 +      ulong __newx = (ulong)(n) << __s;                               \
 +      ulong __oldx = (ulong)(o) << __s;                               \
 +      ulong __retx;                                                   \
 +      ulong __rc;                                                     \
 +                                                                      \
 +      __asm__ __volatile__ (                                          \
 +              prepend                                                 \
 +              "0:     lr.w %0, %2\n"                                  \
 +              "       and  %1, %0, %z5\n"                             \
 +              "       bne  %1, %z3, 1f\n"                             \
 +              "       and  %1, %0, %z6\n"                             \
 +              "       or   %1, %1, %z4\n"                             \
 +              "       sc.w" sc_sfx " %1, %1, %2\n"                    \
 +              "       bnez %1, 0b\n"                                  \
 +              append                                                  \
 +              "1:\n"                                                  \
 +              : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))      \
 +              : "rJ" ((long)__oldx), "rJ" (__newx),                   \
 +                "rJ" (__mask), "rJ" (~__mask)                         \
 +              : "memory");                                            \
 +                                                                      \
 +      r = (__typeof__(*(p)))((__retx & __mask) >> __s);               \
 +})
 +
 +#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n)       \
  ({                                                                    \
 -      __typeof__(*(ptr)) _o_ = (o);                                   \
 -      __typeof__(*(ptr)) _n_ = (n);                                   \
 -      (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr),                   \
 -                                      _o_, _n_, sizeof(*(ptr)));      \
 -})
 -
 -#define __cmpxchg_acquire(ptr, old, new, size)                                \
 +      register unsigned int __rc;                                     \
 +                                                                      \
 +      __asm__ __volatile__ (                                          \
 +              prepend                                                 \
 +              "0:     lr" lr_sfx " %0, %2\n"                          \
 +              "       bne  %0, %z3, 1f\n"                             \
 +              "       sc" sc_sfx " %1, %z4, %2\n"                     \
 +              "       bnez %1, 0b\n"                                  \
 +              append                                                  \
 +              "1:\n"                                                  \
 +              : "=&r" (r), "=&r" (__rc), "+A" (*(p))                  \
 +              : "rJ" (co o), "rJ" (n)                                 \
 +              : "memory");                                            \
 +})
 +
 +#define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append)         \
  ({                                                                    \
        __typeof__(ptr) __ptr = (ptr);                                  \
 -      __typeof__(*(ptr)) __old = (old);                               \
 -      __typeof__(*(ptr)) __new = (new);                               \
 -      __typeof__(*(ptr)) __ret;                                       \
 -      register unsigned int __rc;                                     \
 -      switch (size) {                                                 \
 +      __typeof__(*(__ptr)) __old = (old);                             \
 +      __typeof__(*(__ptr)) __new = (new);                             \
 +      __typeof__(*(__ptr)) __ret;                                     \
 +                                                                      \
 +      switch (sizeof(*__ptr)) {                                       \
 +      case 1:                                                         \
 +      case 2:                                                         \
 +              __arch_cmpxchg_masked(sc_sfx, prepend, append,          \
 +                                      __ret, __ptr, __old, __new);    \
 +              break;                                                  \
        case 4:                                                         \
 -              __asm__ __volatile__ (                                  \
 -                      "0:     lr.w %0, %2\n"                          \
 -                      "       bne  %0, %z3, 1f\n"                     \
 -                      "       sc.w %1, %z4, %2\n"                     \
 -                      "       bnez %1, 0b\n"                          \
 -                      RISCV_ACQUIRE_BARRIER                           \
 -                      "1:\n"                                          \
 -                      : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)    \
 -                      : "rJ" ((long)__old), "rJ" (__new)              \
 -                      : "memory");                                    \
 +              __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append,      \
 +                              __ret, __ptr, (long), __old, __new);    \
                break;                                                  \
        case 8:                                                         \
 -              __asm__ __volatile__ (                                  \
 -                      "0:     lr.d %0, %2\n"                          \
 -                      "       bne %0, %z3, 1f\n"                      \
 -                      "       sc.d %1, %z4, %2\n"                     \
 -                      "       bnez %1, 0b\n"                          \
 -                      RISCV_ACQUIRE_BARRIER                           \
 -                      "1:\n"                                          \
 -                      : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)    \
 -                      : "rJ" (__old), "rJ" (__new)                    \
 -                      : "memory");                                    \
 +              __arch_cmpxchg(".d", ".d" sc_sfx, prepend, append,      \
 +                              __ret, __ptr, /**/, __old, __new);      \
                break;                                                  \
        default:                                                        \
                BUILD_BUG();                                            \
        }                                                               \
 -      __ret;                                                          \
 +      (__typeof__(*(__ptr)))__ret;                                    \
  })
  
 -#define arch_cmpxchg_acquire(ptr, o, n)                                       \
 -({                                                                    \
 -      __typeof__(*(ptr)) _o_ = (o);                                   \
 -      __typeof__(*(ptr)) _n_ = (n);                                   \
 -      (__typeof__(*(ptr))) __cmpxchg_acquire((ptr),                   \
 -                                      _o_, _n_, sizeof(*(ptr)));      \
 -})
 +#define arch_cmpxchg_relaxed(ptr, o, n)                                       \
 +      _arch_cmpxchg((ptr), (o), (n), "", "", "")
  
 -#define __cmpxchg_release(ptr, old, new, size)                                \
 -({                                                                    \
 -      __typeof__(ptr) __ptr = (ptr);                                  \
 -      __typeof__(*(ptr)) __old = (old);                               \
 -      __typeof__(*(ptr)) __new = (new);                               \
 -      __typeof__(*(ptr)) __ret;                                       \
 -      register unsigned int __rc;                                     \
 -      switch (size) {                                                 \
 -      case 4:                                                         \
 -              __asm__ __volatile__ (                                  \
 -                      RISCV_RELEASE_BARRIER                           \
 -                      "0:     lr.w %0, %2\n"                          \
 -                      "       bne  %0, %z3, 1f\n"                     \
 -                      "       sc.w %1, %z4, %2\n"                     \
 -                      "       bnez %1, 0b\n"                          \
 -                      "1:\n"                                          \
 -                      : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)    \
 -                      : "rJ" ((long)__old), "rJ" (__new)              \
 -                      : "memory");                                    \
 -              break;                                                  \
 -      case 8:                                                         \
 -              __asm__ __volatile__ (                                  \
 -                      RISCV_RELEASE_BARRIER                           \
 -                      "0:     lr.d %0, %2\n"                          \
 -                      "       bne %0, %z3, 1f\n"                      \
 -                      "       sc.d %1, %z4, %2\n"                     \
 -                      "       bnez %1, 0b\n"                          \
 -                      "1:\n"                                          \
 -                      : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)    \
 -                      : "rJ" (__old), "rJ" (__new)                    \
 -                      : "memory");                                    \
 -              break;                                                  \
 -      default:                                                        \
 -              BUILD_BUG();                                            \
 -      }                                                               \
 -      __ret;                                                          \
 -})
 +#define arch_cmpxchg_acquire(ptr, o, n)                                       \
 +      _arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER)
  
  #define arch_cmpxchg_release(ptr, o, n)                                       \
 -({                                                                    \
 -      __typeof__(*(ptr)) _o_ = (o);                                   \
 -      __typeof__(*(ptr)) _n_ = (n);                                   \
 -      (__typeof__(*(ptr))) __cmpxchg_release((ptr),                   \
 -                                      _o_, _n_, sizeof(*(ptr)));      \
 -})
 -
 -#define __cmpxchg(ptr, old, new, size)                                        \
 -({                                                                    \
 -      __typeof__(ptr) __ptr = (ptr);                                  \
 -      __typeof__(*(ptr)) __old = (old);                               \
 -      __typeof__(*(ptr)) __new = (new);                               \
 -      __typeof__(*(ptr)) __ret;                                       \
 -      register unsigned int __rc;                                     \
 -      switch (size) {                                                 \
 -      case 4:                                                         \
 -              __asm__ __volatile__ (                                  \
 -                      "0:     lr.w %0, %2\n"                          \
 -                      "       bne  %0, %z3, 1f\n"                     \
 -                      "       sc.w.rl %1, %z4, %2\n"                  \
 -                      "       bnez %1, 0b\n"                          \
 -                      "       fence rw, rw\n"                         \
 -                      "1:\n"                                          \
 -                      : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)    \
 -                      : "rJ" ((long)__old), "rJ" (__new)              \
 -                      : "memory");                                    \
 -              break;                                                  \
 -      case 8:                                                         \
 -              __asm__ __volatile__ (                                  \
 -                      "0:     lr.d %0, %2\n"                          \
 -                      "       bne %0, %z3, 1f\n"                      \
 -                      "       sc.d.rl %1, %z4, %2\n"                  \
 -                      "       bnez %1, 0b\n"                          \
 -                      "       fence rw, rw\n"                         \
 -                      "1:\n"                                          \
 -                      : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)    \
 -                      : "rJ" (__old), "rJ" (__new)                    \
 -                      : "memory");                                    \
 -              break;                                                  \
 -      default:                                                        \
 -              BUILD_BUG();                                            \
 -      }                                                               \
 -      __ret;                                                          \
 -})
 +      _arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "")
  
  #define arch_cmpxchg(ptr, o, n)                                               \
 -({                                                                    \
 -      __typeof__(*(ptr)) _o_ = (o);                                   \
 -      __typeof__(*(ptr)) _n_ = (n);                                   \
 -      (__typeof__(*(ptr))) __cmpxchg((ptr),                           \
 -                                     _o_, _n_, sizeof(*(ptr)));       \
 -})
 +      _arch_cmpxchg((ptr), (o), (n), ".rl", "", "     fence rw, rw\n")
  
  #define arch_cmpxchg_local(ptr, o, n)                                 \
 -      (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
 +      arch_cmpxchg_relaxed((ptr), (o), (n))
  
  #define arch_cmpxchg64(ptr, o, n)                                     \
  ({                                                                    \
        arch_cmpxchg_relaxed((ptr), (o), (n));                          \
  })
  
+ #define arch_cmpxchg64_relaxed(ptr, o, n)                             \
+ ({                                                                    \
+       BUILD_BUG_ON(sizeof(*(ptr)) != 8);                              \
+       arch_cmpxchg_relaxed((ptr), (o), (n));                          \
+ })
+ #define arch_cmpxchg64_acquire(ptr, o, n)                             \
+ ({                                                                    \
+       BUILD_BUG_ON(sizeof(*(ptr)) != 8);                              \
+       arch_cmpxchg_acquire((ptr), (o), (n));                          \
+ })
+ #define arch_cmpxchg64_release(ptr, o, n)                             \
+ ({                                                                    \
+       BUILD_BUG_ON(sizeof(*(ptr)) != 8);                              \
+       arch_cmpxchg_release((ptr), (o), (n));                          \
+ })
  #endif /* _ASM_RISCV_CMPXCHG_H */
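
As a usage sketch (hypothetical caller, not part of the patch): the new
64-bit wrappers forward to the generic cmpxchg macros but guard the
operand size at compile time, which is what lets lockref rely on them.

/* Hypothetical example; 'seq' and 'publish_if_current' are illustrative. */
static u64 seq;

static bool publish_if_current(u64 old, u64 new)
{
	/* Returns the value observed at &seq; equal to 'old' on success. */
	return arch_cmpxchg64_relaxed(&seq, old, new) == old;
}

/*
 * Anything narrower than 64 bits fails to build:
 *
 *	static u32 word;
 *	arch_cmpxchg64_relaxed(&word, 0, 1);	// BUILD_BUG_ON() fires
 */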