]> Git Repo - linux.git/commitdiff
Merge patch series "riscv: mm: Extend mappable memory up to hint address"
author Palmer Dabbelt <[email protected]>
Fri, 15 Mar 2024 17:17:34 +0000 (10:17 -0700)
committer Palmer Dabbelt <[email protected]>
Fri, 15 Mar 2024 17:17:34 +0000 (10:17 -0700)
Charlie Jenkins <[email protected]> says:

On riscv, mmap currently returns an address from the largest address
space that can fit entirely inside of the hint address. This makes it
such that the hint address is almost never returned. This patch raises
the mappable area up to and including the hint address. This allows mmap
to often return the hint address, which allows a performance improvement
over searching for a valid address as well as making the behavior more
similar to other architectures.

Note that a previous patch introduced stronger semantics compared to
other architectures for riscv mmap. On riscv, mmap will not use bits in
the upper bits of the virtual address depending on the hint address. On
other architectures, a random address is returned in the address space
requested. On all architectures the hint address will be returned if it
is available. This allows riscv applications to configure how many bits
in the virtual address should be left empty. This has two benefits:
applications can request address spaces that are smaller than the
default, and they do not need to know the page table layout of
riscv.

* b4-shazam-merge:
  docs: riscv: Define behavior of mmap
  selftests: riscv: Generalize mm selftests
  riscv: mm: Use hint address in mmap if available

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Palmer Dabbelt <[email protected]>
1  2 
arch/riscv/include/asm/processor.h
tools/testing/selftests/riscv/mm/mmap_test.h

index a8509cc31ab25a5dcc75765bdb99e43e87dded3b,8ece7a8f0e18bf00ca643b5c4bb27db28042850f..0d13b4497b01745a467f580b6a2dd52b274de206
  
  #include <asm/ptrace.h>
  
- #ifdef CONFIG_64BIT
- #define DEFAULT_MAP_WINDOW    (UL(1) << (MMAP_VA_BITS - 1))
- #define STACK_TOP_MAX         TASK_SIZE
  #define arch_get_mmap_end(addr, len, flags)                   \
  ({                                                            \
        unsigned long mmap_end;                                 \
        typeof(addr) _addr = (addr);                            \
-       if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && is_compat_task())) \
+       if ((_addr) == 0 ||                                     \
+           (IS_ENABLED(CONFIG_COMPAT) && is_compat_task()) ||  \
+           ((_addr + len) > BIT(VA_BITS - 1)))                 \
                mmap_end = STACK_TOP_MAX;                       \
-       else if ((_addr) >= VA_USER_SV57)                       \
-               mmap_end = STACK_TOP_MAX;                       \
-       else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \
-               mmap_end = VA_USER_SV48;                        \
        else                                                    \
-               mmap_end = VA_USER_SV39;                        \
+               mmap_end = (_addr + len);                       \
        mmap_end;                                               \
  })
  
        typeof(addr) _addr = (addr);                            \
        typeof(base) _base = (base);                            \
        unsigned long rnd_gap = DEFAULT_MAP_WINDOW - (_base);   \
-       if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && is_compat_task())) \
+       if ((_addr) == 0 ||                                     \
+           (IS_ENABLED(CONFIG_COMPAT) && is_compat_task()) ||  \
+           ((_addr + len) > BIT(VA_BITS - 1)))                 \
                mmap_base = (_base);                            \
-       else if (((_addr) >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) \
-               mmap_base = VA_USER_SV57 - rnd_gap;             \
-       else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \
-               mmap_base = VA_USER_SV48 - rnd_gap;             \
        else                                                    \
-               mmap_base = VA_USER_SV39 - rnd_gap;             \
+               mmap_base = (_addr + len) - rnd_gap;            \
        mmap_base;                                              \
  })
  
+ #ifdef CONFIG_64BIT
+ #define DEFAULT_MAP_WINDOW    (UL(1) << (MMAP_VA_BITS - 1))
+ #define STACK_TOP_MAX         TASK_SIZE_64
  #else
  #define DEFAULT_MAP_WINDOW    TASK_SIZE
  #define STACK_TOP_MAX         TASK_SIZE
  struct task_struct;
  struct pt_regs;
  
 +/*
 + * We use a flag to track in-kernel Vector context. Currently the flag has the
 + * following meaning:
 + *
 + *  - bit 0: indicates whether the in-kernel Vector context is active. The
 + *    activation of this state disables the preemption. On a non-RT kernel, it
 + *    also disables bh.
 + *  - bit 8: is used for tracking preemptible kernel-mode Vector, when
 + *    RISCV_ISA_V_PREEMPTIVE is enabled. Calling kernel_vector_begin() does not
 + *    disable the preemption if the thread's kernel_vstate.datap is allocated.
 + *    Instead, the kernel sets this bit field. Then the trap entry/exit code
 + *    knows if we are entering/exiting the context that owns preempt_v.
 + *     - 0: the task is not using preempt_v
 + *     - 1: the task is actively using preempt_v. Whether the task owns the
 + *          preempt_v context is decided by bits in RISCV_V_CTX_DEPTH_MASK.
 + *  - bit 16-23 are RISCV_V_CTX_DEPTH_MASK, used by context tracking routine
 + *     when preempt_v starts:
 + *     - 0: the task is actively using, and owns, the preempt_v context.
 + *     - non-zero: the task was using preempt_v, but then took a trap within.
 + *       Thus, the task does not own preempt_v. Any use of Vector will have to
 + *       save preempt_v, if dirty, and fallback to non-preemptible kernel-mode
 + *       Vector.
 + *  - bit 30: The in-kernel preempt_v context is saved, and needs to be
 + *    restored when returning to the context that owns the preempt_v.
 + *  - bit 31: The in-kernel preempt_v context is dirty, as signaled by the
 + *    trap entry code. Any context switches out-of current task need to save
 + *    it to the task's in-kernel V context. Also, any traps nesting on-top-of
 + *    preempt_v requesting to use V needs a save.
 + */
 +#define RISCV_V_CTX_DEPTH_MASK                0x00ff0000
 +
 +#define RISCV_V_CTX_UNIT_DEPTH                0x00010000
 +#define RISCV_KERNEL_MODE_V           0x00000001
 +#define RISCV_PREEMPT_V                       0x00000100
 +#define RISCV_PREEMPT_V_DIRTY         0x80000000
 +#define RISCV_PREEMPT_V_NEED_RESTORE  0x40000000
 +
  /* CPU-specific state of a task */
  struct thread_struct {
        /* Callee-saved registers */
        unsigned long s[12];    /* s[0]: frame pointer */
        struct __riscv_d_ext_state fstate;
        unsigned long bad_cause;
 -      unsigned long vstate_ctrl;
 +      u32 riscv_v_flags;
 +      u32 vstate_ctrl;
        struct __riscv_v_ext_state vstate;
        unsigned long align_ctl;
 +      struct __riscv_v_ext_state kernel_vstate;
  };
  
  /* Whitelist the fstate from the task_struct for hardened usercopy */
index 2e0db9c5be6c334f9ed7d0187fae6ed6de950745,36e78d991d5e29d86b85143c548878d0aeef8208..3b29ca3bb3d40d1aa11433d7433ba62ad85e7cbe
@@@ -4,63 -4,83 +4,86 @@@
  #include <sys/mman.h>
  #include <sys/resource.h>
  #include <stddef.h>
+ #include <strings.h>
+ #include "../../kselftest_harness.h"
  
  #define TOP_DOWN 0
  #define BOTTOM_UP 1
  
- struct addresses {
-       int *no_hint;
-       int *on_37_addr;
-       int *on_38_addr;
-       int *on_46_addr;
-       int *on_47_addr;
-       int *on_55_addr;
-       int *on_56_addr;
+ #if __riscv_xlen == 64
+ uint64_t random_addresses[] = {
+       0x19764f0d73b3a9f0, 0x016049584cecef59, 0x3580bdd3562f4acd,
+       0x1164219f20b17da0, 0x07d97fcb40ff2373, 0x76ec528921272ee7,
+       0x4dd48c38a3de3f70, 0x2e11415055f6997d, 0x14b43334ac476c02,
+       0x375a60795aff19f6, 0x47f3051725b8ee1a, 0x4e697cf240494a9f,
+       0x456b59b5c2f9e9d1, 0x101724379d63cb96, 0x7fe9ad31619528c1,
+       0x2f417247c495c2ea, 0x329a5a5b82943a5e, 0x06d7a9d6adcd3827,
+       0x327b0b9ee37f62d5, 0x17c7b1851dfd9b76, 0x006ebb6456ec2cd9,
+       0x00836cd14146a134, 0x00e5c4dcde7126db, 0x004c29feadf75753,
+       0x00d8b20149ed930c, 0x00d71574c269387a, 0x0006ebe4a82acb7a,
+       0x0016135df51f471b, 0x00758bdb55455160, 0x00d0bdd949b13b32,
+       0x00ecea01e7c5f54b, 0x00e37b071b9948b1, 0x0011fdd00ff57ab3,
+       0x00e407294b52f5ea, 0x00567748c200ed20, 0x000d073084651046,
+       0x00ac896f4365463c, 0x00eb0d49a0b26216, 0x0066a2564a982a31,
+       0x002e0d20237784ae, 0x0000554ff8a77a76, 0x00006ce07a54c012,
+       0x000009570516d799, 0x00000954ca15b84d, 0x0000684f0d453379,
+       0x00002ae5816302b5, 0x0000042403fb54bf, 0x00004bad7392bf30,
+       0x00003e73bfa4b5e3, 0x00005442c29978e0, 0x00002803f11286b6,
+       0x000073875d745fc6, 0x00007cede9cb8240, 0x000027df84cc6a4f,
+       0x00006d7e0e74242a, 0x00004afd0b836e02, 0x000047d0e837cd82,
+       0x00003b42405efeda, 0x00001531bafa4c95, 0x00007172cae34ac4,
+ };
+ #else
+ uint32_t random_addresses[] = {
+       0x8dc302e0, 0x929ab1e0, 0xb47683ba, 0xea519c73, 0xa19f1c90, 0xc49ba213,
+       0x8f57c625, 0xadfe5137, 0x874d4d95, 0xaa20f09d, 0xcf21ebfc, 0xda7737f1,
+       0xcedf392a, 0x83026c14, 0xccedca52, 0xc6ccf826, 0xe0cd9415, 0x997472ca,
+       0xa21a44c1, 0xe82196f5, 0xa23fd66b, 0xc28d5590, 0xd009cdce, 0xcf0be646,
+       0x8fc8c7ff, 0xe2a85984, 0xa3d3236b, 0x89a0619d, 0xc03db924, 0xb5d4cc1b,
+       0xb96ee04c, 0xd191da48, 0xb432a000, 0xaa2bebbc, 0xa2fcb289, 0xb0cca89b,
+       0xb0c18d6a, 0x88f58deb, 0xa4d42d1c, 0xe4d74e86, 0x99902b09, 0x8f786d31,
+       0xbec5e381, 0x9a727e65, 0xa9a65040, 0xa880d789, 0x8f1b335e, 0xfc821c1e,
+       0x97e34be4, 0xbbef84ed, 0xf447d197, 0xfd7ceee2, 0xe632348d, 0xee4590f4,
+       0x958992a5, 0xd57e05d6, 0xfd240970, 0xc5b0dcff, 0xd96da2c2, 0xa7ae041d,
  };
+ #endif
  
- static inline void do_mmaps(struct addresses *mmap_addresses)
- {
-       /*
-        * Place all of the hint addresses on the boundaries of mmap
-        * sv39, sv48, sv57
-        * User addresses end at 1<<38, 1<<47, 1<<56 respectively
-        */
-       void *on_37_bits = (void *)(1UL << 37);
-       void *on_38_bits = (void *)(1UL << 38);
-       void *on_46_bits = (void *)(1UL << 46);
-       void *on_47_bits = (void *)(1UL << 47);
-       void *on_55_bits = (void *)(1UL << 55);
-       void *on_56_bits = (void *)(1UL << 56);
 +// Only works on 64 bit
 +#if __riscv_xlen == 64
+ #define PROT (PROT_READ | PROT_WRITE)
+ #define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
  
-       int prot = PROT_READ | PROT_WRITE;
-       int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+ /* mmap must return a value that doesn't use more bits than the hint address. */
+ static inline unsigned long get_max_value(unsigned long input)
+ {
+       unsigned long max_bit = (1UL << (((sizeof(unsigned long) * 8) - 1 -
+                                         __builtin_clzl(input))));
  
-       mmap_addresses->no_hint =
-               mmap(NULL, 5 * sizeof(int), prot, flags, 0, 0);
-       mmap_addresses->on_37_addr =
-               mmap(on_37_bits, 5 * sizeof(int), prot, flags, 0, 0);
-       mmap_addresses->on_38_addr =
-               mmap(on_38_bits, 5 * sizeof(int), prot, flags, 0, 0);
-       mmap_addresses->on_46_addr =
-               mmap(on_46_bits, 5 * sizeof(int), prot, flags, 0, 0);
-       mmap_addresses->on_47_addr =
-               mmap(on_47_bits, 5 * sizeof(int), prot, flags, 0, 0);
-       mmap_addresses->on_55_addr =
-               mmap(on_55_bits, 5 * sizeof(int), prot, flags, 0, 0);
-       mmap_addresses->on_56_addr =
-               mmap(on_56_bits, 5 * sizeof(int), prot, flags, 0, 0);
+       return max_bit + (max_bit - 1);
  }
+ #define TEST_MMAPS                                                            \
+       ({                                                                    \
+               void *mmap_addr;                                              \
+               for (int i = 0; i < ARRAY_SIZE(random_addresses); i++) {      \
+                       mmap_addr = mmap((void *)random_addresses[i],         \
+                                        5 * sizeof(int), PROT, FLAGS, 0, 0); \
+                       EXPECT_NE(MAP_FAILED, mmap_addr);                     \
+                       EXPECT_GE((void *)get_max_value(random_addresses[i]), \
+                                 mmap_addr);                                 \
+                       mmap_addr = mmap((void *)random_addresses[i],         \
+                                        5 * sizeof(int), PROT, FLAGS, 0, 0); \
+                       EXPECT_NE(MAP_FAILED, mmap_addr);                     \
+                       EXPECT_GE((void *)get_max_value(random_addresses[i]), \
+                                 mmap_addr);                                 \
+               }                                                             \
+       })
 +#endif /* __riscv_xlen == 64 */
  
  static inline int memory_layout(void)
  {
-       int prot = PROT_READ | PROT_WRITE;
-       int flags = MAP_PRIVATE | MAP_ANONYMOUS;
-       void *value1 = mmap(NULL, sizeof(int), prot, flags, 0, 0);
-       void *value2 = mmap(NULL, sizeof(int), prot, flags, 0, 0);
+       void *value1 = mmap(NULL, sizeof(int), PROT, FLAGS, 0, 0);
+       void *value2 = mmap(NULL, sizeof(int), PROT, FLAGS, 0, 0);
  
        return value2 > value1;
  }
This page took 0.097553 seconds and 4 git commands to generate.