From: Linus Torvalds Date: Fri, 7 Aug 2020 16:29:25 +0000 (-0700) Subject: Merge branch 'work.regset' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs X-Git-Url: https://repo.jachan.dev/J-linux.git/commitdiff_plain/19b39c38abf68591edbd698740d410c37ee075cc?hp=-c Merge branch 'work.regset' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs Pull ptrace regset updates from Al Viro: "Internal regset API changes: - regularize copy_regset_{to,from}_user() callers - switch to saner calling conventions for ->get() - kill user_regset_copyout() The ->put() side of things will have to wait for the next cycle, unfortunately. The balance is about -1KLoC and replacements for ->get() instances are a lot saner" * 'work.regset' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (41 commits) regset: kill user_regset_copyout{,_zero}() regset(): kill ->get_size() regset: kill ->get() csky: switch to ->regset_get() xtensa: switch to ->regset_get() parisc: switch to ->regset_get() nds32: switch to ->regset_get() nios2: switch to ->regset_get() hexagon: switch to ->regset_get() h8300: switch to ->regset_get() openrisc: switch to ->regset_get() riscv: switch to ->regset_get() c6x: switch to ->regset_get() ia64: switch to ->regset_get() arc: switch to ->regset_get() arm: switch to ->regset_get() sh: convert to ->regset_get() arm64: switch to ->regset_get() mips: switch to ->regset_get() sparc: switch to ->regset_get() ... --- 19b39c38abf68591edbd698740d410c37ee075cc diff --combined arch/arm64/kernel/ptrace.c index 1e02e98e68dd,5bf737d38b26..d8ebfd813e28 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@@ -474,11 -474,10 +474,10 @@@ static int ptrace_hbp_set_addr(unsigne static int hw_break_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { unsigned int note_type = regset->core_note_type; - int ret, idx = 0, offset, limit; + int ret, idx = 0; u32 info, ctrl; u64 addr; @@@ -487,49 -486,21 +486,21 @@@ if (ret) return ret; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &info, 0, - sizeof(info)); - if (ret) - return ret; - - /* Pad */ - offset = offsetof(struct user_hwdebug_state, pad); - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, offset, - offset + PTRACE_HBP_PAD_SZ); - if (ret) - return ret; - + membuf_write(&to, &info, sizeof(info)); + membuf_zero(&to, sizeof(u32)); /* (address, ctrl) registers */ - offset = offsetof(struct user_hwdebug_state, dbg_regs); - limit = regset->n * regset->size; - while (count && offset < limit) { + while (to.left) { ret = ptrace_hbp_get_addr(note_type, target, idx, &addr); if (ret) return ret; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &addr, - offset, offset + PTRACE_HBP_ADDR_SZ); - if (ret) - return ret; - offset += PTRACE_HBP_ADDR_SZ; - ret = ptrace_hbp_get_ctrl(note_type, target, idx, &ctrl); if (ret) return ret; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &ctrl, - offset, offset + PTRACE_HBP_CTRL_SZ); - if (ret) - return ret; - offset += PTRACE_HBP_CTRL_SZ; - - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - offset, - offset + PTRACE_HBP_PAD_SZ); - if (ret) - return ret; - offset += PTRACE_HBP_PAD_SZ; + membuf_store(&to, addr); + membuf_store(&to, ctrl); + membuf_zero(&to, sizeof(u32)); idx++; } - return 0; } @@@ -589,11 -560,10 +560,10 @@@ static int hw_break_set(struct task_str static int gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct user_pt_regs *uregs = &task_pt_regs(target)->user_regs; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1); + return membuf_write(&to, uregs, sizeof(*uregs)); } static int gpr_set(struct task_struct *target, const struct user_regset *regset, @@@ -626,8 -596,7 +596,7 @@@ static int fpr_active(struct task_struc */ static int __fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf, unsigned int start_pos) + struct membuf to) { struct user_fpsimd_state *uregs; @@@ -635,13 -604,11 +604,11 @@@ uregs = &target->thread.uw.fpsimd_state; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, - start_pos, start_pos + sizeof(*uregs)); + return membuf_write(&to, uregs, sizeof(*uregs)); } static int fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { if (!system_supports_fpsimd()) return -EINVAL; @@@ -649,7 -616,7 +616,7 @@@ if (target == current) fpsimd_preserve_current_state(); - return __fpr_get(target, regset, pos, count, kbuf, ubuf, 0); + return __fpr_get(target, regset, to); } static int __fpr_set(struct task_struct *target, @@@ -699,15 -666,12 +666,12 @@@ static int fpr_set(struct task_struct * } static int tls_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - unsigned long *tls = &target->thread.uw.tp_value; - if (target == current) tls_preserve_current_state(); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1); + return membuf_store(&to, target->thread.uw.tp_value); } static int tls_set(struct task_struct *target, const struct user_regset *regset, @@@ -727,13 -691,9 +691,9 @@@ static int system_call_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - int syscallno = task_pt_regs(target)->syscallno; - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &syscallno, 0, -1); + return membuf_store(&to, task_pt_regs(target)->syscallno); } static int system_call_set(struct task_struct *target, @@@ -780,24 -740,10 +740,10 @@@ static unsigned int sve_size_from_heade return ALIGN(header->size, SVE_VQ_BYTES); } - static unsigned int sve_get_size(struct task_struct *target, - const struct user_regset *regset) - { - struct user_sve_header header; - - if (!system_supports_sve()) - return 0; - - sve_init_header_from_task(&header, target); - return sve_size_from_header(&header); - } - static int sve_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; struct user_sve_header header; unsigned int vq; unsigned long start, end; @@@ -809,10 -755,7 +755,7 @@@ sve_init_header_from_task(&header, target); vq = sve_vq_from_vl(header.vl); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &header, - 0, sizeof(header)); - if (ret) - return ret; + membuf_write(&to, &header, sizeof(header)); if (target == current) fpsimd_preserve_current_state(); @@@ -821,26 -764,18 +764,18 @@@ BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) - return __fpr_get(target, regset, pos, count, kbuf, ubuf, - SVE_PT_FPSIMD_OFFSET); + return __fpr_get(target, regset, to); /* Otherwise: full SVE case */ BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); start = SVE_PT_SVE_OFFSET; end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - target->thread.sve_state, - start, end); - if (ret) - return ret; + membuf_write(&to, target->thread.sve_state, end - start); start = end; end = SVE_PT_SVE_FPSR_OFFSET(vq); - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - start, end); - if (ret) - return ret; + membuf_zero(&to, end - start); /* * Copy fpsr, and fpcr which must follow contiguously in @@@ -848,16 -783,11 +783,11 @@@ */ start = end; end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.uw.fpsimd_state.fpsr, - start, end); - if (ret) - return ret; + membuf_write(&to, &target->thread.uw.fpsimd_state.fpsr, end - start); start = end; end = sve_size_from_header(&header); - return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - start, end); + return membuf_zero(&to, end - start); } static int sve_set(struct task_struct *target, @@@ -961,8 -891,7 +891,7 @@@ out #ifdef CONFIG_ARM64_PTR_AUTH static int pac_mask_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { /* * The PAC bits can differ across data and instruction pointers @@@ -978,7 -907,7 +907,7 @@@ if (!system_supports_address_auth()) return -EINVAL; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &uregs, 0, -1); + return membuf_write(&to, &uregs, sizeof(uregs)); } #ifdef CONFIG_CHECKPOINT_RESTORE @@@ -1017,8 -946,7 +946,7 @@@ static void pac_address_keys_from_user( static int pac_address_keys_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct ptrauth_keys_user *keys = &target->thread.keys_user; struct user_pac_address_keys user_keys; @@@ -1028,8 -956,7 +956,7 @@@ pac_address_keys_to_user(&user_keys, keys); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &user_keys, 0, -1); + return membuf_write(&to, &user_keys, sizeof(user_keys)); } static int pac_address_keys_set(struct task_struct *target, @@@ -1068,8 -995,7 +995,7 @@@ static void pac_generic_keys_from_user( static int pac_generic_keys_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct ptrauth_keys_user *keys = &target->thread.keys_user; struct user_pac_generic_keys user_keys; @@@ -1079,8 -1005,7 +1005,7 @@@ pac_generic_keys_to_user(&user_keys, keys); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &user_keys, 0, -1); + return membuf_write(&to, &user_keys, sizeof(user_keys)); } static int pac_generic_keys_set(struct task_struct *target, @@@ -1134,7 -1059,7 +1059,7 @@@ static const struct user_regset aarch64 .n = sizeof(struct user_pt_regs) / sizeof(u64), .size = sizeof(u64), .align = sizeof(u64), - .get = gpr_get, + .regset_get = gpr_get, .set = gpr_set }, [REGSET_FPR] = { @@@ -1147,7 -1072,7 +1072,7 @@@ .size = sizeof(u32), .align = sizeof(u32), .active = fpr_active, - .get = fpr_get, + .regset_get = fpr_get, .set = fpr_set }, [REGSET_TLS] = { @@@ -1155,7 -1080,7 +1080,7 @@@ .n = 1, .size = sizeof(void *), .align = sizeof(void *), - .get = tls_get, + .regset_get = tls_get, .set = tls_set, }, #ifdef CONFIG_HAVE_HW_BREAKPOINT @@@ -1164,7 -1089,7 +1089,7 @@@ .n = sizeof(struct user_hwdebug_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = hw_break_get, + .regset_get = hw_break_get, .set = hw_break_set, }, [REGSET_HW_WATCH] = { @@@ -1172,7 -1097,7 +1097,7 @@@ .n = sizeof(struct user_hwdebug_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = hw_break_get, + .regset_get = hw_break_get, .set = hw_break_set, }, #endif @@@ -1181,7 -1106,7 +1106,7 @@@ .n = 1, .size = sizeof(int), .align = sizeof(int), - .get = system_call_get, + .regset_get = system_call_get, .set = system_call_set, }, #ifdef CONFIG_ARM64_SVE @@@ -1191,9 -1116,8 +1116,8 @@@ SVE_VQ_BYTES), .size = SVE_VQ_BYTES, .align = SVE_VQ_BYTES, - .get = sve_get, + .regset_get = sve_get, .set = sve_set, - .get_size = sve_get_size, }, #endif #ifdef CONFIG_ARM64_PTR_AUTH @@@ -1202,7 -1126,7 +1126,7 @@@ .n = sizeof(struct user_pac_mask) / sizeof(u64), .size = sizeof(u64), .align = sizeof(u64), - .get = pac_mask_get, + .regset_get = pac_mask_get, /* this cannot be set dynamically */ }, #ifdef CONFIG_CHECKPOINT_RESTORE @@@ -1211,7 -1135,7 +1135,7 @@@ .n = sizeof(struct user_pac_address_keys) / sizeof(__uint128_t), .size = sizeof(__uint128_t), .align = sizeof(__uint128_t), - .get = pac_address_keys_get, + .regset_get = pac_address_keys_get, .set = pac_address_keys_set, }, [REGSET_PACG_KEYS] = { @@@ -1219,7 -1143,7 +1143,7 @@@ .n = sizeof(struct user_pac_generic_keys) / sizeof(__uint128_t), .size = sizeof(__uint128_t), .align = sizeof(__uint128_t), - .get = pac_generic_keys_get, + .regset_get = pac_generic_keys_get, .set = pac_generic_keys_set, }, #endif @@@ -1237,57 -1161,31 +1161,31 @@@ enum compat_regset REGSET_COMPAT_VFP, }; + static inline compat_ulong_t compat_get_user_reg(struct task_struct *task, int idx) + { + struct pt_regs *regs = task_pt_regs(task); + + switch (idx) { + case 15: + return regs->pc; + case 16: + return pstate_to_compat_psr(regs->pstate); + case 17: + return regs->orig_x0; + default: + return regs->regs[idx]; + } + } + static int compat_gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - int ret = 0; - unsigned int i, start, num_regs; - - /* Calculate the number of AArch32 registers contained in count */ - num_regs = count / regset->size; - - /* Convert pos into an register number */ - start = pos / regset->size; - - if (start + num_regs > regset->n) - return -EIO; - - for (i = 0; i < num_regs; ++i) { - unsigned int idx = start + i; - compat_ulong_t reg; - - switch (idx) { - case 15: - reg = task_pt_regs(target)->pc; - break; - case 16: - reg = task_pt_regs(target)->pstate; - reg = pstate_to_compat_psr(reg); - break; - case 17: - reg = task_pt_regs(target)->orig_x0; - break; - default: - reg = task_pt_regs(target)->regs[idx]; - } - - if (kbuf) { - memcpy(kbuf, ®, sizeof(reg)); - kbuf += sizeof(reg); - } else { - ret = copy_to_user(ubuf, ®, sizeof(reg)); - if (ret) { - ret = -EFAULT; - break; - } - - ubuf += sizeof(reg); - } - } + int i = 0; - return ret; + while (to.left) + membuf_store(&to, compat_get_user_reg(target, i++)); + return 0; } static int compat_gpr_set(struct task_struct *target, @@@ -1354,12 -1252,10 +1252,10 @@@ static int compat_vfp_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct user_fpsimd_state *uregs; compat_ulong_t fpscr; - int ret, vregs_end_pos; if (!system_supports_fpsimd()) return -EINVAL; @@@ -1373,19 -1269,10 +1269,10 @@@ * The VFP registers are packed into the fpsimd_state, so they all sit * nicely together for us. We just need to create the fpscr separately. */ - vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, - 0, vregs_end_pos); - - if (count && !ret) { - fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) | - (uregs->fpcr & VFP_FPSCR_CTRL_MASK); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fpscr, - vregs_end_pos, VFP_STATE_SIZE); - } - - return ret; + membuf_write(&to, uregs, VFP_STATE_SIZE - sizeof(compat_ulong_t)); + fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) | + (uregs->fpcr & VFP_FPSCR_CTRL_MASK); + return membuf_store(&to, fpscr); } static int compat_vfp_set(struct task_struct *target, @@@ -1420,11 -1307,10 +1307,10 @@@ } static int compat_tls_get(struct task_struct *target, - const struct user_regset *regset, unsigned int pos, - unsigned int count, void *kbuf, void __user *ubuf) + const struct user_regset *regset, + struct membuf to) { - compat_ulong_t tls = (compat_ulong_t)target->thread.uw.tp_value; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); + return membuf_store(&to, (compat_ulong_t)target->thread.uw.tp_value); } static int compat_tls_set(struct task_struct *target, @@@ -1449,7 -1335,7 +1335,7 @@@ static const struct user_regset aarch32 .n = COMPAT_ELF_NGREG, .size = sizeof(compat_elf_greg_t), .align = sizeof(compat_elf_greg_t), - .get = compat_gpr_get, + .regset_get = compat_gpr_get, .set = compat_gpr_set }, [REGSET_COMPAT_VFP] = { @@@ -1458,7 -1344,7 +1344,7 @@@ .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), .active = fpr_active, - .get = compat_vfp_get, + .regset_get = compat_vfp_get, .set = compat_vfp_set }, }; @@@ -1474,7 -1360,7 +1360,7 @@@ static const struct user_regset aarch32 .n = COMPAT_ELF_NGREG, .size = sizeof(compat_elf_greg_t), .align = sizeof(compat_elf_greg_t), - .get = compat_gpr_get, + .regset_get = compat_gpr_get, .set = compat_gpr_set }, [REGSET_FPR] = { @@@ -1482,7 -1368,7 +1368,7 @@@ .n = VFP_STATE_SIZE / sizeof(compat_ulong_t), .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), - .get = compat_vfp_get, + .regset_get = compat_vfp_get, .set = compat_vfp_set }, [REGSET_TLS] = { @@@ -1490,7 -1376,7 +1376,7 @@@ .n = 1, .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), - .get = compat_tls_get, + .regset_get = compat_tls_get, .set = compat_tls_set, }, #ifdef CONFIG_HAVE_HW_BREAKPOINT @@@ -1499,7 -1385,7 +1385,7 @@@ .n = sizeof(struct user_hwdebug_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = hw_break_get, + .regset_get = hw_break_get, .set = hw_break_set, }, [REGSET_HW_WATCH] = { @@@ -1507,7 -1393,7 +1393,7 @@@ .n = sizeof(struct user_hwdebug_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = hw_break_get, + .regset_get = hw_break_get, .set = hw_break_set, }, #endif @@@ -1516,7 -1402,7 +1402,7 @@@ .n = 1, .size = sizeof(int), .align = sizeof(int), - .get = system_call_get, + .regset_get = system_call_get, .set = system_call_set, }, }; @@@ -1541,9 -1427,7 +1427,7 @@@ static int compat_ptrace_read_user(stru else if (off == COMPAT_PT_TEXT_END_ADDR) tmp = tsk->mm->end_code; else if (off < sizeof(compat_elf_gregset_t)) - return copy_regset_to_user(tsk, &user_aarch32_view, - REGSET_COMPAT_GPR, off, - sizeof(compat_ulong_t), ret); + tmp = compat_get_user_reg(tsk, off >> 2); else if (off >= COMPAT_USER_SZ) return -EIO; else @@@ -1555,8 -1439,8 +1439,8 @@@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t val) { - int ret; - mm_segment_t old_fs = get_fs(); + struct pt_regs newregs = *task_pt_regs(tsk); + unsigned int idx = off / 4; if (off & 3 || off >= COMPAT_USER_SZ) return -EIO; @@@ -1564,14 -1448,25 +1448,25 @@@ if (off >= sizeof(compat_elf_gregset_t)) return 0; - set_fs(KERNEL_DS); - ret = copy_regset_from_user(tsk, &user_aarch32_view, - REGSET_COMPAT_GPR, off, - sizeof(compat_ulong_t), - &val); - set_fs(old_fs); + switch (idx) { + case 15: + newregs.pc = val; + break; + case 16: + newregs.pstate = compat_psr_to_pstate(val); + break; + case 17: + newregs.orig_x0 = val; + break; + default: + newregs.regs[idx] = val; + } - return ret; + if (!valid_user_regs(&newregs.user_regs, tsk)) + return -EINVAL; + + *task_pt_regs(tsk) = newregs; + return 0; } #ifdef CONFIG_HAVE_HW_BREAKPOINT @@@ -1811,42 -1706,19 +1706,42 @@@ static void tracehook_report_syscall(st unsigned long saved_reg; /* - * A scratch register (ip(r12) on AArch32, x7 on AArch64) is - * used to denote syscall entry/exit: + * We have some ABI weirdness here in the way that we handle syscall + * exit stops because we indicate whether or not the stop has been + * signalled from syscall entry or syscall exit by clobbering a general + * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee + * and restoring its old value after the stop. This means that: + * + * - Any writes by the tracer to this register during the stop are + * ignored/discarded. + * + * - The actual value of the register is not available during the stop, + * so the tracer cannot save it and restore it later. + * + * - Syscall stops behave differently to seccomp and pseudo-step traps + * (the latter do not nobble any registers). */ regno = (is_compat_task() ? 12 : 7); saved_reg = regs->regs[regno]; regs->regs[regno] = dir; - if (dir == PTRACE_SYSCALL_EXIT) + if (dir == PTRACE_SYSCALL_ENTER) { + if (tracehook_report_syscall_entry(regs)) + forget_syscall(regs); + regs->regs[regno] = saved_reg; + } else if (!test_thread_flag(TIF_SINGLESTEP)) { tracehook_report_syscall_exit(regs, 0); - else if (tracehook_report_syscall_entry(regs)) - forget_syscall(regs); + regs->regs[regno] = saved_reg; + } else { + regs->regs[regno] = saved_reg; - regs->regs[regno] = saved_reg; + /* + * Signal a pseudo-step exception since we are stepping but + * tracer modifications to the registers may have rewound the + * state machine. + */ + tracehook_report_syscall_exit(regs, 1); + } } int syscall_trace_enter(struct pt_regs *regs) @@@ -1856,12 -1728,12 +1751,12 @@@ if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) { tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); if (!in_syscall(regs) || (flags & _TIF_SYSCALL_EMU)) - return -1; + return NO_SYSCALL; } /* Do the secure computing after ptrace; failures should be fast. */ if (secure_computing() == -1) - return -1; + return NO_SYSCALL; if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, regs->syscallno); @@@ -1874,14 -1746,12 +1769,14 @@@ void syscall_trace_exit(struct pt_regs *regs) { + unsigned long flags = READ_ONCE(current_thread_info()->flags); + audit_syscall_exit(regs); - if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + if (flags & _TIF_SYSCALL_TRACEPOINT) trace_sys_exit(regs, regs_return_value(regs)); - if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP)) tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); rseq_syscall(regs); @@@ -1959,8 -1829,8 +1854,8 @@@ static int valid_native_regs(struct use */ int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task) { - if (!test_tsk_thread_flag(task, TIF_SINGLESTEP)) - regs->pstate &= ~DBG_SPSR_SS; + /* https://lore.kernel.org/lkml/20191118131525.GA4180@willie-the-truck */ + user_regs_reset_single_step(regs, task); if (is_compat_thread(task_thread_info(task))) return valid_compat_regs(regs); diff --combined arch/csky/kernel/ptrace.c index b06612c408c4,6d93db817429..d822144906ac --- a/arch/csky/kernel/ptrace.c +++ b/arch/csky/kernel/ptrace.c @@@ -76,17 -76,14 +76,14 @@@ enum csky_regset static int gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - struct pt_regs *regs; - - regs = task_pt_regs(target); + struct pt_regs *regs = task_pt_regs(target); /* Abiv1 regs->tls is fake and we need sync here. */ regs->tls = task_thread_info(target)->tp_value; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, regs, 0, -1); + return membuf_write(&to, regs, sizeof(regs)); } static int gpr_set(struct task_struct *target, @@@ -114,8 -111,7 +111,7 @@@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct user_fp *regs = (struct user_fp *)&target->thread.user_fp; @@@ -131,9 -127,9 +127,9 @@@ for (i = 0; i < 32; i++) tmp.vr[64 + i] = regs->vr[32 + i]; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &tmp, 0, -1); + return membuf_write(&to, &tmp, sizeof(tmp)); #else - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, regs, 0, -1); + return membuf_write(&to, regs, sizeof(*regs)); #endif } @@@ -173,16 -169,16 +169,16 @@@ static const struct user_regset csky_re .n = sizeof(struct pt_regs) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = &gpr_get, - .set = &gpr_set, + .regset_get = gpr_get, + .set = gpr_set, }, [REGSET_FPR] = { .core_note_type = NT_PRFPREG, .n = sizeof(struct user_fp) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = &fpr_get, - .set = &fpr_set, + .regset_get = fpr_get, + .set = fpr_set, }, }; @@@ -320,20 -316,16 +316,20 @@@ long arch_ptrace(struct task_struct *ch return ret; } -asmlinkage void syscall_trace_enter(struct pt_regs *regs) +asmlinkage int syscall_trace_enter(struct pt_regs *regs) { if (test_thread_flag(TIF_SYSCALL_TRACE)) if (tracehook_report_syscall_entry(regs)) - syscall_set_nr(current, regs, -1); + return -1; + + if (secure_computing() == -1) + return -1; if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, syscall_get_nr(current, regs)); audit_syscall_entry(regs_syscallid(regs), regs->a0, regs->a1, regs->a2, regs->a3); + return 0; } asmlinkage void syscall_trace_exit(struct pt_regs *regs) @@@ -347,8 -339,13 +343,8 @@@ trace_sys_exit(regs, syscall_get_return_value(current, regs)); } -extern void show_stack(struct task_struct *task, unsigned long *stack, const char *loglvl); void show_regs(struct pt_regs *fp) { - unsigned long *sp; - unsigned char *tp; - int i; - pr_info("\nCURRENT PROCESS:\n\n"); pr_info("COMM=%s PID=%d\n", current->comm, current->pid); @@@ -395,9 -392,29 +391,9 @@@ fp->regs[0], fp->regs[1], fp->regs[2], fp->regs[3]); pr_info("r10: 0x%08lx r11: 0x%08lx r12: 0x%08lx r13: 0x%08lx\n", fp->regs[4], fp->regs[5], fp->regs[6], fp->regs[7]); - pr_info("r14: 0x%08lx r1: 0x%08lx r15: 0x%08lx\n", - fp->regs[8], fp->regs[9], fp->lr); + pr_info("r14: 0x%08lx r1: 0x%08lx\n", + fp->regs[8], fp->regs[9]); #endif - pr_info("\nCODE:"); - tp = ((unsigned char *) fp->pc) - 0x20; - tp += ((int)tp % 4) ? 2 : 0; - for (sp = (unsigned long *) tp, i = 0; (i < 0x40); i += 4) { - if ((i % 0x10) == 0) - pr_cont("\n%08x: ", (int) (tp + i)); - pr_cont("%08x ", (int) *sp++); - } - pr_cont("\n"); - - pr_info("\nKERNEL STACK:"); - tp = ((unsigned char *) fp) - 0x40; - for (sp = (unsigned long *) tp, i = 0; (i < 0xc0); i += 4) { - if ((i % 0x10) == 0) - pr_cont("\n%08x: ", (int) (tp + i)); - pr_cont("%08x ", (int) *sp++); - } - pr_cont("\n"); - - show_stack(NULL, (unsigned long *)fp->regs[4], KERN_INFO); return; } diff --combined arch/s390/kernel/ptrace.c index 3cc15c066298,367a6878bf6e..a09b9e98936c --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@@ -323,25 -323,6 +323,25 @@@ static inline void __poke_user_per(stru child->thread.per_user.end = data; } +static void fixup_int_code(struct task_struct *child, addr_t data) +{ + struct pt_regs *regs = task_pt_regs(child); + int ilc = regs->int_code >> 16; + u16 insn; + + if (ilc > 6) + return; + + if (ptrace_access_vm(child, regs->psw.addr - (regs->int_code >> 16), + &insn, sizeof(insn), FOLL_FORCE) != sizeof(insn)) + return; + + /* double check that tracee stopped on svc instruction */ + if ((insn >> 8) != 0xa) + return; + + regs->int_code = 0x20000 | (data & 0xffff); +} /* * Write a word to the user area of a process at location addr. This * operation does have an additional problem compared to peek_user. @@@ -353,9 -334,7 +353,9 @@@ static int __poke_user(struct task_stru struct user *dummy = NULL; addr_t offset; + if (addr < (addr_t) &dummy->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ @@@ -373,11 -352,7 +373,11 @@@ /* Invalid addressing mode bits */ return -EINVAL; } - *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct user, regs.gprs[2])) + fixup_int_code(child, data); + *(addr_t *)((addr_t) ®s->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { /* @@@ -743,10 -718,6 +743,10 @@@ static int __poke_user_compat(struct ta regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | (__u64)(tmp & PSW32_ADDR_AMODE); } else { + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct compat_user, regs.gprs[2])) + fixup_int_code(child, data); /* gpr 0-15 */ *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } @@@ -866,66 -837,40 +866,66 @@@ long compat_arch_ptrace(struct task_str asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { unsigned long mask = -1UL; + long ret = -1; + + if (is_compat_task()) + mask = 0xffffffff; /* * The sysc_tracesys code in entry.S stored the system * call number to gprs[2]. */ if (test_thread_flag(TIF_SYSCALL_TRACE) && - (tracehook_report_syscall_entry(regs) || - regs->gprs[2] >= NR_syscalls)) { + tracehook_report_syscall_entry(regs)) { /* - * Tracing decided this syscall should not happen or the - * debugger stored an invalid system call number. Skip + * Tracing decided this syscall should not happen. Skip * the system call and the system call restart handling. */ - clear_pt_regs_flag(regs, PIF_SYSCALL); - return -1; + goto skip; } +#ifdef CONFIG_SECCOMP /* Do the secure computing check after ptrace. */ - if (secure_computing()) { - /* seccomp failures shouldn't expose any additional code. */ - return -1; + if (unlikely(test_thread_flag(TIF_SECCOMP))) { + struct seccomp_data sd; + + if (is_compat_task()) { + sd.instruction_pointer = regs->psw.addr & 0x7fffffff; + sd.arch = AUDIT_ARCH_S390; + } else { + sd.instruction_pointer = regs->psw.addr; + sd.arch = AUDIT_ARCH_S390X; + } + + sd.nr = regs->int_code & 0xffff; + sd.args[0] = regs->orig_gpr2 & mask; + sd.args[1] = regs->gprs[3] & mask; + sd.args[2] = regs->gprs[4] & mask; + sd.args[3] = regs->gprs[5] & mask; + sd.args[4] = regs->gprs[6] & mask; + sd.args[5] = regs->gprs[7] & mask; + + if (__secure_computing(&sd) == -1) + goto skip; } +#endif /* CONFIG_SECCOMP */ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->gprs[2]); + trace_sys_enter(regs, regs->int_code & 0xffff); - if (is_compat_task()) - mask = 0xffffffff; - audit_syscall_entry(regs->gprs[2], regs->orig_gpr2 & mask, + audit_syscall_entry(regs->int_code & 0xffff, regs->orig_gpr2 & mask, regs->gprs[3] &mask, regs->gprs[4] &mask, regs->gprs[5] &mask); + if ((signed long)regs->gprs[2] >= NR_syscalls) { + regs->gprs[2] = -ENOSYS; + ret = -ENOSYS; + } return regs->gprs[2]; +skip: + clear_pt_regs_flag(regs, PIF_SYSCALL); + return ret; } asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) @@@ -945,28 -890,14 +945,14 @@@ static int s390_regs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { + unsigned pos; if (target == current) save_access_regs(target->thread.acrs); - if (kbuf) { - unsigned long *k = kbuf; - while (count > 0) { - *k++ = __peek_user(target, pos); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - unsigned long __user *u = ubuf; - while (count > 0) { - if (__put_user(__peek_user(target, pos), u++)) - return -EFAULT; - count -= sizeof(*u); - pos += sizeof(*u); - } - } + for (pos = 0; pos < sizeof(s390_regs); pos += sizeof(long)) + membuf_store(&to, __peek_user(target, pos)); return 0; } @@@ -1007,8 -938,8 +993,8 @@@ static int s390_regs_set(struct task_st } static int s390_fpregs_get(struct task_struct *target, - const struct user_regset *regset, unsigned int pos, - unsigned int count, void *kbuf, void __user *ubuf) + const struct user_regset *regset, + struct membuf to) { _s390_fp_regs fp_regs; @@@ -1018,8 -949,7 +1004,7 @@@ fp_regs.fpc = target->thread.fpu.fpc; fpregs_store(&fp_regs, &target->thread.fpu); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &fp_regs, 0, -1); + return membuf_write(&to, &fp_regs, sizeof(fp_regs)); } static int s390_fpregs_set(struct task_struct *target, @@@ -1066,20 -996,9 +1051,9 @@@ static int s390_last_break_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - if (count > 0) { - if (kbuf) { - unsigned long *k = kbuf; - *k = target->thread.last_break; - } else { - unsigned long __user *u = ubuf; - if (__put_user(target->thread.last_break, u)) - return -EFAULT; - } - } - return 0; + return membuf_store(&to, target->thread.last_break); } static int s390_last_break_set(struct task_struct *target, @@@ -1092,16 -1011,13 +1066,13 @@@ static int s390_tdb_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct pt_regs *regs = task_pt_regs(target); - unsigned char *data; if (!(regs->int_code & 0x200)) return -ENODATA; - data = target->thread.trap_tdb; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256); + return membuf_write(&to, target->thread.trap_tdb, 256); } static int s390_tdb_set(struct task_struct *target, @@@ -1114,8 -1030,7 +1085,7 @@@ static int s390_vxrs_low_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { __u64 vxrs[__NUM_VXRS_LOW]; int i; @@@ -1126,7 -1041,7 +1096,7 @@@ save_fpu_regs(); for (i = 0; i < __NUM_VXRS_LOW; i++) vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); + return membuf_write(&to, vxrs, sizeof(vxrs)); } static int s390_vxrs_low_set(struct task_struct *target, @@@ -1155,18 -1070,14 +1125,14 @@@ static int s390_vxrs_high_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - __vector128 vxrs[__NUM_VXRS_HIGH]; - if (!MACHINE_HAS_VX) return -ENODEV; if (target == current) save_fpu_regs(); - memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(vxrs)); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); + return membuf_write(&to, target->thread.fpu.vxrs + __NUM_VXRS_LOW, + __NUM_VXRS_HIGH * sizeof(__vector128)); } static int s390_vxrs_high_set(struct task_struct *target, @@@ -1188,12 -1099,9 +1154,9 @@@ static int s390_system_call_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - unsigned int *data = &target->thread.system_call; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - data, 0, sizeof(unsigned int)); + return membuf_store(&to, target->thread.system_call); } static int s390_system_call_set(struct task_struct *target, @@@ -1208,8 -1116,7 +1171,7 @@@ static int s390_gs_cb_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct gs_cb *data = target->thread.gs_cb; @@@ -1219,8 -1126,7 +1181,7 @@@ return -ENODATA; if (target == current) save_gs_cb(data); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - data, 0, sizeof(struct gs_cb)); + return membuf_write(&to, data, sizeof(struct gs_cb)); } static int s390_gs_cb_set(struct task_struct *target, @@@ -1264,8 -1170,7 +1225,7 @@@ static int s390_gs_bc_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct gs_cb *data = target->thread.gs_bc_cb; @@@ -1273,8 -1178,7 +1233,7 @@@ return -ENODEV; if (!data) return -ENODATA; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - data, 0, sizeof(struct gs_cb)); + return membuf_write(&to, data, sizeof(struct gs_cb)); } static int s390_gs_bc_set(struct task_struct *target, @@@ -1325,8 -1229,7 +1284,7 @@@ static bool is_ri_cb_valid(struct runti static int s390_runtime_instr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct runtime_instr_cb *data = target->thread.ri_cb; @@@ -1335,8 -1238,7 +1293,7 @@@ if (!data) return -ENODATA; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - data, 0, sizeof(struct runtime_instr_cb)); + return membuf_write(&to, data, sizeof(struct runtime_instr_cb)); } static int s390_runtime_instr_set(struct task_struct *target, @@@ -1392,7 -1294,7 +1349,7 @@@ static const struct user_regset s390_re .n = sizeof(s390_regs) / sizeof(long), .size = sizeof(long), .align = sizeof(long), - .get = s390_regs_get, + .regset_get = s390_regs_get, .set = s390_regs_set, }, { @@@ -1400,7 -1302,7 +1357,7 @@@ .n = sizeof(s390_fp_regs) / sizeof(long), .size = sizeof(long), .align = sizeof(long), - .get = s390_fpregs_get, + .regset_get = s390_fpregs_get, .set = s390_fpregs_set, }, { @@@ -1408,7 -1310,7 +1365,7 @@@ .n = 1, .size = sizeof(unsigned int), .align = sizeof(unsigned int), - .get = s390_system_call_get, + .regset_get = s390_system_call_get, .set = s390_system_call_set, }, { @@@ -1416,7 -1318,7 +1373,7 @@@ .n = 1, .size = sizeof(long), .align = sizeof(long), - .get = s390_last_break_get, + .regset_get = s390_last_break_get, .set = s390_last_break_set, }, { @@@ -1424,7 -1326,7 +1381,7 @@@ .n = 1, .size = 256, .align = 1, - .get = s390_tdb_get, + .regset_get = s390_tdb_get, .set = s390_tdb_set, }, { @@@ -1432,7 -1334,7 +1389,7 @@@ .n = __NUM_VXRS_LOW, .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_vxrs_low_get, + .regset_get = s390_vxrs_low_get, .set = s390_vxrs_low_set, }, { @@@ -1440,7 -1342,7 +1397,7 @@@ .n = __NUM_VXRS_HIGH, .size = sizeof(__vector128), .align = sizeof(__vector128), - .get = s390_vxrs_high_get, + .regset_get = s390_vxrs_high_get, .set = s390_vxrs_high_set, }, { @@@ -1448,7 -1350,7 +1405,7 @@@ .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_gs_cb_get, + .regset_get = s390_gs_cb_get, .set = s390_gs_cb_set, }, { @@@ -1456,7 -1358,7 +1413,7 @@@ .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_gs_bc_get, + .regset_get = s390_gs_bc_get, .set = s390_gs_bc_set, }, { @@@ -1464,7 -1366,7 +1421,7 @@@ .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_runtime_instr_get, + .regset_get = s390_runtime_instr_get, .set = s390_runtime_instr_set, }, }; @@@ -1479,28 -1381,15 +1436,15 @@@ static const struct user_regset_view us #ifdef CONFIG_COMPAT static int s390_compat_regs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { + unsigned n; + if (target == current) save_access_regs(target->thread.acrs); - if (kbuf) { - compat_ulong_t *k = kbuf; - while (count > 0) { - *k++ = __peek_user_compat(target, pos); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - compat_ulong_t __user *u = ubuf; - while (count > 0) { - if (__put_user(__peek_user_compat(target, pos), u++)) - return -EFAULT; - count -= sizeof(*u); - pos += sizeof(*u); - } - } + for (n = 0; n < sizeof(s390_compat_regs); n += sizeof(compat_ulong_t)) + membuf_store(&to, __peek_user_compat(target, n)); return 0; } @@@ -1542,29 -1431,14 +1486,14 @@@ static int s390_compat_regs_set(struct static int s390_compat_regs_high_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { compat_ulong_t *gprs_high; + int i; - gprs_high = (compat_ulong_t *) - &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)]; - if (kbuf) { - compat_ulong_t *k = kbuf; - while (count > 0) { - *k++ = *gprs_high; - gprs_high += 2; - count -= sizeof(*k); - } - } else { - compat_ulong_t __user *u = ubuf; - while (count > 0) { - if (__put_user(*gprs_high, u++)) - return -EFAULT; - gprs_high += 2; - count -= sizeof(*u); - } - } + gprs_high = (compat_ulong_t *)task_pt_regs(target)->gprs; + for (i = 0; i < NUM_GPRS; i++, gprs_high += 2) + membuf_store(&to, *gprs_high); return 0; } @@@ -1603,23 -1477,11 +1532,11 @@@ static int s390_compat_regs_high_set(st static int s390_compat_last_break_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - compat_ulong_t last_break; + compat_ulong_t last_break = target->thread.last_break; - if (count > 0) { - last_break = target->thread.last_break; - if (kbuf) { - unsigned long *k = kbuf; - *k = last_break; - } else { - unsigned long __user *u = ubuf; - if (__put_user(last_break, u)) - return -EFAULT; - } - } - return 0; + return membuf_store(&to, (unsigned long)last_break); } static int s390_compat_last_break_set(struct task_struct *target, @@@ -1636,7 -1498,7 +1553,7 @@@ static const struct user_regset s390_co .n = sizeof(s390_compat_regs) / sizeof(compat_long_t), .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), - .get = s390_compat_regs_get, + .regset_get = s390_compat_regs_get, .set = s390_compat_regs_set, }, { @@@ -1644,7 -1506,7 +1561,7 @@@ .n = sizeof(s390_fp_regs) / sizeof(compat_long_t), .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), - .get = s390_fpregs_get, + .regset_get = s390_fpregs_get, .set = s390_fpregs_set, }, { @@@ -1652,7 -1514,7 +1569,7 @@@ .n = 1, .size = sizeof(compat_uint_t), .align = sizeof(compat_uint_t), - .get = s390_system_call_get, + .regset_get = s390_system_call_get, .set = s390_system_call_set, }, { @@@ -1660,7 -1522,7 +1577,7 @@@ .n = 1, .size = sizeof(long), .align = sizeof(long), - .get = s390_compat_last_break_get, + .regset_get = s390_compat_last_break_get, .set = s390_compat_last_break_set, }, { @@@ -1668,7 -1530,7 +1585,7 @@@ .n = 1, .size = 256, .align = 1, - .get = s390_tdb_get, + .regset_get = s390_tdb_get, .set = s390_tdb_set, }, { @@@ -1676,7 -1538,7 +1593,7 @@@ .n = __NUM_VXRS_LOW, .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_vxrs_low_get, + .regset_get = s390_vxrs_low_get, .set = s390_vxrs_low_set, }, { @@@ -1684,7 -1546,7 +1601,7 @@@ .n = __NUM_VXRS_HIGH, .size = sizeof(__vector128), .align = sizeof(__vector128), - .get = s390_vxrs_high_get, + .regset_get = s390_vxrs_high_get, .set = s390_vxrs_high_set, }, { @@@ -1692,7 -1554,7 +1609,7 @@@ .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), - .get = s390_compat_regs_high_get, + .regset_get = s390_compat_regs_high_get, .set = s390_compat_regs_high_set, }, { @@@ -1700,7 -1562,7 +1617,7 @@@ .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_gs_cb_get, + .regset_get = s390_gs_cb_get, .set = s390_gs_cb_set, }, { @@@ -1708,7 -1570,7 +1625,7 @@@ .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_gs_bc_get, + .regset_get = s390_gs_bc_get, .set = s390_gs_bc_set, }, { @@@ -1716,7 -1578,7 +1633,7 @@@ .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_runtime_instr_get, + .regset_get = s390_runtime_instr_get, .set = s390_runtime_instr_set, }, }; diff --combined arch/sh/kernel/process_32.c index b0fefd8f53a6,6ab397bc47ed..cde0a66116d2 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@@ -103,9 -103,8 +103,8 @@@ int dump_fpu(struct pt_regs *regs, elf_ fpvalid = !!tsk_used_math(tsk); if (fpvalid) - fpvalid = !fpregs_get(tsk, NULL, 0, - sizeof(struct user_fpu_struct), - fpu, NULL); + fpvalid = !fpregs_get(tsk, NULL, + (struct membuf){fpu, sizeof(*fpu)}); #endif return fpvalid; @@@ -115,8 -114,8 +114,8 @@@ EXPORT_SYMBOL(dump_fpu) asmlinkage void ret_from_fork(void); asmlinkage void ret_from_kernel_thread(void); -int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long arg, struct task_struct *p) +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, + struct task_struct *p, unsigned long tls) { struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs; @@@ -158,7 -157,7 +157,7 @@@ ti->addr_limit = USER_DS; if (clone_flags & CLONE_SETTLS) - childregs->gbr = childregs->regs[0]; + childregs->gbr = tls; childregs->regs[0] = 0; /* Set return value for child */ p->thread.pc = (unsigned long) ret_from_fork; diff --combined arch/x86/include/asm/fpu/internal.h index 6b10cdaa7c96,0c85a08176c0..0a460f2a3f90 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@@ -34,7 -34,6 +34,6 @@@ extern int fpu__copy(struct task_struc extern void fpu__clear_user_states(struct fpu *fpu); extern void fpu__clear_all(struct fpu *fpu); extern int fpu__exception_code(struct fpu *fpu, int trap_nr); - extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate); /* * Boot time FPU initialization functions: @@@ -274,7 -273,7 +273,7 @@@ static inline void copy_fxregs_to_kerne */ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) { - u64 mask = -1; + u64 mask = xfeatures_mask_all; u32 lmask = mask; u32 hmask = mask >> 32; int err; @@@ -320,7 -319,7 +319,7 @@@ static inline void copy_kernel_to_xregs */ static inline void copy_xregs_to_kernel(struct xregs_state *xstate) { - u64 mask = -1; + u64 mask = xfeatures_mask_all; u32 lmask = mask; u32 hmask = mask >> 32; int err; @@@ -356,9 -355,6 +355,9 @@@ static inline void copy_kernel_to_xregs */ static inline int copy_xregs_to_user(struct xregs_state __user *buf) { + u64 mask = xfeatures_mask_user(); + u32 lmask = mask; + u32 hmask = mask >> 32; int err; /* @@@ -370,7 -366,7 +369,7 @@@ return -EFAULT; stac(); - XSTATE_OP(XSAVE, buf, -1, -1, err); + XSTATE_OP(XSAVE, buf, lmask, hmask, err); clac(); return err; @@@ -411,7 -407,43 +410,7 @@@ static inline int copy_kernel_to_xregs_ return err; } -/* - * These must be called with preempt disabled. Returns - * 'true' if the FPU state is still intact and we can - * keep registers active. - * - * The legacy FNSAVE instruction cleared all FPU state - * unconditionally, so registers are essentially destroyed. - * Modern FPU state can be kept in registers, if there are - * no pending FP exceptions. - */ -static inline int copy_fpregs_to_fpstate(struct fpu *fpu) -{ - if (likely(use_xsave())) { - copy_xregs_to_kernel(&fpu->state.xsave); - - /* - * AVX512 state is tracked here because its use is - * known to slow the max clock speed of the core. - */ - if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512) - fpu->avx512_timestamp = jiffies; - return 1; - } - - if (likely(use_fxsr())) { - copy_fxregs_to_kernel(fpu); - return 1; - } - - /* - * Legacy FPU register saving, FNSAVE always clears FPU registers, - * so we have to mark them inactive: - */ - asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave)); - - return 0; -} +extern int copy_fpregs_to_fpstate(struct fpu *fpu); static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask) { @@@ -590,11 -622,6 +589,11 @@@ static inline void switch_fpu_finish(st * MXCSR and XCR definitions: */ +static inline void ldmxcsr(u32 mxcsr) +{ + asm volatile("ldmxcsr %0" :: "m" (mxcsr)); +} + extern unsigned int mxcsr_feature_mask; #define XCR_XFEATURE_ENABLED_MASK 0x00000000 diff --combined arch/x86/include/asm/fpu/xstate.h index 1559554af931,f691ea1bc086..14ab815132d4 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@@ -21,8 -21,6 +21,8 @@@ #define XSAVE_YMM_SIZE 256 #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) +#define XSAVE_ALIGNMENT 64 + /* All currently supported user features */ #define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \ XFEATURE_MASK_SSE | \ @@@ -37,27 -35,6 +37,27 @@@ /* All currently supported supervisor features */ #define XFEATURE_MASK_SUPERVISOR_SUPPORTED (0) +/* + * A supervisor state component may not always contain valuable information, + * and its size may be huge. Saving/restoring such supervisor state components + * at each context switch can cause high CPU and space overhead, which should + * be avoided. Such supervisor state components should only be saved/restored + * on demand. The on-demand dynamic supervisor features are set in this mask. + * + * Unlike the existing supported supervisor features, a dynamic supervisor + * feature does not allocate a buffer in task->fpu, and the corresponding + * supervisor state component cannot be saved/restored at each context switch. + * + * To support a dynamic supervisor feature, a developer should follow the + * dos and don'ts as below: + * - Do dynamically allocate a buffer for the supervisor state component. + * - Do manually invoke the XSAVES/XRSTORS instruction to save/restore the + * state component to/from the buffer. + * - Don't set the bit corresponding to the dynamic supervisor feature in + * IA32_XSS at run time, since it has been set at boot time. + */ +#define XFEATURE_MASK_DYNAMIC (XFEATURE_MASK_LBR) + /* * Unsupported supervisor features. When a supervisor feature in this mask is * supported in the future, move it to the supported supervisor feature mask. @@@ -66,7 -43,6 +66,7 @@@ /* All supervisor states including supported and unsupported states. */ #define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \ + XFEATURE_MASK_DYNAMIC | \ XFEATURE_MASK_SUPERVISOR_UNSUPPORTED) #ifdef CONFIG_X86_64 @@@ -87,14 -63,6 +87,14 @@@ static inline u64 xfeatures_mask_user(v return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED; } +static inline u64 xfeatures_mask_dynamic(void) +{ + if (!boot_cpu_has(X86_FEATURE_ARCH_LBR)) + return XFEATURE_MASK_DYNAMIC & ~XFEATURE_MASK_LBR; + + return XFEATURE_MASK_DYNAMIC; +} + extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void __init update_regset_xstate_info(unsigned int size, @@@ -103,15 -71,11 +103,15 @@@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); const void *get_xsave_field_ptr(int xfeature_nr); int using_compacted_format(void); +int xfeature_size(int xfeature_nr); - int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); - int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); + struct membuf; + void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave); int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); void copy_supervisor_to_kernel(struct xregs_state *xsave); +void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask); +void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask); + /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ int validate_user_xstate_header(const struct xstate_header *hdr); diff --combined arch/x86/kernel/fpu/xstate.c index be2a68a09d19,4f8e4287c176..7a2bf884fede --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@@ -233,10 -233,8 +233,10 @@@ void fpu__init_cpu_xstate(void /* * MSR_IA32_XSS sets supervisor states managed by XSAVES. */ - if (boot_cpu_has(X86_FEATURE_XSAVES)) - wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()); + if (boot_cpu_has(X86_FEATURE_XSAVES)) { + wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | + xfeatures_mask_dynamic()); + } } static bool xfeature_enabled(enum xfeature xfeature) @@@ -488,7 -486,7 +488,7 @@@ static int xfeature_uncompacted_offset( return ebx; } -static int xfeature_size(int xfeature_nr) +int xfeature_size(int xfeature_nr) { u32 eax, ebx, ecx, edx; @@@ -600,8 -598,7 +600,8 @@@ static void check_xstate_against_struct */ if ((nr < XFEATURE_YMM) || (nr >= XFEATURE_MAX) || - (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR)) { + (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) || + ((nr >= XFEATURE_RSRVD_COMP_10) && (nr <= XFEATURE_LBR))) { WARN_ONCE(1, "no structure for xstate: %d\n", nr); XSTATE_WARN_ON(1); } @@@ -850,10 -847,8 +850,10 @@@ void fpu__resume_cpu(void * Restore IA32_XSS. The same CPUID bit enumerates support * of XSAVES and MSR_IA32_XSS. */ - if (boot_cpu_has(X86_FEATURE_XSAVES)) - wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()); + if (boot_cpu_has(X86_FEATURE_XSAVES)) { + wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | + xfeatures_mask_dynamic()); + } } /* @@@ -1014,32 -1009,20 +1014,20 @@@ static inline bool xfeatures_mxcsr_quir return true; } - static void fill_gap(unsigned to, void **kbuf, unsigned *pos, unsigned *count) + static void fill_gap(struct membuf *to, unsigned *last, unsigned offset) { - if (*pos < to) { - unsigned size = to - *pos; - - if (size > *count) - size = *count; - memcpy(*kbuf, (void *)&init_fpstate.xsave + *pos, size); - *kbuf += size; - *pos += size; - *count -= size; - } + if (*last >= offset) + return; + membuf_write(to, (void *)&init_fpstate.xsave + *last, offset - *last); + *last = offset; } - static void copy_part(unsigned offset, unsigned size, void *from, - void **kbuf, unsigned *pos, unsigned *count) + static void copy_part(struct membuf *to, unsigned *last, unsigned offset, + unsigned size, void *from) { - fill_gap(offset, kbuf, pos, count); - if (size > *count) - size = *count; - if (size) { - memcpy(*kbuf, from, size); - *kbuf += size; - *pos += size; - *count -= size; - } + fill_gap(to, last, offset); + membuf_write(to, from, size); + *last = offset + size; } /* @@@ -1049,19 -1032,14 +1037,14 @@@ * It supports partial copy but pos always starts from zero. This is called * from xstateregs_get() and there we check the CPU has XSAVES. */ - int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total) + void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave) { struct xstate_header header; const unsigned off_mxcsr = offsetof(struct fxregs_state, mxcsr); - unsigned count = size_total; + unsigned size = to.left; + unsigned last = 0; int i; - /* - * Currently copy_regset_to_user() starts from pos 0: - */ - if (unlikely(offset_start != 0)) - return -EFAULT; - /* * The destination is a ptrace buffer; we put in only user xstates: */ @@@ -1070,27 -1048,26 +1053,26 @@@ header.xfeatures &= xfeatures_mask_user(); if (header.xfeatures & XFEATURE_MASK_FP) - copy_part(0, off_mxcsr, - &xsave->i387, &kbuf, &offset_start, &count); + copy_part(&to, &last, 0, off_mxcsr, &xsave->i387); if (header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)) - copy_part(off_mxcsr, MXCSR_AND_FLAGS_SIZE, - &xsave->i387.mxcsr, &kbuf, &offset_start, &count); + copy_part(&to, &last, off_mxcsr, + MXCSR_AND_FLAGS_SIZE, &xsave->i387.mxcsr); if (header.xfeatures & XFEATURE_MASK_FP) - copy_part(offsetof(struct fxregs_state, st_space), 128, - &xsave->i387.st_space, &kbuf, &offset_start, &count); + copy_part(&to, &last, offsetof(struct fxregs_state, st_space), + 128, &xsave->i387.st_space); if (header.xfeatures & XFEATURE_MASK_SSE) - copy_part(xstate_offsets[XFEATURE_SSE], 256, - &xsave->i387.xmm_space, &kbuf, &offset_start, &count); + copy_part(&to, &last, xstate_offsets[XFEATURE_SSE], + 256, &xsave->i387.xmm_space); /* * Fill xsave->i387.sw_reserved value for ptrace frame: */ - copy_part(offsetof(struct fxregs_state, sw_reserved), 48, - xstate_fx_sw_bytes, &kbuf, &offset_start, &count); + copy_part(&to, &last, offsetof(struct fxregs_state, sw_reserved), + 48, xstate_fx_sw_bytes); /* * Copy xregs_state->header: */ - copy_part(offsetof(struct xregs_state, header), sizeof(header), - &header, &kbuf, &offset_start, &count); + copy_part(&to, &last, offsetof(struct xregs_state, header), + sizeof(header), &header); for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { /* @@@ -1099,104 -1076,12 +1081,12 @@@ if ((header.xfeatures >> i) & 1) { void *src = __raw_xsave_addr(xsave, i); - copy_part(xstate_offsets[i], xstate_sizes[i], - src, &kbuf, &offset_start, &count); + copy_part(&to, &last, xstate_offsets[i], + xstate_sizes[i], src); } } - fill_gap(size_total, &kbuf, &offset_start, &count); - - return 0; - } - - static inline int - __copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total) - { - if (!size) - return 0; - - if (offset < size_total) { - unsigned int copy = min(size, size_total - offset); - - if (__copy_to_user(ubuf + offset, data, copy)) - return -EFAULT; - } - return 0; - } - - /* - * Convert from kernel XSAVES compacted format to standard format and copy - * to a user-space buffer. It supports partial copy but pos always starts from - * zero. This is called from xstateregs_get() and there we check the CPU - * has XSAVES. - */ - int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total) - { - unsigned int offset, size; - int ret, i; - struct xstate_header header; - - /* - * Currently copy_regset_to_user() starts from pos 0: - */ - if (unlikely(offset_start != 0)) - return -EFAULT; - - /* - * The destination is a ptrace buffer; we put in only user xstates: - */ - memset(&header, 0, sizeof(header)); - header.xfeatures = xsave->header.xfeatures; - header.xfeatures &= xfeatures_mask_user(); - - /* - * Copy xregs_state->header: - */ - offset = offsetof(struct xregs_state, header); - size = sizeof(header); - - ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total); - if (ret) - return ret; - - for (i = 0; i < XFEATURE_MAX; i++) { - /* - * Copy only in-use xstates: - */ - if ((header.xfeatures >> i) & 1) { - void *src = __raw_xsave_addr(xsave, i); - - offset = xstate_offsets[i]; - size = xstate_sizes[i]; - - /* The next component has to fit fully into the output buffer: */ - if (offset + size > size_total) - break; - - ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total); - if (ret) - return ret; - } - - } - - if (xfeatures_mxcsr_quirk(header.xfeatures)) { - offset = offsetof(struct fxregs_state, mxcsr); - size = MXCSR_AND_FLAGS_SIZE; - __copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total); - } - - /* - * Fill xsave->i387.sw_reserved value for ptrace frame: - */ - offset = offsetof(struct fxregs_state, sw_reserved); - size = sizeof(xstate_fx_sw_bytes); - - ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total); - if (ret) - return ret; - - return 0; + fill_gap(&to, &last, size); } /* @@@ -1361,78 -1246,6 +1251,78 @@@ void copy_supervisor_to_kernel(struct x } } +/** + * copy_dynamic_supervisor_to_kernel() - Save dynamic supervisor states to + * an xsave area + * @xstate: A pointer to an xsave area + * @mask: Represent the dynamic supervisor features saved into the xsave area + * + * Only the dynamic supervisor states sets in the mask are saved into the xsave + * area (See the comment in XFEATURE_MASK_DYNAMIC for the details of dynamic + * supervisor feature). Besides the dynamic supervisor states, the legacy + * region and XSAVE header are also saved into the xsave area. The supervisor + * features in the XFEATURE_MASK_SUPERVISOR_SUPPORTED and + * XFEATURE_MASK_SUPERVISOR_UNSUPPORTED are not saved. + * + * The xsave area must be 64-bytes aligned. + */ +void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask) +{ + u64 dynamic_mask = xfeatures_mask_dynamic() & mask; + u32 lmask, hmask; + int err; + + if (WARN_ON_FPU(!boot_cpu_has(X86_FEATURE_XSAVES))) + return; + + if (WARN_ON_FPU(!dynamic_mask)) + return; + + lmask = dynamic_mask; + hmask = dynamic_mask >> 32; + + XSTATE_OP(XSAVES, xstate, lmask, hmask, err); + + /* Should never fault when copying to a kernel buffer */ + WARN_ON_FPU(err); +} + +/** + * copy_kernel_to_dynamic_supervisor() - Restore dynamic supervisor states from + * an xsave area + * @xstate: A pointer to an xsave area + * @mask: Represent the dynamic supervisor features restored from the xsave area + * + * Only the dynamic supervisor states sets in the mask are restored from the + * xsave area (See the comment in XFEATURE_MASK_DYNAMIC for the details of + * dynamic supervisor feature). Besides the dynamic supervisor states, the + * legacy region and XSAVE header are also restored from the xsave area. The + * supervisor features in the XFEATURE_MASK_SUPERVISOR_SUPPORTED and + * XFEATURE_MASK_SUPERVISOR_UNSUPPORTED are not restored. + * + * The xsave area must be 64-bytes aligned. + */ +void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask) +{ + u64 dynamic_mask = xfeatures_mask_dynamic() & mask; + u32 lmask, hmask; + int err; + + if (WARN_ON_FPU(!boot_cpu_has(X86_FEATURE_XSAVES))) + return; + + if (WARN_ON_FPU(!dynamic_mask)) + return; + + lmask = dynamic_mask; + hmask = dynamic_mask >> 32; + + XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); + + /* Should never fault when copying from a kernel buffer */ + WARN_ON_FPU(err); +} + #ifdef CONFIG_PROC_PID_ARCH_STATUS /* * Report the amount of time elapsed in millisecond since last AVX512 diff --combined arch/x86/kernel/ptrace.c index 3f006489087f,8413cb1285a7..5679aa3fdcb8 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@@ -281,9 -281,17 +281,9 @@@ static int set_segment_reg(struct task_ return -EIO; /* - * This function has some ABI oddities. - * - * A 32-bit ptracer probably expects that writing FS or GS will change - * FSBASE or GSBASE respectively. In the absence of FSGSBASE support, - * this code indeed has that effect. When FSGSBASE is added, this - * will require a special case. - * - * For existing 64-bit ptracers, writing FS or GS *also* currently - * changes the base if the selector is nonzero the next time the task - * is run. This behavior may not be needed, and trying to preserve it - * when FSGSBASE is added would be complicated at best. + * Writes to FS and GS will change the stored selector. Whether + * this changes the segment base as well depends on whether + * FSGSBASE is enabled. */ switch (offset) { @@@ -371,12 -379,25 +371,12 @@@ static int putreg(struct task_struct *c case offsetof(struct user_regs_struct,fs_base): if (value >= TASK_SIZE_MAX) return -EIO; - /* - * When changing the FS base, use do_arch_prctl_64() - * to set the index to zero and to set the base - * as requested. - * - * NB: This behavior is nonsensical and likely needs to - * change when FSGSBASE support is added. - */ - if (child->thread.fsbase != value) - return do_arch_prctl_64(child, ARCH_SET_FS, value); + x86_fsbase_write_task(child, value); return 0; case offsetof(struct user_regs_struct,gs_base): - /* - * Exactly the same here as the %fs handling above. - */ if (value >= TASK_SIZE_MAX) return -EIO; - if (child->thread.gsbase != value) - return do_arch_prctl_64(child, ARCH_SET_GS, value); + x86_gsbase_write_task(child, value); return 0; #endif } @@@ -412,26 -433,12 +412,12 @@@ static unsigned long getreg(struct task static int genregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - if (kbuf) { - unsigned long *k = kbuf; - while (count >= sizeof(*k)) { - *k++ = getreg(target, pos); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - unsigned long __user *u = ubuf; - while (count >= sizeof(*u)) { - if (__put_user(getreg(target, pos), u++)) - return -EFAULT; - count -= sizeof(*u); - pos += sizeof(*u); - } - } + int reg; + for (reg = 0; to.left; reg++) + membuf_store(&to, getreg(target, reg * sizeof(unsigned long))); return 0; } @@@ -695,16 -702,14 +681,14 @@@ static int ioperm_active(struct task_st static int ioperm_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct io_bitmap *iobm = target->thread.io_bitmap; if (!iobm) return -ENXIO; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - iobm->bitmap, 0, IO_BITMAP_BYTES); + return membuf_write(&to, iobm->bitmap, IO_BITMAP_BYTES); } /* @@@ -859,39 -864,14 +843,39 @@@ long arch_ptrace(struct task_struct *ch static int putreg32(struct task_struct *child, unsigned regno, u32 value) { struct pt_regs *regs = task_pt_regs(child); + int ret; switch (regno) { SEG32(cs); SEG32(ds); SEG32(es); - SEG32(fs); - SEG32(gs); + + /* + * A 32-bit ptracer on a 64-bit kernel expects that writing + * FS or GS will also update the base. This is needed for + * operations like PTRACE_SETREGS to fully restore a saved + * CPU state. + */ + + case offsetof(struct user32, regs.fs): + ret = set_segment_reg(child, + offsetof(struct user_regs_struct, fs), + value); + if (ret == 0) + child->thread.fsbase = + x86_fsgsbase_read_task(child, value); + return ret; + + case offsetof(struct user32, regs.gs): + ret = set_segment_reg(child, + offsetof(struct user_regs_struct, gs), + value); + if (ret == 0) + child->thread.gsbase = + x86_fsgsbase_read_task(child, value); + return ret; + SEG32(ss); R32(ebx, bx); @@@ -1007,28 -987,15 +991,15 @@@ static int getreg32(struct task_struct static int genregs32_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - if (kbuf) { - compat_ulong_t *k = kbuf; - while (count >= sizeof(*k)) { - getreg32(target, pos, k++); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - compat_ulong_t __user *u = ubuf; - while (count >= sizeof(*u)) { - compat_ulong_t word; - getreg32(target, pos, &word); - if (__put_user(word, u++)) - return -EFAULT; - count -= sizeof(*u); - pos += sizeof(*u); - } - } + int reg; + for (reg = 0; to.left; reg++) { + u32 val; + getreg32(target, reg * 4, &val); + membuf_store(&to, val); + } return 0; } @@@ -1238,25 -1205,25 +1209,25 @@@ static struct user_regset x86_64_regset .core_note_type = NT_PRSTATUS, .n = sizeof(struct user_regs_struct) / sizeof(long), .size = sizeof(long), .align = sizeof(long), - .get = genregs_get, .set = genregs_set + .regset_get = genregs_get, .set = genregs_set }, [REGSET_FP] = { .core_note_type = NT_PRFPREG, .n = sizeof(struct user_i387_struct) / sizeof(long), .size = sizeof(long), .align = sizeof(long), - .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set + .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set }, [REGSET_XSTATE] = { .core_note_type = NT_X86_XSTATE, .size = sizeof(u64), .align = sizeof(u64), - .active = xstateregs_active, .get = xstateregs_get, + .active = xstateregs_active, .regset_get = xstateregs_get, .set = xstateregs_set }, [REGSET_IOPERM64] = { .core_note_type = NT_386_IOPERM, .n = IO_BITMAP_LONGS, .size = sizeof(long), .align = sizeof(long), - .active = ioperm_active, .get = ioperm_get + .active = ioperm_active, .regset_get = ioperm_get }, }; @@@ -1279,24 -1246,24 +1250,24 @@@ static struct user_regset x86_32_regset .core_note_type = NT_PRSTATUS, .n = sizeof(struct user_regs_struct32) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = genregs32_get, .set = genregs32_set + .regset_get = genregs32_get, .set = genregs32_set }, [REGSET_FP] = { .core_note_type = NT_PRFPREG, .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .active = regset_fpregs_active, .get = fpregs_get, .set = fpregs_set + .active = regset_fpregs_active, .regset_get = fpregs_get, .set = fpregs_set }, [REGSET_XFP] = { .core_note_type = NT_PRXFPREG, .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set + .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set }, [REGSET_XSTATE] = { .core_note_type = NT_X86_XSTATE, .size = sizeof(u64), .align = sizeof(u64), - .active = xstateregs_active, .get = xstateregs_get, + .active = xstateregs_active, .regset_get = xstateregs_get, .set = xstateregs_set }, [REGSET_TLS] = { @@@ -1305,13 -1272,13 +1276,13 @@@ .size = sizeof(struct user_desc), .align = sizeof(struct user_desc), .active = regset_tls_active, - .get = regset_tls_get, .set = regset_tls_set + .regset_get = regset_tls_get, .set = regset_tls_set }, [REGSET_IOPERM32] = { .core_note_type = NT_386_IOPERM, .n = IO_BITMAP_BYTES / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .active = ioperm_active, .get = ioperm_get + .active = ioperm_active, .regset_get = ioperm_get }, }; diff --combined arch/xtensa/kernel/ptrace.c index ce4a32bd2294,620b8863b06e..bb3f4797d212 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@@ -12,7 -12,6 +12,7 @@@ * Marc Gauthier */ +#include #include #include #include @@@ -22,7 -21,6 +22,7 @@@ #include #include #include +#include #include #include #include @@@ -39,8 -37,7 +39,7 @@@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct pt_regs *regs = task_pt_regs(target); struct user_pt_regs newregs = { @@@ -53,7 -50,6 +52,7 @@@ .threadptr = regs->threadptr, .windowbase = regs->windowbase, .windowstart = regs->windowstart, + .syscall = regs->syscall, }; memcpy(newregs.a, @@@ -63,8 -59,7 +62,7 @@@ regs->areg, (WSBITS - regs->windowbase) * 16); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &newregs, 0, -1); + return membuf_write(&to, &newregs, sizeof(newregs)); } static int gpr_set(struct task_struct *target, @@@ -93,9 -88,6 +91,9 @@@ regs->sar = newregs.sar; regs->threadptr = newregs.threadptr; + if (newregs.syscall) + regs->syscall = newregs.syscall; + if (newregs.windowbase != regs->windowbase || newregs.windowstart != regs->windowstart) { u32 rotws, wmask; @@@ -121,8 -113,7 +119,7 @@@ static int tie_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { int ret; struct pt_regs *regs = task_pt_regs(target); @@@ -147,8 -138,7 +144,7 @@@ newregs->cp6 = ti->xtregs_cp.cp6; newregs->cp7 = ti->xtregs_cp.cp7; #endif - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - newregs, 0, -1); + ret = membuf_write(&to, newregs, sizeof(*newregs)); kfree(newregs); return ret; } @@@ -203,7 -193,7 +199,7 @@@ static const struct user_regset xtensa_ .n = sizeof(struct user_pt_regs) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = gpr_get, + .regset_get = gpr_get, .set = gpr_set, }, [REGSET_TIE] = { @@@ -211,7 -201,7 +207,7 @@@ .n = sizeof(elf_xtregs_t) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = tie_get, + .regset_get = tie_get, .set = tie_set, }, }; @@@ -560,8 -550,7 +556,8 @@@ int do_syscall_trace_enter(struct pt_re return 0; } - if (regs->syscall == NO_SYSCALL) { + if (regs->syscall == NO_SYSCALL || + secure_computing() == -1) { do_syscall_trace_leave(regs); return 0; } @@@ -569,9 -558,6 +565,9 @@@ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, syscall_get_nr(current, regs)); + audit_syscall_entry(regs->syscall, regs->areg[6], + regs->areg[3], regs->areg[4], + regs->areg[5]); return 1; } @@@ -579,8 -565,6 +575,8 @@@ void do_syscall_trace_leave(struct pt_r { int step; + audit_syscall_exit(regs); + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_exit(regs, regs_return_value(regs)); diff --combined kernel/Makefile index bdeb77e27042,e6e03380a0f1..5350fd292910 --- a/kernel/Makefile +++ b/kernel/Makefile @@@ -10,9 -10,8 +10,9 @@@ obj-y = fork.o exec_domain.o panic. extable.o params.o \ kthread.o sys_ni.o nsproxy.o \ notifier.o ksysfs.o cred.o reboot.o \ - async.o range.o smpboot.o ucount.o + async.o range.o smpboot.o ucount.o regset.o +obj-$(CONFIG_BPFILTER) += usermode_driver.o obj-$(CONFIG_MODULES) += kmod.o obj-$(CONFIG_MULTIUSER) += groups.o @@@ -49,7 -48,6 +49,7 @@@ obj-y += irq obj-y += rcu/ obj-y += livepatch/ obj-y += dma/ +obj-y += entry/ obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o obj-$(CONFIG_FREEZER) += freezer.o @@@ -127,7 -125,6 +127,7 @@@ obj-$(CONFIG_WATCH_QUEUE) += watch_queu obj-$(CONFIG_SYSCTL_KUNIT_TEST) += sysctl-test.o +CFLAGS_stackleak.o += $(DISABLE_STACKLEAK_PLUGIN) obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o KASAN_SANITIZE_stackleak.o := n KCSAN_SANITIZE_stackleak.o := n