2 * linux/fs/binfmt_elf.c
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
12 #include <linux/module.h>
13 #include <linux/kernel.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/utsname.h>
35 #include <linux/coredump.h>
36 #include <linux/sched.h>
37 #include <asm/uaccess.h>
38 #include <asm/param.h>
42 #define user_long_t long
44 #ifndef user_siginfo_t
45 #define user_siginfo_t siginfo_t
48 static int load_elf_binary(struct linux_binprm *bprm);
49 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
50 int, int, unsigned long);
53 static int load_elf_library(struct file *);
55 #define load_elf_library NULL
59 * If we don't support core dumping, then supply a NULL so we
62 #ifdef CONFIG_ELF_CORE
63 static int elf_core_dump(struct coredump_params *cprm);
65 #define elf_core_dump NULL
68 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
69 #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
71 #define ELF_MIN_ALIGN PAGE_SIZE
74 #ifndef ELF_CORE_EFLAGS
75 #define ELF_CORE_EFLAGS 0
78 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
79 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
80 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
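/*
 * For illustration (a sketch assuming ELF_MIN_ALIGN == 4096):
 *   ELF_PAGESTART(0x1234)  == 0x1000   (round the address down to a page)
 *   ELF_PAGEOFFSET(0x1234) == 0x234    (offset within that page)
 *   ELF_PAGEALIGN(0x1234)  == 0x2000   (round the value up to a page)
 * These are used below so that p_vaddr/p_offset values that are only
 * congruent modulo ELF_MIN_ALIGN can be handed to mmap() on page
 * boundaries.
 */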
82 static struct linux_binfmt elf_format = {
83 .module = THIS_MODULE,
84 .load_binary = load_elf_binary,
85 .load_shlib = load_elf_library,
86 .core_dump = elf_core_dump,
87 .min_coredump = ELF_EXEC_PAGESIZE,
90 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
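/*
 * set_brk() maps anonymous (zero-filled) pages covering [start, end) via
 * vm_brk() and records the new program break in current->mm.  Both
 * addresses are rounded up to ELF_MIN_ALIGN first.
 */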
92 static int set_brk(unsigned long start, unsigned long end)
94 start = ELF_PAGEALIGN(start);
95 end = ELF_PAGEALIGN(end);
98 addr = vm_brk(start, end - start);
102 current->mm->start_brk = current->mm->brk = end;
106 /* We need to explicitly zero any fractional pages
107 after the data section (i.e. bss). This would
108 contain the junk from the file that should not
111 static int padzero(unsigned long elf_bss)
115 nbyte = ELF_PAGEOFFSET(elf_bss);
117 nbyte = ELF_MIN_ALIGN - nbyte;
118 if (clear_user((void __user *) elf_bss, nbyte))
124 /* Let's use some macros to make this stack manipulation a little clearer */
125 #ifdef CONFIG_STACK_GROWSUP
126 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
127 #define STACK_ROUND(sp, items) \
128 ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
129 #define STACK_ALLOC(sp, len) ({ \
130 elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
133 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
134 #define STACK_ROUND(sp, items) \
135 (((unsigned long) (sp - items)) &~ 15UL)
136 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
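/*
 * A rough sketch of how the macros above behave on the usual
 * downward-growing stack: STACK_ADD(sp, n) returns a pointer n
 * elf_addr_t slots below sp, STACK_ROUND(sp, n) does the same and then
 * rounds the result down to a 16-byte boundary, and STACK_ALLOC(sp, len)
 * carves out len bytes by decrementing sp and evaluates to the new,
 * lower address.  The CONFIG_STACK_GROWSUP variants mirror this in the
 * upward direction.
 */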
139 #ifndef ELF_BASE_PLATFORM
141 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
142 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
143 * will be copied to the user stack in the same manner as AT_PLATFORM.
145 #define ELF_BASE_PLATFORM NULL
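/*
 * create_elf_tables() below lays out the initial userspace stack roughly
 * as described by the System V ABI (a sketch; lowest address first):
 *
 *     argc
 *     argv[0] ... argv[argc-1], NULL
 *     envp[0] ... envp[n-1],    NULL
 *     auxv[0].a_type, auxv[0].a_val, ..., AT_NULL, 0
 *     padding, argument/environment strings, platform string,
 *     AT_RANDOM bytes, ...                     (towards the stack top)
 */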
149 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
150 unsigned long load_addr, unsigned long interp_load_addr)
152 unsigned long p = bprm->p;
153 int argc = bprm->argc;
154 int envc = bprm->envc;
155 elf_addr_t __user *argv;
156 elf_addr_t __user *envp;
157 elf_addr_t __user *sp;
158 elf_addr_t __user *u_platform;
159 elf_addr_t __user *u_base_platform;
160 elf_addr_t __user *u_rand_bytes;
161 const char *k_platform = ELF_PLATFORM;
162 const char *k_base_platform = ELF_BASE_PLATFORM;
163 unsigned char k_rand_bytes[16];
165 elf_addr_t *elf_info;
167 const struct cred *cred = current_cred();
168 struct vm_area_struct *vma;
171 * In some cases (e.g. Hyper-Threading), we want to avoid L1
172 * evictions by the processes running on the same package. One
173 * thing we can do is to shuffle the initial stack for them.
176 p = arch_align_stack(p);
179 * If this architecture has a platform capability string, copy it
180 * to userspace. In some cases (Sparc), this info is impossible
181 * for userspace to get any other way, in others (i386) it is
186 size_t len = strlen(k_platform) + 1;
188 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
189 if (__copy_to_user(u_platform, k_platform, len))
194 * If this architecture has a "base" platform capability
195 * string, copy it to userspace.
197 u_base_platform = NULL;
198 if (k_base_platform) {
199 size_t len = strlen(k_base_platform) + 1;
201 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
202 if (__copy_to_user(u_base_platform, k_base_platform, len))
207 * Generate 16 random bytes for userspace PRNG seeding.
209 get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
210 u_rand_bytes = (elf_addr_t __user *)
211 STACK_ALLOC(p, sizeof(k_rand_bytes));
212 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
215 /* Create the ELF interpreter info */
216 elf_info = (elf_addr_t *)current->mm->saved_auxv;
217 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
218 #define NEW_AUX_ENT(id, val) \
220 elf_info[ei_index++] = id; \
221 elf_info[ei_index++] = val; \
226 * ARCH_DLINFO must come first so PPC can do its special alignment of
228 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
229 * ARCH_DLINFO changes
233 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
234 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
235 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
236 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
237 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
238 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
239 NEW_AUX_ENT(AT_BASE, interp_load_addr);
240 NEW_AUX_ENT(AT_FLAGS, 0);
241 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
242 NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
243 NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
244 NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
245 NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
246 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
247 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
249 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
251 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
253 NEW_AUX_ENT(AT_PLATFORM,
254 (elf_addr_t)(unsigned long)u_platform);
256 if (k_base_platform) {
257 NEW_AUX_ENT(AT_BASE_PLATFORM,
258 (elf_addr_t)(unsigned long)u_base_platform);
260 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
261 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
264 /* AT_NULL is zero; clear the rest too */
265 memset(&elf_info[ei_index], 0,
266 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
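/*
 * The table built above is what userspace later reads back via
 * getauxval(3) or by walking past envp: each entry is an (a_type, value)
 * pair, e.g. (AT_PAGESZ, 4096), terminated by an AT_NULL entry.  Since
 * it is assembled directly in mm->saved_auxv, the same data is also what
 * /proc/<pid>/auxv reports.
 */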
268 /* And advance past the AT_NULL entry. */
271 sp = STACK_ADD(p, ei_index);
273 items = (argc + 1) + (envc + 1) + 1;
274 bprm->p = STACK_ROUND(sp, items);
276 /* Point sp at the lowest address on the stack */
277 #ifdef CONFIG_STACK_GROWSUP
278 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
279 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
281 sp = (elf_addr_t __user *)bprm->p;
286 * Grow the stack manually; some architectures have a limit on how
287 * far ahead a user-space access may be in order to grow the stack.
289 vma = find_extend_vma(current->mm, bprm->p);
293 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
294 if (__put_user(argc, sp++))
297 envp = argv + argc + 1;
299 /* Populate argv and envp */
300 p = current->mm->arg_end = current->mm->arg_start;
303 if (__put_user((elf_addr_t)p, argv++))
305 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
306 if (!len || len > MAX_ARG_STRLEN)
310 if (__put_user(0, argv))
312 current->mm->arg_end = current->mm->env_start = p;
315 if (__put_user((elf_addr_t)p, envp++))
317 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
318 if (!len || len > MAX_ARG_STRLEN)
322 if (__put_user(0, envp))
324 current->mm->env_end = p;
326 /* Put the elf_info on the stack in the right place. */
327 sp = (elf_addr_t __user *)envp + 1;
328 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
335 static unsigned long elf_map(struct file *filep, unsigned long addr,
336 struct elf_phdr *eppnt, int prot, int type,
337 unsigned long total_size)
339 unsigned long map_addr;
340 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
341 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
342 addr = ELF_PAGESTART(addr);
343 size = ELF_PAGEALIGN(size);
345 /* mmap() will return -EINVAL if given a zero size, but a
346 * segment with zero filesize is perfectly valid */
351 * total_size is the size of the ELF (interpreter) image.
352 * The _first_ mmap needs to know the full size, otherwise
353 * randomization might put this image into an overlapping
354 * position with the ELF binary image. (since size < total_size)
355 * So we first map the 'big' image and then unmap the remainder at
356 * the end. (This unmapping is needed for ELF images with holes.)
359 total_size = ELF_PAGEALIGN(total_size);
360 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
361 if (!BAD_ADDR(map_addr))
362 vm_munmap(map_addr+size, total_size-size);
364 map_addr = vm_mmap(filep, addr, size, prot, type, off);
369 #endif /* !elf_map */
371 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
373 int i, first_idx = -1, last_idx = -1;
375 for (i = 0; i < nr; i++) {
376 if (cmds[i].p_type == PT_LOAD) {
385 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
386 ELF_PAGESTART(cmds[first_idx].p_vaddr);
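/*
 * In other words, the value returned above is the span from the
 * page-aligned start of the first PT_LOAD segment to the end of the last
 * one's memory image, i.e. how much contiguous address space the whole
 * image needs when it is mapped as a unit.
 */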
390 /* This is much more generalized than the library routine read function,
391 so we keep this separate. Technically the library read function
392 is only provided so that we can read a.out libraries that have
395 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
396 struct file *interpreter, unsigned long *interp_map_addr,
397 unsigned long no_base)
399 struct elf_phdr *elf_phdata;
400 struct elf_phdr *eppnt;
401 unsigned long load_addr = 0;
402 int load_addr_set = 0;
403 unsigned long last_bss = 0, elf_bss = 0;
404 unsigned long error = ~0UL;
405 unsigned long total_size;
408 /* First of all, some simple consistency checks */
409 if (interp_elf_ex->e_type != ET_EXEC &&
410 interp_elf_ex->e_type != ET_DYN)
412 if (!elf_check_arch(interp_elf_ex))
414 if (!interpreter->f_op->mmap)
418 * If the size of this structure has changed, then punt, since
419 * we will be doing the wrong thing.
421 if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
423 if (interp_elf_ex->e_phnum < 1 ||
424 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
427 /* Now read in all of the header information */
428 size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
429 if (size > ELF_MIN_ALIGN)
431 elf_phdata = kmalloc(size, GFP_KERNEL);
435 retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
436 (char *)elf_phdata, size);
438 if (retval != size) {
444 total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
451 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
452 if (eppnt->p_type == PT_LOAD) {
453 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
455 unsigned long vaddr = 0;
456 unsigned long k, map_addr;
458 if (eppnt->p_flags & PF_R)
459 elf_prot = PROT_READ;
460 if (eppnt->p_flags & PF_W)
461 elf_prot |= PROT_WRITE;
462 if (eppnt->p_flags & PF_X)
463 elf_prot |= PROT_EXEC;
464 vaddr = eppnt->p_vaddr;
465 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
466 elf_type |= MAP_FIXED;
467 else if (no_base && interp_elf_ex->e_type == ET_DYN)
470 map_addr = elf_map(interpreter, load_addr + vaddr,
471 eppnt, elf_prot, elf_type, total_size);
473 if (!*interp_map_addr)
474 *interp_map_addr = map_addr;
476 if (BAD_ADDR(map_addr))
479 if (!load_addr_set &&
480 interp_elf_ex->e_type == ET_DYN) {
481 load_addr = map_addr - ELF_PAGESTART(vaddr);
486 * Check to see if the section's size will overflow the
487 * allowed task size. Note that p_filesz must always be
488 * <= p_memsz so it's only necessary to check p_memsz.
490 k = load_addr + eppnt->p_vaddr;
492 eppnt->p_filesz > eppnt->p_memsz ||
493 eppnt->p_memsz > TASK_SIZE ||
494 TASK_SIZE - eppnt->p_memsz < k) {
500 * Find the end of the file mapping for this phdr, and
501 * keep track of the largest address we see for this.
503 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
508 * Do the same thing for the memory mapping - between
509 * elf_bss and last_bss is the bss section.
511 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
517 if (last_bss > elf_bss) {
519 * Now fill out the bss section. First pad the last page up
520 * to the page boundary, and then perform a mmap to make sure
521 * that there are zero-mapped pages up to and including the
524 if (padzero(elf_bss)) {
529 /* What we have mapped so far */
530 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
532 /* Map the last of the bss segment */
533 error = vm_brk(elf_bss, last_bss - elf_bss);
547 * These are the functions used to load ELF style executables and shared
548 * libraries. There is no binary dependent code anywhere else.
551 #ifndef STACK_RND_MASK
552 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
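/*
 * With 4 KiB pages (PAGE_SHIFT == 12) the mask covers 0x7ff pages, so the
 * stack-top randomization below can shift the stack by up to
 * 0x7ff << 12 = 8 MiB - 4 KiB.  Architectures with larger pages shrink
 * the mask so the randomized span stays roughly 8 MiB of VA.
 */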
555 static unsigned long randomize_stack_top(unsigned long stack_top)
557 unsigned int random_variable = 0;
559 if ((current->flags & PF_RANDOMIZE) &&
560 !(current->personality & ADDR_NO_RANDOMIZE)) {
561 random_variable = get_random_int() & STACK_RND_MASK;
562 random_variable <<= PAGE_SHIFT;
564 #ifdef CONFIG_STACK_GROWSUP
565 return PAGE_ALIGN(stack_top) + random_variable;
567 return PAGE_ALIGN(stack_top) - random_variable;
571 static int load_elf_binary(struct linux_binprm *bprm)
573 struct file *interpreter = NULL; /* to shut gcc up */
574 unsigned long load_addr = 0, load_bias = 0;
575 int load_addr_set = 0;
576 char * elf_interpreter = NULL;
578 struct elf_phdr *elf_ppnt, *elf_phdata;
579 unsigned long elf_bss, elf_brk;
582 unsigned long elf_entry;
583 unsigned long interp_load_addr = 0;
584 unsigned long start_code, end_code, start_data, end_data;
585 unsigned long reloc_func_desc __maybe_unused = 0;
586 int executable_stack = EXSTACK_DEFAULT;
587 unsigned long def_flags = 0;
588 struct pt_regs *regs = current_pt_regs();
590 struct elfhdr elf_ex;
591 struct elfhdr interp_elf_ex;
594 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
600 /* Get the exec-header */
601 loc->elf_ex = *((struct elfhdr *)bprm->buf);
604 /* First of all, some simple consistency checks */
605 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
608 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
610 if (!elf_check_arch(&loc->elf_ex))
612 if (!bprm->file->f_op->mmap)
615 /* Now read in all of the header information */
616 if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
618 if (loc->elf_ex.e_phnum < 1 ||
619 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
621 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
623 elf_phdata = kmalloc(size, GFP_KERNEL);
627 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
628 (char *)elf_phdata, size);
629 if (retval != size) {
635 elf_ppnt = elf_phdata;
644 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
645 if (elf_ppnt->p_type == PT_INTERP) {
646 /* This is the program interpreter used for
647 * shared libraries - for now assume that this
648 * is an a.out format binary
651 if (elf_ppnt->p_filesz > PATH_MAX ||
652 elf_ppnt->p_filesz < 2)
656 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
658 if (!elf_interpreter)
661 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
664 if (retval != elf_ppnt->p_filesz) {
667 goto out_free_interp;
669 /* make sure the path is NUL-terminated */
671 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
672 goto out_free_interp;
674 interpreter = open_exec(elf_interpreter);
675 retval = PTR_ERR(interpreter);
676 if (IS_ERR(interpreter))
677 goto out_free_interp;
680 * If the binary is not readable then enforce
681 * mm->dumpable = 0 regardless of the interpreter's
684 would_dump(bprm, interpreter);
686 retval = kernel_read(interpreter, 0, bprm->buf,
688 if (retval != BINPRM_BUF_SIZE) {
691 goto out_free_dentry;
694 /* Get the exec headers */
695 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
701 elf_ppnt = elf_phdata;
702 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
703 if (elf_ppnt->p_type == PT_GNU_STACK) {
704 if (elf_ppnt->p_flags & PF_X)
705 executable_stack = EXSTACK_ENABLE_X;
707 executable_stack = EXSTACK_DISABLE_X;
711 /* Some simple consistency checks for the interpreter */
712 if (elf_interpreter) {
714 /* Not an ELF interpreter */
715 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
716 goto out_free_dentry;
717 /* Verify the interpreter has a valid arch */
718 if (!elf_check_arch(&loc->interp_elf_ex))
719 goto out_free_dentry;
722 /* Flush all traces of the currently running executable */
723 retval = flush_old_exec(bprm);
725 goto out_free_dentry;
727 /* OK, This is the point of no return */
728 current->mm->def_flags = def_flags;
730 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
731 may depend on the personality. */
732 SET_PERSONALITY(loc->elf_ex);
733 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
734 current->personality |= READ_IMPLIES_EXEC;
736 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
737 current->flags |= PF_RANDOMIZE;
739 setup_new_exec(bprm);
741 /* Do this so that we can load the interpreter, if need be. We will
742 change some of these later */
743 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
746 send_sig(SIGKILL, current, 0);
747 goto out_free_dentry;
750 current->mm->start_stack = bprm->p;
752 /* Now we do a little grungy work by mmapping the ELF image into
753 the correct location in memory. */
754 for(i = 0, elf_ppnt = elf_phdata;
755 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
756 int elf_prot = 0, elf_flags;
757 unsigned long k, vaddr;
759 if (elf_ppnt->p_type != PT_LOAD)
762 if (unlikely (elf_brk > elf_bss)) {
765 /* There was a PT_LOAD segment with p_memsz > p_filesz
766 before this one. Map anonymous pages, if needed,
767 and clear the area. */
768 retval = set_brk(elf_bss + load_bias,
769 elf_brk + load_bias);
771 send_sig(SIGKILL, current, 0);
772 goto out_free_dentry;
774 nbyte = ELF_PAGEOFFSET(elf_bss);
776 nbyte = ELF_MIN_ALIGN - nbyte;
777 if (nbyte > elf_brk - elf_bss)
778 nbyte = elf_brk - elf_bss;
779 if (clear_user((void __user *)elf_bss +
782 * This bss-zeroing can fail if the ELF
783 * file specifies odd protections. So
784 * we don't check the return value
790 if (elf_ppnt->p_flags & PF_R)
791 elf_prot |= PROT_READ;
792 if (elf_ppnt->p_flags & PF_W)
793 elf_prot |= PROT_WRITE;
794 if (elf_ppnt->p_flags & PF_X)
795 elf_prot |= PROT_EXEC;
797 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
799 vaddr = elf_ppnt->p_vaddr;
800 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
801 elf_flags |= MAP_FIXED;
802 } else if (loc->elf_ex.e_type == ET_DYN) {
803 /* Try to get dynamic programs out of the way of the
804 * default mmap base, as well as whatever program they
805 * might try to exec. This is because the brk will
806 * follow the loader, and is not movable. */
807 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
808 /* Memory randomization might have been switched off
809 * at runtime via sysctl or explicit setting of
811 * If that is the case, retain the original non-zero
812 * load_bias value in order to establish proper
813 * non-randomized mappings.
815 if (current->flags & PF_RANDOMIZE)
818 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
820 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
824 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
825 elf_prot, elf_flags, 0);
826 if (BAD_ADDR(error)) {
827 send_sig(SIGKILL, current, 0);
828 retval = IS_ERR((void *)error) ?
829 PTR_ERR((void*)error) : -EINVAL;
830 goto out_free_dentry;
833 if (!load_addr_set) {
835 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
836 if (loc->elf_ex.e_type == ET_DYN) {
838 ELF_PAGESTART(load_bias + vaddr);
839 load_addr += load_bias;
840 reloc_func_desc = load_bias;
843 k = elf_ppnt->p_vaddr;
850 * Check to see if the section's size will overflow the
851 * allowed task size. Note that p_filesz must always be
852 * <= p_memsz so it is only necessary to check p_memsz.
854 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
855 elf_ppnt->p_memsz > TASK_SIZE ||
856 TASK_SIZE - elf_ppnt->p_memsz < k) {
857 /* set_brk can never work. Avoid overflows. */
858 send_sig(SIGKILL, current, 0);
860 goto out_free_dentry;
863 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
867 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
871 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
876 loc->elf_ex.e_entry += load_bias;
877 elf_bss += load_bias;
878 elf_brk += load_bias;
879 start_code += load_bias;
880 end_code += load_bias;
881 start_data += load_bias;
882 end_data += load_bias;
884 /* Calling set_brk effectively mmaps the pages that we need
885 * for the bss and break sections. We must do this before
886 * mapping in the interpreter, to make sure it doesn't wind
887 * up getting placed where the bss needs to go.
889 retval = set_brk(elf_bss, elf_brk);
891 send_sig(SIGKILL, current, 0);
892 goto out_free_dentry;
894 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
895 send_sig(SIGSEGV, current, 0);
896 retval = -EFAULT; /* Nobody gets to see this, but.. */
897 goto out_free_dentry;
900 if (elf_interpreter) {
901 unsigned long interp_map_addr = 0;
903 elf_entry = load_elf_interp(&loc->interp_elf_ex,
907 if (!IS_ERR((void *)elf_entry)) {
909 * load_elf_interp() returns relocation
912 interp_load_addr = elf_entry;
913 elf_entry += loc->interp_elf_ex.e_entry;
915 if (BAD_ADDR(elf_entry)) {
916 force_sig(SIGSEGV, current);
917 retval = IS_ERR((void *)elf_entry) ?
918 (int)elf_entry : -EINVAL;
919 goto out_free_dentry;
921 reloc_func_desc = interp_load_addr;
923 allow_write_access(interpreter);
925 kfree(elf_interpreter);
927 elf_entry = loc->elf_ex.e_entry;
928 if (BAD_ADDR(elf_entry)) {
929 force_sig(SIGSEGV, current);
931 goto out_free_dentry;
937 set_binfmt(&elf_format);
939 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
940 retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
942 send_sig(SIGKILL, current, 0);
945 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
947 install_exec_creds(bprm);
948 retval = create_elf_tables(bprm, &loc->elf_ex,
949 load_addr, interp_load_addr);
951 send_sig(SIGKILL, current, 0);
954 /* N.B. passed_fileno might not be initialized? */
955 current->mm->end_code = end_code;
956 current->mm->start_code = start_code;
957 current->mm->start_data = start_data;
958 current->mm->end_data = end_data;
959 current->mm->start_stack = bprm->p;
961 #ifdef arch_randomize_brk
962 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
963 current->mm->brk = current->mm->start_brk =
964 arch_randomize_brk(current->mm);
965 #ifdef CONFIG_COMPAT_BRK
966 current->brk_randomized = 1;
971 if (current->personality & MMAP_PAGE_ZERO) {
972 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
973 and some applications "depend" upon this behavior.
974 Since we do not have the power to recompile these, we
975 emulate the SVr4 behavior. Sigh. */
976 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
977 MAP_FIXED | MAP_PRIVATE, 0);
982 * The ABI may specify that certain registers be set up in special
983 * ways (on i386 %edx is the address of a DT_FINI function, for
984 * example). In addition, it may also specify (e.g., PowerPC64 ELF)
985 * that the e_entry field is the address of the function descriptor
986 * for the startup routine, rather than the address of the startup
987 * routine itself. This macro performs whatever initialization to
988 * the regs structure is required as well as any relocations to the
989 * function descriptor entries when executing dynamically linked apps.
991 ELF_PLAT_INIT(regs, reloc_func_desc);
994 start_thread(regs, elf_entry, bprm->p);
1003 allow_write_access(interpreter);
1007 kfree(elf_interpreter);
1013 #ifdef CONFIG_USELIB
1014 /* This is really simpleminded and specialized - we are loading an
1015 a.out library that is given an ELF header. */
1016 static int load_elf_library(struct file *file)
1018 struct elf_phdr *elf_phdata;
1019 struct elf_phdr *eppnt;
1020 unsigned long elf_bss, bss, len;
1021 int retval, error, i, j;
1022 struct elfhdr elf_ex;
1025 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1026 if (retval != sizeof(elf_ex))
1029 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1032 /* First of all, some simple consistency checks */
1033 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1034 !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1037 /* Now read in all of the header information */
1039 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1040 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1043 elf_phdata = kmalloc(j, GFP_KERNEL);
1049 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1053 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1054 if ((eppnt + i)->p_type == PT_LOAD)
1059 while (eppnt->p_type != PT_LOAD)
1062 /* Now use mmap to map the library into memory. */
1063 error = vm_mmap(file,
1064 ELF_PAGESTART(eppnt->p_vaddr),
1066 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1067 PROT_READ | PROT_WRITE | PROT_EXEC,
1068 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1070 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1071 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1074 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1075 if (padzero(elf_bss)) {
1080 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1082 bss = eppnt->p_memsz + eppnt->p_vaddr;
1084 vm_brk(len, bss - len);
1092 #endif /* #ifdef CONFIG_USELIB */
1094 #ifdef CONFIG_ELF_CORE
1098 * Modelled on fs/exec.c:aout_core_dump()
1103 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1104 * that are useful for post-mortem analysis are included in every core dump.
1105 * In that way we ensure that the core dump is fully interpretable later
1106 * without matching up the same kernel and hardware config to see what PC values
1107 * meant. These special mappings include the vDSO, vsyscall, and other
1108 * architecture-specific mappings.
1110 static bool always_dump_vma(struct vm_area_struct *vma)
1112 /* Any vsyscall mappings? */
1113 if (vma == get_gate_vma(vma->vm_mm))
1116 * arch_vma_name() returns non-NULL for special architecture mappings,
1117 * such as vDSO sections.
1119 if (arch_vma_name(vma))
1126 * Decide what to dump of a segment: part, all, or none.
1128 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1129 unsigned long mm_flags)
1131 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1133 /* always dump the vdso and vsyscall sections */
1134 if (always_dump_vma(vma))
1137 if (vma->vm_flags & VM_DONTDUMP)
1140 /* Hugetlb memory check */
1141 if (vma->vm_flags & VM_HUGETLB) {
1142 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1144 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1149 /* Do not dump I/O mapped devices or special mappings */
1150 if (vma->vm_flags & VM_IO)
1153 /* By default, dump shared memory if mapped from an anonymous file. */
1154 if (vma->vm_flags & VM_SHARED) {
1155 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1156 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1161 /* Dump segments that have been written to. */
1162 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1164 if (vma->vm_file == NULL)
1167 if (FILTER(MAPPED_PRIVATE))
1171 * If this looks like the beginning of a DSO or executable mapping,
1172 * check for an ELF header. If we find one, dump the first page to
1173 * aid in determining what was mapped here.
1175 if (FILTER(ELF_HEADERS) &&
1176 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1177 u32 __user *header = (u32 __user *) vma->vm_start;
1179 mm_segment_t fs = get_fs();
1181 * Doing it this way gets the constant folded by GCC.
1185 char elfmag[SELFMAG];
1187 BUILD_BUG_ON(SELFMAG != sizeof word);
1188 magic.elfmag[EI_MAG0] = ELFMAG0;
1189 magic.elfmag[EI_MAG1] = ELFMAG1;
1190 magic.elfmag[EI_MAG2] = ELFMAG2;
1191 magic.elfmag[EI_MAG3] = ELFMAG3;
1193 * Switch to the user "segment" for get_user(),
1194 * then put back what elf_core_dump() had in place.
1197 if (unlikely(get_user(word, header)))
1200 if (word == magic.cmp)
1209 return vma->vm_end - vma->vm_start;
1212 /* An ELF note in memory */
1217 unsigned int datasz;
1221 static int notesize(struct memelfnote *en)
1225 sz = sizeof(struct elf_note);
1226 sz += roundup(strlen(en->name) + 1, 4);
1227 sz += roundup(en->datasz, 4);
1232 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1235 en.n_namesz = strlen(men->name) + 1;
1236 en.n_descsz = men->datasz;
1237 en.n_type = men->type;
1239 return dump_emit(cprm, &en, sizeof(en)) &&
1240 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1241 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
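/*
 * On disk each note written above is a struct elf_note header (n_namesz,
 * n_descsz, n_type) followed by the name and then the descriptor data,
 * with both the name and the data padded to a 4-byte boundary - which is
 * exactly what notesize() accounts for with its roundup(..., 4) terms.
 */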
1244 static void fill_elf_header(struct elfhdr *elf, int segs,
1245 u16 machine, u32 flags)
1247 memset(elf, 0, sizeof(*elf));
1249 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1250 elf->e_ident[EI_CLASS] = ELF_CLASS;
1251 elf->e_ident[EI_DATA] = ELF_DATA;
1252 elf->e_ident[EI_VERSION] = EV_CURRENT;
1253 elf->e_ident[EI_OSABI] = ELF_OSABI;
1255 elf->e_type = ET_CORE;
1256 elf->e_machine = machine;
1257 elf->e_version = EV_CURRENT;
1258 elf->e_phoff = sizeof(struct elfhdr);
1259 elf->e_flags = flags;
1260 elf->e_ehsize = sizeof(struct elfhdr);
1261 elf->e_phentsize = sizeof(struct elf_phdr);
1262 elf->e_phnum = segs;
1267 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1269 phdr->p_type = PT_NOTE;
1270 phdr->p_offset = offset;
1273 phdr->p_filesz = sz;
1280 static void fill_note(struct memelfnote *note, const char *name, int type,
1281 unsigned int sz, void *data)
1291 * Fill in all the fields in prstatus from the given task struct, except the
1292 * registers, which need to be filled in separately.
1294 static void fill_prstatus(struct elf_prstatus *prstatus,
1295 struct task_struct *p, long signr)
1297 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1298 prstatus->pr_sigpend = p->pending.signal.sig[0];
1299 prstatus->pr_sighold = p->blocked.sig[0];
1301 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1303 prstatus->pr_pid = task_pid_vnr(p);
1304 prstatus->pr_pgrp = task_pgrp_vnr(p);
1305 prstatus->pr_sid = task_session_vnr(p);
1306 if (thread_group_leader(p)) {
1307 struct task_cputime cputime;
1310 * This is the record for the group leader. It shows the
1311 * group-wide total, not its individual thread total.
1313 thread_group_cputime(p, &cputime);
1314 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1315 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1317 cputime_t utime, stime;
1319 task_cputime(p, &utime, &stime);
1320 cputime_to_timeval(utime, &prstatus->pr_utime);
1321 cputime_to_timeval(stime, &prstatus->pr_stime);
1323 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1324 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1327 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1328 struct mm_struct *mm)
1330 const struct cred *cred;
1331 unsigned int i, len;
1333 /* first copy the parameters from user space */
1334 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1336 len = mm->arg_end - mm->arg_start;
1337 if (len >= ELF_PRARGSZ)
1338 len = ELF_PRARGSZ-1;
1339 if (copy_from_user(&psinfo->pr_psargs,
1340 (const char __user *)mm->arg_start, len))
1342 for(i = 0; i < len; i++)
1343 if (psinfo->pr_psargs[i] == 0)
1344 psinfo->pr_psargs[i] = ' ';
1345 psinfo->pr_psargs[len] = 0;
1348 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1350 psinfo->pr_pid = task_pid_vnr(p);
1351 psinfo->pr_pgrp = task_pgrp_vnr(p);
1352 psinfo->pr_sid = task_session_vnr(p);
1354 i = p->state ? ffz(~p->state) + 1 : 0;
1355 psinfo->pr_state = i;
1356 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1357 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1358 psinfo->pr_nice = task_nice(p);
1359 psinfo->pr_flag = p->flags;
1361 cred = __task_cred(p);
1362 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1363 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1365 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1370 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1372 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1376 while (auxv[i - 2] != AT_NULL);
1377 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1380 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1381 const siginfo_t *siginfo)
1383 mm_segment_t old_fs = get_fs();
1385 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1387 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1390 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1392 * Format of NT_FILE note:
1394 * long count -- how many files are mapped
1395 * long page_size -- units for file_ofs
1396 * array of [COUNT] elements of
1400 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
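/*
 * For illustration, a note describing two mappings of one file might be
 * laid out as (values hypothetical):
 *
 *     2, 4096,                       count, page_size
 *     0x400000, 0x401000, 0,         start, end, file_ofs (in pages)
 *     0x600000, 0x601000, 1,
 *     "/bin/true\0/bin/true\0"       the COUNT filenames
 */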
1402 static int fill_files_note(struct memelfnote *note)
1404 struct vm_area_struct *vma;
1405 unsigned count, size, names_ofs, remaining, n;
1407 user_long_t *start_end_ofs;
1408 char *name_base, *name_curpos;
1410 /* *Estimated* file count and total data size needed */
1411 count = current->mm->map_count;
1414 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1416 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1418 size = round_up(size, PAGE_SIZE);
1419 data = vmalloc(size);
1423 start_end_ofs = data + 2;
1424 name_base = name_curpos = ((char *)data) + names_ofs;
1425 remaining = size - names_ofs;
1427 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1429 const char *filename;
1431 file = vma->vm_file;
1434 filename = d_path(&file->f_path, name_curpos, remaining);
1435 if (IS_ERR(filename)) {
1436 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1438 size = size * 5 / 4;
1444 /* d_path() fills at the end, move name down */
1445 /* n = strlen(filename) + 1: */
1446 n = (name_curpos + remaining) - filename;
1447 remaining = filename - name_curpos;
1448 memmove(name_curpos, filename, n);
1451 *start_end_ofs++ = vma->vm_start;
1452 *start_end_ofs++ = vma->vm_end;
1453 *start_end_ofs++ = vma->vm_pgoff;
1457 /* Now we know the exact count of files, so we can store it */
1459 data[1] = PAGE_SIZE;
1461 * The count is usually less than current->mm->map_count,
1462 * so we need to move the filenames down.
1464 n = current->mm->map_count - count;
1466 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1467 memmove(name_base - shift_bytes, name_base,
1468 name_curpos - name_base);
1469 name_curpos -= shift_bytes;
1472 size = name_curpos - (char *)data;
1473 fill_note(note, "CORE", NT_FILE, size, data);
1477 #ifdef CORE_DUMP_USE_REGSET
1478 #include <linux/regset.h>
1480 struct elf_thread_core_info {
1481 struct elf_thread_core_info *next;
1482 struct task_struct *task;
1483 struct elf_prstatus prstatus;
1484 struct memelfnote notes[0];
1487 struct elf_note_info {
1488 struct elf_thread_core_info *thread;
1489 struct memelfnote psinfo;
1490 struct memelfnote signote;
1491 struct memelfnote auxv;
1492 struct memelfnote files;
1493 user_siginfo_t csigdata;
1499 * When a regset has a writeback hook, we call it on each thread before
1500 * dumping user memory. On register window machines, this makes sure the
1501 * user memory backing the register data is up to date before we read it.
1503 static void do_thread_regset_writeback(struct task_struct *task,
1504 const struct user_regset *regset)
1506 if (regset->writeback)
1507 regset->writeback(task, regset, 1);
1511 #define PR_REG_SIZE(S) sizeof(S)
1514 #ifndef PRSTATUS_SIZE
1515 #define PRSTATUS_SIZE(S) sizeof(S)
1519 #define PR_REG_PTR(S) (&((S)->pr_reg))
1522 #ifndef SET_PR_FPVALID
1523 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1526 static int fill_thread_core_info(struct elf_thread_core_info *t,
1527 const struct user_regset_view *view,
1528 long signr, size_t *total)
1533 * NT_PRSTATUS is the one special case, because the regset data
1534 * goes into the pr_reg field inside the note contents, rather
1535 * than being the whole note contents. We fill the rest in here.
1536 * We assume that regset 0 is NT_PRSTATUS.
1538 fill_prstatus(&t->prstatus, t->task, signr);
1539 (void) view->regsets[0].get(t->task, &view->regsets[0],
1540 0, PR_REG_SIZE(t->prstatus.pr_reg),
1541 PR_REG_PTR(&t->prstatus), NULL);
1543 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1544 PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1545 *total += notesize(&t->notes[0]);
1547 do_thread_regset_writeback(t->task, &view->regsets[0]);
1550 * Each other regset might generate a note too. For each regset
1551 * that has no core_note_type or is inactive, we leave t->notes[i]
1552 * all zero and we'll know to skip writing it later.
1554 for (i = 1; i < view->n; ++i) {
1555 const struct user_regset *regset = &view->regsets[i];
1556 do_thread_regset_writeback(t->task, regset);
1557 if (regset->core_note_type && regset->get &&
1558 (!regset->active || regset->active(t->task, regset))) {
1560 size_t size = regset->n * regset->size;
1561 void *data = kmalloc(size, GFP_KERNEL);
1562 if (unlikely(!data))
1564 ret = regset->get(t->task, regset,
1565 0, size, data, NULL);
1569 if (regset->core_note_type != NT_PRFPREG)
1570 fill_note(&t->notes[i], "LINUX",
1571 regset->core_note_type,
1574 SET_PR_FPVALID(&t->prstatus, 1);
1575 fill_note(&t->notes[i], "CORE",
1576 NT_PRFPREG, size, data);
1578 *total += notesize(&t->notes[i]);
1586 static int fill_note_info(struct elfhdr *elf, int phdrs,
1587 struct elf_note_info *info,
1588 const siginfo_t *siginfo, struct pt_regs *regs)
1590 struct task_struct *dump_task = current;
1591 const struct user_regset_view *view = task_user_regset_view(dump_task);
1592 struct elf_thread_core_info *t;
1593 struct elf_prpsinfo *psinfo;
1594 struct core_thread *ct;
1598 info->thread = NULL;
1600 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1601 if (psinfo == NULL) {
1602 info->psinfo.data = NULL; /* So we don't free this wrongly */
1606 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1609 * Figure out how many notes we're going to need for each thread.
1611 info->thread_notes = 0;
1612 for (i = 0; i < view->n; ++i)
1613 if (view->regsets[i].core_note_type != 0)
1614 ++info->thread_notes;
1617 * Sanity check. We rely on regset 0 being NT_PRSTATUS,
1618 * since it is our one special case.
1620 if (unlikely(info->thread_notes == 0) ||
1621 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1627 * Initialize the ELF file header.
1629 fill_elf_header(elf, phdrs,
1630 view->e_machine, view->e_flags);
1633 * Allocate a structure for each thread.
1635 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1636 t = kzalloc(offsetof(struct elf_thread_core_info,
1637 notes[info->thread_notes]),
1643 if (ct->task == dump_task || !info->thread) {
1644 t->next = info->thread;
1648 * Make sure to keep the original task at
1649 * the head of the list.
1651 t->next = info->thread->next;
1652 info->thread->next = t;
1657 * Now fill in each thread's information.
1659 for (t = info->thread; t != NULL; t = t->next)
1660 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1664 * Fill in the two process-wide notes.
1666 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1667 info->size += notesize(&info->psinfo);
1669 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1670 info->size += notesize(&info->signote);
1672 fill_auxv_note(&info->auxv, current->mm);
1673 info->size += notesize(&info->auxv);
1675 if (fill_files_note(&info->files) == 0)
1676 info->size += notesize(&info->files);
1681 static size_t get_note_info_size(struct elf_note_info *info)
1687 * Write all the notes for each thread. When writing the first thread, the
1688 * process-wide notes are interleaved after the first thread-specific note.
1690 static int write_note_info(struct elf_note_info *info,
1691 struct coredump_params *cprm)
1694 struct elf_thread_core_info *t = info->thread;
1699 if (!writenote(&t->notes[0], cprm))
1702 if (first && !writenote(&info->psinfo, cprm))
1704 if (first && !writenote(&info->signote, cprm))
1706 if (first && !writenote(&info->auxv, cprm))
1708 if (first && info->files.data &&
1709 !writenote(&info->files, cprm))
1712 for (i = 1; i < info->thread_notes; ++i)
1713 if (t->notes[i].data &&
1714 !writenote(&t->notes[i], cprm))
1724 static void free_note_info(struct elf_note_info *info)
1726 struct elf_thread_core_info *threads = info->thread;
1729 struct elf_thread_core_info *t = threads;
1731 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1732 for (i = 1; i < info->thread_notes; ++i)
1733 kfree(t->notes[i].data);
1736 kfree(info->psinfo.data);
1737 vfree(info->files.data);
1742 /* Here is the structure in which status of each thread is captured. */
1743 struct elf_thread_status
1745 struct list_head list;
1746 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1747 elf_fpregset_t fpu; /* NT_PRFPREG */
1748 struct task_struct *thread;
1749 #ifdef ELF_CORE_COPY_XFPREGS
1750 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
1752 struct memelfnote notes[3];
1757 * In order to add the specific thread information for the ELF file format,
1758 * we need to keep a linked list of every thread's pr_status and then create
1759 * a single section for them in the final core file.
1761 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1764 struct task_struct *p = t->thread;
1767 fill_prstatus(&t->prstatus, p, signr);
1768 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1770 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1773 sz += notesize(&t->notes[0]);
1775 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1777 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1780 sz += notesize(&t->notes[1]);
1783 #ifdef ELF_CORE_COPY_XFPREGS
1784 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1785 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1786 sizeof(t->xfpu), &t->xfpu);
1788 sz += notesize(&t->notes[2]);
1794 struct elf_note_info {
1795 struct memelfnote *notes;
1796 struct memelfnote *notes_files;
1797 struct elf_prstatus *prstatus; /* NT_PRSTATUS */
1798 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
1799 struct list_head thread_list;
1800 elf_fpregset_t *fpu;
1801 #ifdef ELF_CORE_COPY_XFPREGS
1802 elf_fpxregset_t *xfpu;
1804 user_siginfo_t csigdata;
1805 int thread_status_size;
1809 static int elf_note_info_init(struct elf_note_info *info)
1811 memset(info, 0, sizeof(*info));
1812 INIT_LIST_HEAD(&info->thread_list);
1814 /* Allocate space for ELF notes */
1815 info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1818 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1821 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1822 if (!info->prstatus)
1824 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1827 #ifdef ELF_CORE_COPY_XFPREGS
1828 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1835 static int fill_note_info(struct elfhdr *elf, int phdrs,
1836 struct elf_note_info *info,
1837 const siginfo_t *siginfo, struct pt_regs *regs)
1839 struct list_head *t;
1840 struct core_thread *ct;
1841 struct elf_thread_status *ets;
1843 if (!elf_note_info_init(info))
1846 for (ct = current->mm->core_state->dumper.next;
1847 ct; ct = ct->next) {
1848 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1852 ets->thread = ct->task;
1853 list_add(&ets->list, &info->thread_list);
1856 list_for_each(t, &info->thread_list) {
1859 ets = list_entry(t, struct elf_thread_status, list);
1860 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1861 info->thread_status_size += sz;
1863 /* now collect the dump for the current */
1864 memset(info->prstatus, 0, sizeof(*info->prstatus));
1865 fill_prstatus(info->prstatus, current, siginfo->si_signo);
1866 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1869 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1872 * Set up the notes in similar form to SVR4 core dumps made
1873 * with info from their /proc.
1876 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1877 sizeof(*info->prstatus), info->prstatus);
1878 fill_psinfo(info->psinfo, current->group_leader, current->mm);
1879 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1880 sizeof(*info->psinfo), info->psinfo);
1882 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1883 fill_auxv_note(info->notes + 3, current->mm);
1886 if (fill_files_note(info->notes + info->numnote) == 0) {
1887 info->notes_files = info->notes + info->numnote;
1891 /* Try to dump the FPU. */
1892 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1894 if (info->prstatus->pr_fpvalid)
1895 fill_note(info->notes + info->numnote++,
1896 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1897 #ifdef ELF_CORE_COPY_XFPREGS
1898 if (elf_core_copy_task_xfpregs(current, info->xfpu))
1899 fill_note(info->notes + info->numnote++,
1900 "LINUX", ELF_CORE_XFPREG_TYPE,
1901 sizeof(*info->xfpu), info->xfpu);
1907 static size_t get_note_info_size(struct elf_note_info *info)
1912 for (i = 0; i < info->numnote; i++)
1913 sz += notesize(info->notes + i);
1915 sz += info->thread_status_size;
1920 static int write_note_info(struct elf_note_info *info,
1921 struct coredump_params *cprm)
1924 struct list_head *t;
1926 for (i = 0; i < info->numnote; i++)
1927 if (!writenote(info->notes + i, cprm))
1930 /* write out the thread status notes section */
1931 list_for_each(t, &info->thread_list) {
1932 struct elf_thread_status *tmp =
1933 list_entry(t, struct elf_thread_status, list);
1935 for (i = 0; i < tmp->num_notes; i++)
1936 if (!writenote(&tmp->notes[i], cprm))
1943 static void free_note_info(struct elf_note_info *info)
1945 while (!list_empty(&info->thread_list)) {
1946 struct list_head *tmp = info->thread_list.next;
1948 kfree(list_entry(tmp, struct elf_thread_status, list));
1951 /* Free data possibly allocated by fill_files_note(): */
1952 if (info->notes_files)
1953 vfree(info->notes_files->data);
1955 kfree(info->prstatus);
1956 kfree(info->psinfo);
1959 #ifdef ELF_CORE_COPY_XFPREGS
1966 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1967 struct vm_area_struct *gate_vma)
1969 struct vm_area_struct *ret = tsk->mm->mmap;
1976 * Helper function for iterating across a vma list. It ensures that the caller
1977 * will visit `gate_vma' prior to terminating the search.
1979 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1980 struct vm_area_struct *gate_vma)
1982 struct vm_area_struct *ret;
1984 ret = this_vma->vm_next;
1987 if (this_vma == gate_vma)
1992 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1993 elf_addr_t e_shoff, int segs)
1995 elf->e_shoff = e_shoff;
1996 elf->e_shentsize = sizeof(*shdr4extnum);
1998 elf->e_shstrndx = SHN_UNDEF;
2000 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2002 shdr4extnum->sh_type = SHT_NULL;
2003 shdr4extnum->sh_size = elf->e_shnum;
2004 shdr4extnum->sh_link = elf->e_shstrndx;
2005 shdr4extnum->sh_info = segs;
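/*
 * This implements the gABI "extended numbering" scheme: when the real
 * segment count does not fit in the 16-bit e_phnum, e_phnum is set to
 * PN_XNUM below and readers recover the true count from the sh_info
 * field of this first (index 0) section header.
 */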
2008 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2009 unsigned long mm_flags)
2011 struct vm_area_struct *vma;
2014 for (vma = first_vma(current, gate_vma); vma != NULL;
2015 vma = next_vma(vma, gate_vma))
2016 size += vma_dump_size(vma, mm_flags);
2023 * This is a two-pass process; first we find the offsets of the bits,
2024 * and then they are actually written out. If we run out of core limit
2027 static int elf_core_dump(struct coredump_params *cprm)
2032 struct vm_area_struct *vma, *gate_vma;
2033 struct elfhdr *elf = NULL;
2034 loff_t offset = 0, dataoff;
2035 struct elf_note_info info = { };
2036 struct elf_phdr *phdr4note = NULL;
2037 struct elf_shdr *shdr4extnum = NULL;
2042 * We no longer stop all VM operations.
2044 * This is because those processes that could possibly change map_count
2045 * or the mmap / vma pages are now blocked in do_exit on current
2046 * finishing this core dump.
2048 * Only ptrace can touch these memory addresses, but it doesn't change
2049 * the map_count or the pages allocated. So no possibility of crashing
2050 * exists while dumping the mm->vm_next areas to the core file.
2053 /* alloc memory for large data structures: too large to be on stack */
2054 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2058 * The number of segs is recorded in the ELF header as a 16-bit value.
2059 * Please check the DEFAULT_MAX_MAP_COUNT definition when you modify this.
2061 segs = current->mm->map_count;
2062 segs += elf_core_extra_phdrs();
2064 gate_vma = get_gate_vma(current->mm);
2065 if (gate_vma != NULL)
2068 /* for notes section */
2071 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2072 * this, the kernel supports extended numbering. Have a look at
2073 * include/linux/elf.h for further information. */
2074 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2077 * Collect all the non-memory information about the process for the
2078 * notes. This also sets up the file header.
2080 if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2088 offset += sizeof(*elf); /* Elf header */
2089 offset += segs * sizeof(struct elf_phdr); /* Program headers */
2091 /* Write notes phdr entry */
2093 size_t sz = get_note_info_size(&info);
2095 sz += elf_coredump_extra_notes_size();
2097 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2101 fill_elf_note_phdr(phdr4note, sz, offset);
2105 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2107 offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2108 offset += elf_core_extra_data_size();
2111 if (e_phnum == PN_XNUM) {
2112 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2115 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2120 if (!dump_emit(cprm, elf, sizeof(*elf)))
2123 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2126 /* Write program headers for segments dump */
2127 for (vma = first_vma(current, gate_vma); vma != NULL;
2128 vma = next_vma(vma, gate_vma)) {
2129 struct elf_phdr phdr;
2131 phdr.p_type = PT_LOAD;
2132 phdr.p_offset = offset;
2133 phdr.p_vaddr = vma->vm_start;
2135 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2136 phdr.p_memsz = vma->vm_end - vma->vm_start;
2137 offset += phdr.p_filesz;
2138 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2139 if (vma->vm_flags & VM_WRITE)
2140 phdr.p_flags |= PF_W;
2141 if (vma->vm_flags & VM_EXEC)
2142 phdr.p_flags |= PF_X;
2143 phdr.p_align = ELF_EXEC_PAGESIZE;
2145 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2149 if (!elf_core_write_extra_phdrs(cprm, offset))
2152 /* write out the notes section */
2153 if (!write_note_info(&info, cprm))
2156 if (elf_coredump_extra_notes_write(cprm))
2160 if (!dump_skip(cprm, dataoff - cprm->written))
2163 for (vma = first_vma(current, gate_vma); vma != NULL;
2164 vma = next_vma(vma, gate_vma)) {
2168 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2170 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2174 page = get_dump_page(addr);
2176 void *kaddr = kmap(page);
2177 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2179 page_cache_release(page);
2181 stop = !dump_skip(cprm, PAGE_SIZE);
2187 if (!elf_core_write_extra_data(cprm))
2190 if (e_phnum == PN_XNUM) {
2191 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2199 free_note_info(&info);
2207 #endif /* CONFIG_ELF_CORE */
2209 static int __init init_elf_binfmt(void)
2211 register_binfmt(&elf_format);
2215 static void __exit exit_elf_binfmt(void)
2217 /* Remove the ELF loader. */
2218 unregister_binfmt(&elf_format);
2221 core_initcall(init_elf_binfmt);
2222 module_exit(exit_elf_binfmt);
2223 MODULE_LICENSE("GPL");