2 * linux/fs/binfmt_elf.c
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
12 #include <linux/module.h>
13 #include <linux/kernel.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/security.h>
31 #include <linux/random.h>
32 #include <linux/elf.h>
33 #include <linux/utsname.h>
34 #include <linux/coredump.h>
35 #include <asm/uaccess.h>
36 #include <asm/param.h>
39 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
40 static int load_elf_library(struct file *);
41 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
42 int, int, unsigned long);
45 * If we don't support core dumping, then supply a NULL so we don't even try.
48 #ifdef CONFIG_ELF_CORE
49 static int elf_core_dump(struct coredump_params *cprm);
51 #define elf_core_dump NULL
54 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
55 #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
57 #define ELF_MIN_ALIGN PAGE_SIZE
60 #ifndef ELF_CORE_EFLAGS
61 #define ELF_CORE_EFLAGS 0
64 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
65 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
66 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
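/*
 * Illustrative example (editorial annotation, not part of the original
 * source): assuming ELF_MIN_ALIGN == 4096, the macros above behave as
 * follows for a hypothetical address 0x08048123:
 *
 *   ELF_PAGESTART(0x08048123)  == 0x08048000   (round down to a page)
 *   ELF_PAGEOFFSET(0x08048123) == 0x00000123   (offset within the page)
 *   ELF_PAGEALIGN(0x08048123)  == 0x08049000   (round up to a page)
 */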
68 static struct linux_binfmt elf_format = {
69 .module = THIS_MODULE,
70 .load_binary = load_elf_binary,
71 .load_shlib = load_elf_library,
72 .core_dump = elf_core_dump,
73 .min_coredump = ELF_EXEC_PAGESIZE,
76 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
78 static int set_brk(unsigned long start, unsigned long end)
80 start = ELF_PAGEALIGN(start);
81 end = ELF_PAGEALIGN(end);
84 addr = vm_brk(start, end - start);
88 current->mm->start_brk = current->mm->brk = end;
92 /* We need to explicitly zero any fractional pages
93 after the data section (i.e. bss). This would
94 contain the junk from the file that should not be in memory.
97 static int padzero(unsigned long elf_bss)
101 nbyte = ELF_PAGEOFFSET(elf_bss);
103 nbyte = ELF_MIN_ALIGN - nbyte;
104 if (clear_user((void __user *) elf_bss, nbyte))
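/*
 * Illustrative example (editorial annotation, not part of the original
 * source): if elf_bss were 0x0804a123 with ELF_MIN_ALIGN == 4096,
 * ELF_PAGEOFFSET(elf_bss) is 0x123, so padzero() clears the remaining
 * 0xedd bytes up to the 0x0804b000 boundary and the tail of the last
 * file-backed page reads as zeros instead of stale file contents.
 */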
110 /* Let's use some macros to make this stack manipulation a little clearer */
111 #ifdef CONFIG_STACK_GROWSUP
112 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
113 #define STACK_ROUND(sp, items) \
114 ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
115 #define STACK_ALLOC(sp, len) ({ \
116 elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
119 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
120 #define STACK_ROUND(sp, items) \
121 (((unsigned long) (sp - items)) &~ 15UL)
122 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
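/*
 * Editorial note (not part of the original source): on a
 * downward-growing stack, STACK_ALLOC() moves the stack pointer down by
 * `len' bytes and yields the new, lower address; STACK_ADD() steps the
 * pointer down by a number of elf_addr_t items; and STACK_ROUND()
 * rounds the result down so that the final stack pointer handed to
 * userspace is 16-byte aligned.
 */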
125 #ifndef ELF_BASE_PLATFORM
127 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
128 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
129 * will be copied to the user stack in the same manner as AT_PLATFORM.
131 #define ELF_BASE_PLATFORM NULL
135 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
136 unsigned long load_addr, unsigned long interp_load_addr)
138 unsigned long p = bprm->p;
139 int argc = bprm->argc;
140 int envc = bprm->envc;
141 elf_addr_t __user *argv;
142 elf_addr_t __user *envp;
143 elf_addr_t __user *sp;
144 elf_addr_t __user *u_platform;
145 elf_addr_t __user *u_base_platform;
146 elf_addr_t __user *u_rand_bytes;
147 const char *k_platform = ELF_PLATFORM;
148 const char *k_base_platform = ELF_BASE_PLATFORM;
149 unsigned char k_rand_bytes[16];
151 elf_addr_t *elf_info;
153 const struct cred *cred = current_cred();
154 struct vm_area_struct *vma;
157 * In some cases (e.g. Hyper-Threading), we want to avoid L1
158 * evictions by the processes running on the same package. One
159 * thing we can do is to shuffle the initial stack for them.
162 p = arch_align_stack(p);
165 * If this architecture has a platform capability string, copy it
166 * to userspace. In some cases (Sparc), this info is impossible
167 * for userspace to get any other way, in others (i386) it is merely difficult.
172 size_t len = strlen(k_platform) + 1;
174 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
175 if (__copy_to_user(u_platform, k_platform, len))
180 * If this architecture has a "base" platform capability
181 * string, copy it to userspace.
183 u_base_platform = NULL;
184 if (k_base_platform) {
185 size_t len = strlen(k_base_platform) + 1;
187 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
188 if (__copy_to_user(u_base_platform, k_base_platform, len))
193 * Generate 16 random bytes for userspace PRNG seeding.
195 get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
196 u_rand_bytes = (elf_addr_t __user *)
197 STACK_ALLOC(p, sizeof(k_rand_bytes));
198 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
201 /* Create the ELF interpreter info */
202 elf_info = (elf_addr_t *)current->mm->saved_auxv;
203 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
204 #define NEW_AUX_ENT(id, val) \
206 elf_info[ei_index++] = id; \
207 elf_info[ei_index++] = val; \
212 * ARCH_DLINFO must come first so PPC can do its special alignment of auxv.
214 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
215 * ARCH_DLINFO changes
219 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
220 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
221 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
222 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
223 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
224 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
225 NEW_AUX_ENT(AT_BASE, interp_load_addr);
226 NEW_AUX_ENT(AT_FLAGS, 0);
227 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
228 NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
229 NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
230 NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
231 NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
232 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
233 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
234 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
236 NEW_AUX_ENT(AT_PLATFORM,
237 (elf_addr_t)(unsigned long)u_platform);
239 if (k_base_platform) {
240 NEW_AUX_ENT(AT_BASE_PLATFORM,
241 (elf_addr_t)(unsigned long)u_base_platform);
243 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
244 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
247 /* AT_NULL is zero; clear the rest too */
248 memset(&elf_info[ei_index], 0,
249 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
251 /* And advance past the AT_NULL entry. */
254 sp = STACK_ADD(p, ei_index);
256 items = (argc + 1) + (envc + 1) + 1;
257 bprm->p = STACK_ROUND(sp, items);
259 /* Point sp at the lowest address on the stack */
260 #ifdef CONFIG_STACK_GROWSUP
261 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
262 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
264 sp = (elf_addr_t __user *)bprm->p;
269 * Grow the stack manually; some architectures have a limit on how
270 * far ahead a user-space access may be in order to grow the stack.
272 vma = find_extend_vma(current->mm, bprm->p);
276 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
277 if (__put_user(argc, sp++))
280 envp = argv + argc + 1;
282 /* Populate argv and envp */
283 p = current->mm->arg_end = current->mm->arg_start;
286 if (__put_user((elf_addr_t)p, argv++))
288 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
289 if (!len || len > MAX_ARG_STRLEN)
293 if (__put_user(0, argv))
295 current->mm->arg_end = current->mm->env_start = p;
298 if (__put_user((elf_addr_t)p, envp++))
300 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
301 if (!len || len > MAX_ARG_STRLEN)
305 if (__put_user(0, envp))
307 current->mm->env_end = p;
309 /* Put the elf_info on the stack in the right place. */
310 sp = (elf_addr_t __user *)envp + 1;
311 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
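/*
 * Editorial sketch (not part of the original source) of the initial
 * stack image assembled above, from the final stack pointer upward on a
 * downward-growing stack:
 *
 *   argc
 *   argv[0] ... argv[argc - 1], NULL
 *   envp[0] ... envp[envc - 1], NULL
 *   auxv (AT_*, value) pairs, terminated by AT_NULL
 *   AT_RANDOM bytes and platform strings (STACK_ALLOC'd earlier)
 *   argument and environment strings
 */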
316 static unsigned long elf_map(struct file *filep, unsigned long addr,
317 struct elf_phdr *eppnt, int prot, int type,
318 unsigned long total_size)
320 unsigned long map_addr;
321 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
322 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
323 addr = ELF_PAGESTART(addr);
324 size = ELF_PAGEALIGN(size);
326 /* mmap() will return -EINVAL if given a zero size, but a
327 * segment with zero filesize is perfectly valid */
332 * total_size is the size of the ELF (interpreter) image.
333 * The _first_ mmap needs to know the full size, otherwise
334 * randomization might put this image into an overlapping
335 * position with the ELF binary image. (since size < total_size)
336 * So we first map the 'big' image - and unmap the remainder at
337 * the end. (which unmap is needed for ELF images with holes.)
340 total_size = ELF_PAGEALIGN(total_size);
341 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
342 if (!BAD_ADDR(map_addr))
343 vm_munmap(map_addr+size, total_size-size);
345 map_addr = vm_mmap(filep, addr, size, prot, type, off);
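/*
 * Illustrative example (editorial annotation, not part of the original
 * source): for an interpreter whose PT_LOAD segments span, say,
 * [0x0, 0x1000) and [0x200000, 0x201000), total_size is 0x201000, so
 * the first vm_mmap() reserves the whole range at once; the excess
 * beyond the first segment is immediately unmapped again (needed for
 * images with holes), and the remaining segments are later mapped
 * MAP_FIXED inside the range that was chosen.
 */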
350 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
352 int i, first_idx = -1, last_idx = -1;
354 for (i = 0; i < nr; i++) {
355 if (cmds[i].p_type == PT_LOAD) {
364 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
365 ELF_PAGESTART(cmds[first_idx].p_vaddr);
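/*
 * Illustrative example (editorial annotation, not part of the original
 * source): with a first PT_LOAD at p_vaddr 0x400000 and a last PT_LOAD
 * ending at p_vaddr 0x600000 with p_memsz 0x800, the value returned is
 * 0x600800 - ELF_PAGESTART(0x400000) = 0x200800 bytes.
 */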
369 /* This is much more generalized than the library routine read function,
370 so we keep this separate. Technically the library read function
371 is only provided so that we can read a.out libraries that have an ELF header.
374 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
375 struct file *interpreter, unsigned long *interp_map_addr,
376 unsigned long no_base)
378 struct elf_phdr *elf_phdata;
379 struct elf_phdr *eppnt;
380 unsigned long load_addr = 0;
381 int load_addr_set = 0;
382 unsigned long last_bss = 0, elf_bss = 0;
383 unsigned long error = ~0UL;
384 unsigned long total_size;
387 /* First of all, some simple consistency checks */
388 if (interp_elf_ex->e_type != ET_EXEC &&
389 interp_elf_ex->e_type != ET_DYN)
391 if (!elf_check_arch(interp_elf_ex))
393 if (!interpreter->f_op || !interpreter->f_op->mmap)
397 * If the size of this structure has changed, then punt, since
398 * we will be doing the wrong thing.
400 if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
402 if (interp_elf_ex->e_phnum < 1 ||
403 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
406 /* Now read in all of the header information */
407 size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
408 if (size > ELF_MIN_ALIGN)
410 elf_phdata = kmalloc(size, GFP_KERNEL);
414 retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
415 (char *)elf_phdata, size);
417 if (retval != size) {
423 total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
430 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
431 if (eppnt->p_type == PT_LOAD) {
432 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
434 unsigned long vaddr = 0;
435 unsigned long k, map_addr;
437 if (eppnt->p_flags & PF_R)
438 elf_prot = PROT_READ;
439 if (eppnt->p_flags & PF_W)
440 elf_prot |= PROT_WRITE;
441 if (eppnt->p_flags & PF_X)
442 elf_prot |= PROT_EXEC;
443 vaddr = eppnt->p_vaddr;
444 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
445 elf_type |= MAP_FIXED;
446 else if (no_base && interp_elf_ex->e_type == ET_DYN)
449 map_addr = elf_map(interpreter, load_addr + vaddr,
450 eppnt, elf_prot, elf_type, total_size);
452 if (!*interp_map_addr)
453 *interp_map_addr = map_addr;
455 if (BAD_ADDR(map_addr))
458 if (!load_addr_set &&
459 interp_elf_ex->e_type == ET_DYN) {
460 load_addr = map_addr - ELF_PAGESTART(vaddr);
465 * Check to see if the section's size will overflow the
466 * allowed task size. Note that p_filesz must always be
467 <= p_memsz so it's only necessary to check p_memsz.
469 k = load_addr + eppnt->p_vaddr;
471 eppnt->p_filesz > eppnt->p_memsz ||
472 eppnt->p_memsz > TASK_SIZE ||
473 TASK_SIZE - eppnt->p_memsz < k) {
479 * Find the end of the file mapping for this phdr, and
480 * keep track of the largest address we see for this.
482 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
487 * Do the same thing for the memory mapping - between
488 * elf_bss and last_bss is the bss section.
490 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
496 if (last_bss > elf_bss) {
498 * Now fill out the bss section. First pad the last page up
499 * to the page boundary, and then perform a mmap to make sure
500 * that there are zero-mapped pages up to and including the last bss page.
503 if (padzero(elf_bss)) {
508 /* What we have mapped so far */
509 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
511 /* Map the last of the bss segment */
512 error = vm_brk(elf_bss, last_bss - elf_bss);
526 * These are the functions used to load ELF style executables and shared
527 * libraries. There is no binary dependent code anywhere else.
530 #define INTERPRETER_NONE 0
531 #define INTERPRETER_ELF 2
533 #ifndef STACK_RND_MASK
534 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
537 static unsigned long randomize_stack_top(unsigned long stack_top)
539 unsigned int random_variable = 0;
541 if ((current->flags & PF_RANDOMIZE) &&
542 !(current->personality & ADDR_NO_RANDOMIZE)) {
543 random_variable = get_random_int() & STACK_RND_MASK;
544 random_variable <<= PAGE_SHIFT;
546 #ifdef CONFIG_STACK_GROWSUP
547 return PAGE_ALIGN(stack_top) + random_variable;
549 return PAGE_ALIGN(stack_top) - random_variable;
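/*
 * Illustrative example (editorial annotation, not part of the original
 * source): with the default STACK_RND_MASK of 0x7ff and PAGE_SHIFT of
 * 12, the random offset is at most 0x7ff << 12 = 0x7ff000 bytes, i.e.
 * just under 8MB of stack-top randomization, and it is applied only
 * when PF_RANDOMIZE is set and ADDR_NO_RANDOMIZE is not.
 */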
553 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
555 struct file *interpreter = NULL; /* to shut gcc up */
556 unsigned long load_addr = 0, load_bias = 0;
557 int load_addr_set = 0;
558 char * elf_interpreter = NULL;
560 struct elf_phdr *elf_ppnt, *elf_phdata;
561 unsigned long elf_bss, elf_brk;
564 unsigned long elf_entry;
565 unsigned long interp_load_addr = 0;
566 unsigned long start_code, end_code, start_data, end_data;
567 unsigned long reloc_func_desc __maybe_unused = 0;
568 int executable_stack = EXSTACK_DEFAULT;
569 unsigned long def_flags = 0;
571 struct elfhdr elf_ex;
572 struct elfhdr interp_elf_ex;
575 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
581 /* Get the exec-header */
582 loc->elf_ex = *((struct elfhdr *)bprm->buf);
585 /* First of all, some simple consistency checks */
586 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
589 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
591 if (!elf_check_arch(&loc->elf_ex))
593 if (!bprm->file->f_op || !bprm->file->f_op->mmap)
596 /* Now read in all of the header information */
597 if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
599 if (loc->elf_ex.e_phnum < 1 ||
600 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
602 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
604 elf_phdata = kmalloc(size, GFP_KERNEL);
608 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
609 (char *)elf_phdata, size);
610 if (retval != size) {
616 elf_ppnt = elf_phdata;
625 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
626 if (elf_ppnt->p_type == PT_INTERP) {
627 /* This is the program interpreter used for
628 * shared libraries - for now assume that this
629 * is an a.out format binary
632 if (elf_ppnt->p_filesz > PATH_MAX ||
633 elf_ppnt->p_filesz < 2)
637 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
639 if (!elf_interpreter)
642 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
645 if (retval != elf_ppnt->p_filesz) {
648 goto out_free_interp;
650 /* make sure path is NULL terminated */
652 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
653 goto out_free_interp;
655 interpreter = open_exec(elf_interpreter);
656 retval = PTR_ERR(interpreter);
657 if (IS_ERR(interpreter))
658 goto out_free_interp;
661 * If the binary is not readable then enforce
662 * mm->dumpable = 0 regardless of the interpreter's permissions.
665 would_dump(bprm, interpreter);
667 retval = kernel_read(interpreter, 0, bprm->buf,
669 if (retval != BINPRM_BUF_SIZE) {
672 goto out_free_dentry;
675 /* Get the exec headers */
676 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
682 elf_ppnt = elf_phdata;
683 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
684 if (elf_ppnt->p_type == PT_GNU_STACK) {
685 if (elf_ppnt->p_flags & PF_X)
686 executable_stack = EXSTACK_ENABLE_X;
688 executable_stack = EXSTACK_DISABLE_X;
692 /* Some simple consistency checks for the interpreter */
693 if (elf_interpreter) {
695 /* Not an ELF interpreter */
696 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
697 goto out_free_dentry;
698 /* Verify the interpreter has a valid arch */
699 if (!elf_check_arch(&loc->interp_elf_ex))
700 goto out_free_dentry;
703 /* Flush all traces of the currently running executable */
704 retval = flush_old_exec(bprm);
706 goto out_free_dentry;
708 /* OK, This is the point of no return */
709 current->mm->def_flags = def_flags;
711 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
712 may depend on the personality. */
713 SET_PERSONALITY(loc->elf_ex);
714 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
715 current->personality |= READ_IMPLIES_EXEC;
717 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
718 current->flags |= PF_RANDOMIZE;
720 setup_new_exec(bprm);
722 /* Do this so that we can load the interpreter, if need be. We will
723 change some of these later */
724 current->mm->free_area_cache = current->mm->mmap_base;
725 current->mm->cached_hole_size = 0;
726 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
729 send_sig(SIGKILL, current, 0);
730 goto out_free_dentry;
733 current->mm->start_stack = bprm->p;
735 /* Now we do a little grungy work by mmapping the ELF image into
736 the correct location in memory. */
737 for(i = 0, elf_ppnt = elf_phdata;
738 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
739 int elf_prot = 0, elf_flags;
740 unsigned long k, vaddr;
742 if (elf_ppnt->p_type != PT_LOAD)
745 if (unlikely (elf_brk > elf_bss)) {
748 /* There was a PT_LOAD segment with p_memsz > p_filesz
749 before this one. Map anonymous pages, if needed,
750 and clear the area. */
751 retval = set_brk(elf_bss + load_bias,
752 elf_brk + load_bias);
754 send_sig(SIGKILL, current, 0);
755 goto out_free_dentry;
757 nbyte = ELF_PAGEOFFSET(elf_bss);
759 nbyte = ELF_MIN_ALIGN - nbyte;
760 if (nbyte > elf_brk - elf_bss)
761 nbyte = elf_brk - elf_bss;
762 if (clear_user((void __user *)elf_bss +
765 * This bss-zeroing can fail if the ELF
766 * file specifies odd protections. So
767 * we don't check the return value
773 if (elf_ppnt->p_flags & PF_R)
774 elf_prot |= PROT_READ;
775 if (elf_ppnt->p_flags & PF_W)
776 elf_prot |= PROT_WRITE;
777 if (elf_ppnt->p_flags & PF_X)
778 elf_prot |= PROT_EXEC;
780 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
782 vaddr = elf_ppnt->p_vaddr;
783 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
784 elf_flags |= MAP_FIXED;
785 } else if (loc->elf_ex.e_type == ET_DYN) {
786 /* Try and get dynamic programs out of the way of the
787 * default mmap base, as well as whatever program they
788 * might try to exec. This is because the brk will
789 * follow the loader, and is not movable. */
790 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
791 /* Memory randomization might have been switched off
792 * at runtime via sysctl.
793 * If that is the case, retain the original non-zero
794 * load_bias value in order to establish proper
795 * non-randomized mappings.
797 if (current->flags & PF_RANDOMIZE)
800 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
802 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
806 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
807 elf_prot, elf_flags, 0);
808 if (BAD_ADDR(error)) {
809 send_sig(SIGKILL, current, 0);
810 retval = IS_ERR((void *)error) ?
811 PTR_ERR((void*)error) : -EINVAL;
812 goto out_free_dentry;
815 if (!load_addr_set) {
817 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
818 if (loc->elf_ex.e_type == ET_DYN) {
820 ELF_PAGESTART(load_bias + vaddr);
821 load_addr += load_bias;
822 reloc_func_desc = load_bias;
825 k = elf_ppnt->p_vaddr;
832 * Check to see if the section's size will overflow the
833 * allowed task size. Note that p_filesz must always be
834 * <= p_memsz so it is only necessary to check p_memsz.
836 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
837 elf_ppnt->p_memsz > TASK_SIZE ||
838 TASK_SIZE - elf_ppnt->p_memsz < k) {
839 /* set_brk can never work. Avoid overflows. */
840 send_sig(SIGKILL, current, 0);
842 goto out_free_dentry;
845 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
849 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
853 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
858 loc->elf_ex.e_entry += load_bias;
859 elf_bss += load_bias;
860 elf_brk += load_bias;
861 start_code += load_bias;
862 end_code += load_bias;
863 start_data += load_bias;
864 end_data += load_bias;
866 /* Calling set_brk effectively mmaps the pages that we need
867 * for the bss and break sections. We must do this before
868 * mapping in the interpreter, to make sure it doesn't wind
869 * up getting placed where the bss needs to go.
871 retval = set_brk(elf_bss, elf_brk);
873 send_sig(SIGKILL, current, 0);
874 goto out_free_dentry;
876 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
877 send_sig(SIGSEGV, current, 0);
878 retval = -EFAULT; /* Nobody gets to see this, but.. */
879 goto out_free_dentry;
882 if (elf_interpreter) {
883 unsigned long uninitialized_var(interp_map_addr);
885 elf_entry = load_elf_interp(&loc->interp_elf_ex,
889 if (!IS_ERR((void *)elf_entry)) {
891 * load_elf_interp() returns relocation adjustment.
894 interp_load_addr = elf_entry;
895 elf_entry += loc->interp_elf_ex.e_entry;
897 if (BAD_ADDR(elf_entry)) {
898 force_sig(SIGSEGV, current);
899 retval = IS_ERR((void *)elf_entry) ?
900 (int)elf_entry : -EINVAL;
901 goto out_free_dentry;
903 reloc_func_desc = interp_load_addr;
905 allow_write_access(interpreter);
907 kfree(elf_interpreter);
909 elf_entry = loc->elf_ex.e_entry;
910 if (BAD_ADDR(elf_entry)) {
911 force_sig(SIGSEGV, current);
913 goto out_free_dentry;
919 set_binfmt(&elf_format);
921 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
922 retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
924 send_sig(SIGKILL, current, 0);
927 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
929 install_exec_creds(bprm);
930 retval = create_elf_tables(bprm, &loc->elf_ex,
931 load_addr, interp_load_addr);
933 send_sig(SIGKILL, current, 0);
936 /* N.B. passed_fileno might not be initialized? */
937 current->mm->end_code = end_code;
938 current->mm->start_code = start_code;
939 current->mm->start_data = start_data;
940 current->mm->end_data = end_data;
941 current->mm->start_stack = bprm->p;
943 #ifdef arch_randomize_brk
944 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
945 current->mm->brk = current->mm->start_brk =
946 arch_randomize_brk(current->mm);
947 #ifdef CONFIG_COMPAT_BRK
948 current->brk_randomized = 1;
953 if (current->personality & MMAP_PAGE_ZERO) {
954 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
955 and some applications "depend" upon this behavior.
956 Since we do not have the power to recompile these, we
957 emulate the SVr4 behavior. Sigh. */
958 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
959 MAP_FIXED | MAP_PRIVATE, 0);
964 * The ABI may specify that certain registers be set up in special
965 * ways (on i386 %edx is the address of a DT_FINI function, for
966 * example). In addition, it may also specify (eg, PowerPC64 ELF)
967 * that the e_entry field is the address of the function descriptor
968 * for the startup routine, rather than the address of the startup
969 * routine itself. This macro performs whatever initialization to
970 * the regs structure is required as well as any relocations to the
971 * function descriptor entries when executing dynamically linked apps.
973 ELF_PLAT_INIT(regs, reloc_func_desc);
976 start_thread(regs, elf_entry, bprm->p);
985 allow_write_access(interpreter);
989 kfree(elf_interpreter);
995 /* This is really simpleminded and specialized - we are loading an
996 a.out library that is given an ELF header. */
997 static int load_elf_library(struct file *file)
999 struct elf_phdr *elf_phdata;
1000 struct elf_phdr *eppnt;
1001 unsigned long elf_bss, bss, len;
1002 int retval, error, i, j;
1003 struct elfhdr elf_ex;
1006 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1007 if (retval != sizeof(elf_ex))
1010 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1013 /* First of all, some simple consistency checks */
1014 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1015 !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1018 /* Now read in all of the header information */
1020 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1021 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1024 elf_phdata = kmalloc(j, GFP_KERNEL);
1030 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1034 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1035 if ((eppnt + i)->p_type == PT_LOAD)
1040 while (eppnt->p_type != PT_LOAD)
1043 /* Now use mmap to map the library into memory. */
1044 error = vm_mmap(file,
1045 ELF_PAGESTART(eppnt->p_vaddr),
1047 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1048 PROT_READ | PROT_WRITE | PROT_EXEC,
1049 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1051 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1052 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1055 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1056 if (padzero(elf_bss)) {
1061 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1063 bss = eppnt->p_memsz + eppnt->p_vaddr;
1065 vm_brk(len, bss - len);
1074 #ifdef CONFIG_ELF_CORE
1078 * Modelled on fs/exec.c:aout_core_dump()
1083 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1084 * that are useful for post-mortem analysis are included in every core dump.
1085 * In that way we ensure that the core dump is fully interpretable later
1086 * without matching up the same kernel and hardware config to see what PC values
1087 * meant. These special mappings include the vDSO, vsyscall, and other
1088 * architecture-specific mappings.
1090 static bool always_dump_vma(struct vm_area_struct *vma)
1092 /* Any vsyscall mappings? */
1093 if (vma == get_gate_vma(vma->vm_mm))
1096 * arch_vma_name() returns non-NULL for special architecture mappings,
1097 * such as vDSO sections.
1099 if (arch_vma_name(vma))
1106 * Decide how much of a segment to dump: part, all, or none.
1108 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1109 unsigned long mm_flags)
1111 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1113 /* always dump the vdso and vsyscall sections */
1114 if (always_dump_vma(vma))
1117 if (vma->vm_flags & VM_NODUMP)
1120 /* Hugetlb memory check */
1121 if (vma->vm_flags & VM_HUGETLB) {
1122 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1124 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1128 /* Do not dump I/O mapped devices or special mappings */
1129 if (vma->vm_flags & (VM_IO | VM_RESERVED))
1132 /* By default, dump shared memory if mapped from an anonymous file. */
1133 if (vma->vm_flags & VM_SHARED) {
1134 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1135 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1140 /* Dump segments that have been written to. */
1141 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1143 if (vma->vm_file == NULL)
1146 if (FILTER(MAPPED_PRIVATE))
1150 * If this looks like the beginning of a DSO or executable mapping,
1151 * check for an ELF header. If we find one, dump the first page to
1152 * aid in determining what was mapped here.
1154 if (FILTER(ELF_HEADERS) &&
1155 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1156 u32 __user *header = (u32 __user *) vma->vm_start;
1158 mm_segment_t fs = get_fs();
1160 * Doing it this way gets the constant folded by GCC.
1164 char elfmag[SELFMAG];
1166 BUILD_BUG_ON(SELFMAG != sizeof word);
1167 magic.elfmag[EI_MAG0] = ELFMAG0;
1168 magic.elfmag[EI_MAG1] = ELFMAG1;
1169 magic.elfmag[EI_MAG2] = ELFMAG2;
1170 magic.elfmag[EI_MAG3] = ELFMAG3;
1172 * Switch to the user "segment" for get_user(),
1173 * then put back what elf_core_dump() had in place.
1176 if (unlikely(get_user(word, header)))
1179 if (word == magic.cmp)
1188 return vma->vm_end - vma->vm_start;
1191 /* An ELF note in memory */
1196 unsigned int datasz;
1200 static int notesize(struct memelfnote *en)
1204 sz = sizeof(struct elf_note);
1205 sz += roundup(strlen(en->name) + 1, 4);
1206 sz += roundup(en->datasz, 4);
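/*
 * Illustrative example (editorial annotation, not part of the original
 * source): for a note named "CORE" (5 bytes including the NUL, rounded
 * up to 8) carrying a 21-byte payload (rounded up to 24), notesize()
 * returns sizeof(struct elf_note) + 8 + 24.
 */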
1211 #define DUMP_WRITE(addr, nr, foffset) \
1212 do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1214 static int alignfile(struct file *file, loff_t *foffset)
1216 static const char buf[4] = { 0, };
1217 DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1221 static int writenote(struct memelfnote *men, struct file *file,
1225 en.n_namesz = strlen(men->name) + 1;
1226 en.n_descsz = men->datasz;
1227 en.n_type = men->type;
1229 DUMP_WRITE(&en, sizeof(en), foffset);
1230 DUMP_WRITE(men->name, en.n_namesz, foffset);
1231 if (!alignfile(file, foffset))
1233 DUMP_WRITE(men->data, men->datasz, foffset);
1234 if (!alignfile(file, foffset))
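/*
 * Editorial note (not part of the original source): each note is
 * emitted as a struct elf_note header (n_namesz, n_descsz, n_type)
 * followed by the name and then the descriptor data, with alignfile()
 * padding both the name and the data out to 4-byte boundaries as the
 * ELF note format requires.
 */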
1241 static void fill_elf_header(struct elfhdr *elf, int segs,
1242 u16 machine, u32 flags, u8 osabi)
1244 memset(elf, 0, sizeof(*elf));
1246 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1247 elf->e_ident[EI_CLASS] = ELF_CLASS;
1248 elf->e_ident[EI_DATA] = ELF_DATA;
1249 elf->e_ident[EI_VERSION] = EV_CURRENT;
1250 elf->e_ident[EI_OSABI] = ELF_OSABI;
1252 elf->e_type = ET_CORE;
1253 elf->e_machine = machine;
1254 elf->e_version = EV_CURRENT;
1255 elf->e_phoff = sizeof(struct elfhdr);
1256 elf->e_flags = flags;
1257 elf->e_ehsize = sizeof(struct elfhdr);
1258 elf->e_phentsize = sizeof(struct elf_phdr);
1259 elf->e_phnum = segs;
1264 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1266 phdr->p_type = PT_NOTE;
1267 phdr->p_offset = offset;
1270 phdr->p_filesz = sz;
1277 static void fill_note(struct memelfnote *note, const char *name, int type,
1278 unsigned int sz, void *data)
1288 * fill in all the fields in prstatus from the given task struct, except the
1289 * registers, which need to be filled in separately.
1291 static void fill_prstatus(struct elf_prstatus *prstatus,
1292 struct task_struct *p, long signr)
1294 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1295 prstatus->pr_sigpend = p->pending.signal.sig[0];
1296 prstatus->pr_sighold = p->blocked.sig[0];
1298 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1300 prstatus->pr_pid = task_pid_vnr(p);
1301 prstatus->pr_pgrp = task_pgrp_vnr(p);
1302 prstatus->pr_sid = task_session_vnr(p);
1303 if (thread_group_leader(p)) {
1304 struct task_cputime cputime;
1307 * This is the record for the group leader. It shows the
1308 * group-wide total, not its individual thread total.
1310 thread_group_cputime(p, &cputime);
1311 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1312 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1314 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1315 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1317 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1318 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1321 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1322 struct mm_struct *mm)
1324 const struct cred *cred;
1325 unsigned int i, len;
1327 /* first copy the parameters from user space */
1328 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1330 len = mm->arg_end - mm->arg_start;
1331 if (len >= ELF_PRARGSZ)
1332 len = ELF_PRARGSZ-1;
1333 if (copy_from_user(&psinfo->pr_psargs,
1334 (const char __user *)mm->arg_start, len))
1336 for(i = 0; i < len; i++)
1337 if (psinfo->pr_psargs[i] == 0)
1338 psinfo->pr_psargs[i] = ' ';
1339 psinfo->pr_psargs[len] = 0;
1342 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1344 psinfo->pr_pid = task_pid_vnr(p);
1345 psinfo->pr_pgrp = task_pgrp_vnr(p);
1346 psinfo->pr_sid = task_session_vnr(p);
1348 i = p->state ? ffz(~p->state) + 1 : 0;
1349 psinfo->pr_state = i;
1350 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1351 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1352 psinfo->pr_nice = task_nice(p);
1353 psinfo->pr_flag = p->flags;
1355 cred = __task_cred(p);
1356 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1357 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1359 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1364 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1366 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1370 while (auxv[i - 2] != AT_NULL);
1371 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
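/*
 * Editorial note (not part of the original source): saved_auxv holds
 * (id, value) pairs, so the loop above advances two entries at a time
 * until it has passed the AT_NULL terminator; the resulting note
 * therefore covers every pair including the terminating AT_NULL entry.
 */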
1374 #ifdef CORE_DUMP_USE_REGSET
1375 #include <linux/regset.h>
1377 struct elf_thread_core_info {
1378 struct elf_thread_core_info *next;
1379 struct task_struct *task;
1380 struct elf_prstatus prstatus;
1381 struct memelfnote notes[0];
1384 struct elf_note_info {
1385 struct elf_thread_core_info *thread;
1386 struct memelfnote psinfo;
1387 struct memelfnote auxv;
1393 * When a regset has a writeback hook, we call it on each thread before
1394 * dumping user memory. On register window machines, this makes sure the
1395 * user memory backing the register data is up to date before we read it.
1397 static void do_thread_regset_writeback(struct task_struct *task,
1398 const struct user_regset *regset)
1400 if (regset->writeback)
1401 regset->writeback(task, regset, 1);
1405 #define PR_REG_SIZE(S) sizeof(S)
1408 #ifndef PRSTATUS_SIZE
1409 #define PRSTATUS_SIZE(S) sizeof(S)
1413 #define PR_REG_PTR(S) (&((S)->pr_reg))
1416 #ifndef SET_PR_FPVALID
1417 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1420 static int fill_thread_core_info(struct elf_thread_core_info *t,
1421 const struct user_regset_view *view,
1422 long signr, size_t *total)
1427 * NT_PRSTATUS is the one special case, because the regset data
1428 * goes into the pr_reg field inside the note contents, rather
1429 * than being the whole note contents. We fill the rest in here.
1430 * We assume that regset 0 is NT_PRSTATUS.
1432 fill_prstatus(&t->prstatus, t->task, signr);
1433 (void) view->regsets[0].get(t->task, &view->regsets[0],
1434 0, PR_REG_SIZE(t->prstatus.pr_reg),
1435 PR_REG_PTR(&t->prstatus), NULL);
1437 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1438 PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1439 *total += notesize(&t->notes[0]);
1441 do_thread_regset_writeback(t->task, &view->regsets[0]);
1444 * Each other regset might generate a note too. For each regset
1445 * that has no core_note_type or is inactive, we leave t->notes[i]
1446 * all zero and we'll know to skip writing it later.
1448 for (i = 1; i < view->n; ++i) {
1449 const struct user_regset *regset = &view->regsets[i];
1450 do_thread_regset_writeback(t->task, regset);
1451 if (regset->core_note_type && regset->get &&
1452 (!regset->active || regset->active(t->task, regset))) {
1454 size_t size = regset->n * regset->size;
1455 void *data = kmalloc(size, GFP_KERNEL);
1456 if (unlikely(!data))
1458 ret = regset->get(t->task, regset,
1459 0, size, data, NULL);
1463 if (regset->core_note_type != NT_PRFPREG)
1464 fill_note(&t->notes[i], "LINUX",
1465 regset->core_note_type,
1468 SET_PR_FPVALID(&t->prstatus, 1);
1469 fill_note(&t->notes[i], "CORE",
1470 NT_PRFPREG, size, data);
1472 *total += notesize(&t->notes[i]);
1480 static int fill_note_info(struct elfhdr *elf, int phdrs,
1481 struct elf_note_info *info,
1482 long signr, struct pt_regs *regs)
1484 struct task_struct *dump_task = current;
1485 const struct user_regset_view *view = task_user_regset_view(dump_task);
1486 struct elf_thread_core_info *t;
1487 struct elf_prpsinfo *psinfo;
1488 struct core_thread *ct;
1492 info->thread = NULL;
1494 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1498 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1501 * Figure out how many notes we're going to need for each thread.
1503 info->thread_notes = 0;
1504 for (i = 0; i < view->n; ++i)
1505 if (view->regsets[i].core_note_type != 0)
1506 ++info->thread_notes;
1509 * Sanity check. We rely on regset 0 being NT_PRSTATUS,
1510 * since it is our one special case.
1512 if (unlikely(info->thread_notes == 0) ||
1513 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1519 * Initialize the ELF file header.
1521 fill_elf_header(elf, phdrs,
1522 view->e_machine, view->e_flags, view->ei_osabi);
1525 * Allocate a structure for each thread.
1527 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1528 t = kzalloc(offsetof(struct elf_thread_core_info,
1529 notes[info->thread_notes]),
1535 if (ct->task == dump_task || !info->thread) {
1536 t->next = info->thread;
1540 * Make sure to keep the original task at
1541 * the head of the list.
1543 t->next = info->thread->next;
1544 info->thread->next = t;
1549 * Now fill in each thread's information.
1551 for (t = info->thread; t != NULL; t = t->next)
1552 if (!fill_thread_core_info(t, view, signr, &info->size))
1556 * Fill in the two process-wide notes.
1558 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1559 info->size += notesize(&info->psinfo);
1561 fill_auxv_note(&info->auxv, current->mm);
1562 info->size += notesize(&info->auxv);
1567 static size_t get_note_info_size(struct elf_note_info *info)
1573 * Write all the notes for each thread. When writing the first thread, the
1574 * process-wide notes are interleaved after the first thread-specific note.
1576 static int write_note_info(struct elf_note_info *info,
1577 struct file *file, loff_t *foffset)
1580 struct elf_thread_core_info *t = info->thread;
1585 if (!writenote(&t->notes[0], file, foffset))
1588 if (first && !writenote(&info->psinfo, file, foffset))
1590 if (first && !writenote(&info->auxv, file, foffset))
1593 for (i = 1; i < info->thread_notes; ++i)
1594 if (t->notes[i].data &&
1595 !writenote(&t->notes[i], file, foffset))
1605 static void free_note_info(struct elf_note_info *info)
1607 struct elf_thread_core_info *threads = info->thread;
1610 struct elf_thread_core_info *t = threads;
1612 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1613 for (i = 1; i < info->thread_notes; ++i)
1614 kfree(t->notes[i].data);
1617 kfree(info->psinfo.data);
1622 /* Here is the structure in which the status of each thread is captured. */
1623 struct elf_thread_status
1625 struct list_head list;
1626 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1627 elf_fpregset_t fpu; /* NT_PRFPREG */
1628 struct task_struct *thread;
1629 #ifdef ELF_CORE_COPY_XFPREGS
1630 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
1632 struct memelfnote notes[3];
1637 * In order to add the specific thread information for the ELF file format,
1638 * we need to keep a linked list of every thread's pr_status and then create
1639 * a single section for them in the final core file.
1641 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1644 struct task_struct *p = t->thread;
1647 fill_prstatus(&t->prstatus, p, signr);
1648 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1650 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1653 sz += notesize(&t->notes[0]);
1655 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1657 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1660 sz += notesize(&t->notes[1]);
1663 #ifdef ELF_CORE_COPY_XFPREGS
1664 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1665 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1666 sizeof(t->xfpu), &t->xfpu);
1668 sz += notesize(&t->notes[2]);
1674 struct elf_note_info {
1675 struct memelfnote *notes;
1676 struct elf_prstatus *prstatus; /* NT_PRSTATUS */
1677 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
1678 struct list_head thread_list;
1679 elf_fpregset_t *fpu;
1680 #ifdef ELF_CORE_COPY_XFPREGS
1681 elf_fpxregset_t *xfpu;
1683 int thread_status_size;
1687 static int elf_note_info_init(struct elf_note_info *info)
1689 memset(info, 0, sizeof(*info));
1690 INIT_LIST_HEAD(&info->thread_list);
1692 /* Allocate space for six ELF notes */
1693 info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1696 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1699 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1700 if (!info->prstatus)
1702 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1705 #ifdef ELF_CORE_COPY_XFPREGS
1706 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1711 #ifdef ELF_CORE_COPY_XFPREGS
1716 kfree(info->prstatus);
1718 kfree(info->psinfo);
1724 static int fill_note_info(struct elfhdr *elf, int phdrs,
1725 struct elf_note_info *info,
1726 long signr, struct pt_regs *regs)
1728 struct list_head *t;
1730 if (!elf_note_info_init(info))
1734 struct core_thread *ct;
1735 struct elf_thread_status *ets;
1737 for (ct = current->mm->core_state->dumper.next;
1738 ct; ct = ct->next) {
1739 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1743 ets->thread = ct->task;
1744 list_add(&ets->list, &info->thread_list);
1747 list_for_each(t, &info->thread_list) {
1750 ets = list_entry(t, struct elf_thread_status, list);
1751 sz = elf_dump_thread_status(signr, ets);
1752 info->thread_status_size += sz;
1755 /* now collect the dump for the current task */
1756 memset(info->prstatus, 0, sizeof(*info->prstatus));
1757 fill_prstatus(info->prstatus, current, signr);
1758 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1761 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1764 * Set up the notes in similar form to SVR4 core dumps made
1765 * with info from their /proc.
1768 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1769 sizeof(*info->prstatus), info->prstatus);
1770 fill_psinfo(info->psinfo, current->group_leader, current->mm);
1771 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1772 sizeof(*info->psinfo), info->psinfo);
1776 fill_auxv_note(&info->notes[info->numnote++], current->mm);
1778 /* Try to dump the FPU. */
1779 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1781 if (info->prstatus->pr_fpvalid)
1782 fill_note(info->notes + info->numnote++,
1783 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1784 #ifdef ELF_CORE_COPY_XFPREGS
1785 if (elf_core_copy_task_xfpregs(current, info->xfpu))
1786 fill_note(info->notes + info->numnote++,
1787 "LINUX", ELF_CORE_XFPREG_TYPE,
1788 sizeof(*info->xfpu), info->xfpu);
1794 static size_t get_note_info_size(struct elf_note_info *info)
1799 for (i = 0; i < info->numnote; i++)
1800 sz += notesize(info->notes + i);
1802 sz += info->thread_status_size;
1807 static int write_note_info(struct elf_note_info *info,
1808 struct file *file, loff_t *foffset)
1811 struct list_head *t;
1813 for (i = 0; i < info->numnote; i++)
1814 if (!writenote(info->notes + i, file, foffset))
1817 /* write out the thread status notes section */
1818 list_for_each(t, &info->thread_list) {
1819 struct elf_thread_status *tmp =
1820 list_entry(t, struct elf_thread_status, list);
1822 for (i = 0; i < tmp->num_notes; i++)
1823 if (!writenote(&tmp->notes[i], file, foffset))
1830 static void free_note_info(struct elf_note_info *info)
1832 while (!list_empty(&info->thread_list)) {
1833 struct list_head *tmp = info->thread_list.next;
1835 kfree(list_entry(tmp, struct elf_thread_status, list));
1838 kfree(info->prstatus);
1839 kfree(info->psinfo);
1842 #ifdef ELF_CORE_COPY_XFPREGS
1849 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1850 struct vm_area_struct *gate_vma)
1852 struct vm_area_struct *ret = tsk->mm->mmap;
1859 * Helper function for iterating across a vma list. It ensures that the caller
1860 * will visit `gate_vma' prior to terminating the search.
1862 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1863 struct vm_area_struct *gate_vma)
1865 struct vm_area_struct *ret;
1867 ret = this_vma->vm_next;
1870 if (this_vma == gate_vma)
1875 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1876 elf_addr_t e_shoff, int segs)
1878 elf->e_shoff = e_shoff;
1879 elf->e_shentsize = sizeof(*shdr4extnum);
1881 elf->e_shstrndx = SHN_UNDEF;
1883 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1885 shdr4extnum->sh_type = SHT_NULL;
1886 shdr4extnum->sh_size = elf->e_shnum;
1887 shdr4extnum->sh_link = elf->e_shstrndx;
1888 shdr4extnum->sh_info = segs;
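/*
 * Editorial note (not part of the original source): when the real
 * segment count exceeds PN_XNUM (0xffff), e_phnum is set to PN_XNUM and
 * the true count is carried in the sh_info field of this single
 * SHT_NULL section header, which is the standard ELF extended-numbering
 * convention.
 */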
1891 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
1892 unsigned long mm_flags)
1894 struct vm_area_struct *vma;
1897 for (vma = first_vma(current, gate_vma); vma != NULL;
1898 vma = next_vma(vma, gate_vma))
1899 size += vma_dump_size(vma, mm_flags);
1906 * This is a two-pass process; first we find the offsets of the bits,
1907 * and then they are actually written out. If we run out of core limit we just truncate.
1910 static int elf_core_dump(struct coredump_params *cprm)
1916 struct vm_area_struct *vma, *gate_vma;
1917 struct elfhdr *elf = NULL;
1918 loff_t offset = 0, dataoff, foffset;
1919 struct elf_note_info info;
1920 struct elf_phdr *phdr4note = NULL;
1921 struct elf_shdr *shdr4extnum = NULL;
1926 * We no longer stop all VM operations.
1928 * This is because those processes that could possibly change map_count
1929 * or the mmap / vma pages are now blocked in do_exit on current
1930 * finishing this core dump.
1932 * Only ptrace can touch these memory addresses, but it doesn't change
1933 * the map_count or the pages allocated. So no possibility of crashing
1934 * exists while dumping the mm->vm_next areas to the core file.
1937 /* alloc memory for large data structures: too large to be on stack */
1938 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1942 * The number of segs is recorded in the ELF header as a 16-bit value.
1943 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
1945 segs = current->mm->map_count;
1946 segs += elf_core_extra_phdrs();
1948 gate_vma = get_gate_vma(current->mm);
1949 if (gate_vma != NULL)
1952 /* for notes section */
1955 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
1956 * this, the kernel supports extended numbering. Have a look at
1957 * include/linux/elf.h for further information. */
1958 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
1961 * Collect all the non-memory information about the process for the
1962 * notes. This also sets up the file header.
1964 if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
1968 current->flags |= PF_DUMPCORE;
1973 offset += sizeof(*elf); /* Elf header */
1974 offset += segs * sizeof(struct elf_phdr); /* Program headers */
1977 /* Write notes phdr entry */
1979 size_t sz = get_note_info_size(&info);
1981 sz += elf_coredump_extra_notes_size();
1983 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
1987 fill_elf_note_phdr(phdr4note, sz, offset);
1991 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1993 offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
1994 offset += elf_core_extra_data_size();
1997 if (e_phnum == PN_XNUM) {
1998 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2001 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2006 size += sizeof(*elf);
2007 if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2010 size += sizeof(*phdr4note);
2011 if (size > cprm->limit
2012 || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2015 /* Write program headers for segments dump */
2016 for (vma = first_vma(current, gate_vma); vma != NULL;
2017 vma = next_vma(vma, gate_vma)) {
2018 struct elf_phdr phdr;
2020 phdr.p_type = PT_LOAD;
2021 phdr.p_offset = offset;
2022 phdr.p_vaddr = vma->vm_start;
2024 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2025 phdr.p_memsz = vma->vm_end - vma->vm_start;
2026 offset += phdr.p_filesz;
2027 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2028 if (vma->vm_flags & VM_WRITE)
2029 phdr.p_flags |= PF_W;
2030 if (vma->vm_flags & VM_EXEC)
2031 phdr.p_flags |= PF_X;
2032 phdr.p_align = ELF_EXEC_PAGESIZE;
2034 size += sizeof(phdr);
2035 if (size > cprm->limit
2036 || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2040 if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2043 /* write out the notes section */
2044 if (!write_note_info(&info, cprm->file, &foffset))
2047 if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2051 if (!dump_seek(cprm->file, dataoff - foffset))
2054 for (vma = first_vma(current, gate_vma); vma != NULL;
2055 vma = next_vma(vma, gate_vma)) {
2059 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2061 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2065 page = get_dump_page(addr);
2067 void *kaddr = kmap(page);
2068 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2069 !dump_write(cprm->file, kaddr,
2072 page_cache_release(page);
2074 stop = !dump_seek(cprm->file, PAGE_SIZE);
2080 if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2083 if (e_phnum == PN_XNUM) {
2084 size += sizeof(*shdr4extnum);
2085 if (size > cprm->limit
2086 || !dump_write(cprm->file, shdr4extnum,
2087 sizeof(*shdr4extnum)))
2095 free_note_info(&info);
2103 #endif /* CONFIG_ELF_CORE */
2105 static int __init init_elf_binfmt(void)
2107 register_binfmt(&elf_format);
2111 static void __exit exit_elf_binfmt(void)
2113 /* Remove the ELF loader. */
2114 unregister_binfmt(&elf_format);
2117 core_initcall(init_elf_binfmt);
2118 module_exit(exit_elf_binfmt);
2119 MODULE_LICENSE("GPL");