/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>
#include <asm/exec.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
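
/*
 * Editor's illustration (not from the original source), assuming
 * ELF_MIN_ALIGN == 4096 (0x1000):
 *
 *	ELF_PAGESTART(0x10234)  == 0x10000	round down to page start
 *	ELF_PAGEOFFSET(0x10234) == 0x234	offset within the page
 *	ELF_PAGEALIGN(0x10234)  == 0x11000	round up to the next page
 */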
static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		addr = vm_brk(start, end - start);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}
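
/*
 * Editor's illustration (assuming ELF_MIN_ALIGN == 4096): a call such as
 * set_brk(0x0804a000, 0x0804b234) page-aligns both ends, vm_brk()s the
 * anonymous range [0x0804a000, 0x0804c000), and records 0x0804c000 as the
 * new program break in current->mm.
 */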

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  This would
   contain the junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}
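
/*
 * Editor's illustration (assuming ELF_MIN_ALIGN == 4096): if the data
 * segment's file image ends at elf_bss == 0x08049f10, the mapping still
 * exposes the rest of that page up to 0x0804a000, so padzero() clears the
 * trailing 0xf0 bytes that would otherwise leak file contents into the bss.
 */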

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
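
/*
 * Editor's illustration for the common downward-growing case: with
 * sp == 0xbfff1000, STACK_ALLOC(sp, 16) lowers sp to 0xbfff0ff0 and
 * yields that new address, STACK_ADD(sp, 2) steps two elf_addr_t slots
 * further down, and STACK_ROUND() masks the result to the 16-byte
 * alignment most ABIs require of the final stack pointer.
 */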

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		unsigned long load_addr, unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif


	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	vma = find_extend_vma(current->mm, bprm->p);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	argv = sp;
	envp = argv + argc + 1;

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}
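
/*
 * Editor's illustration (not from the original source): on a
 * downward-growing stack the tables built above end up laid out as,
 * from the final stack pointer upward:
 *
 *	argc
 *	argv[0] ... argv[argc - 1], NULL
 *	envp[0] ... envp[envc - 1], NULL
 *	auxv id/value pairs, terminated by AT_NULL
 *	...
 *	argument and environment strings, the AT_PLATFORM /
 *	AT_BASE_PLATFORM strings and the 16 AT_RANDOM bytes near
 *	the original stack top
 */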

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image (since size < total_size).
	 * So we first map the 'big' image - and unmap the remainder at
	 * the end (the unmap is needed for ELF images with holes).
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			vm_munmap(map_addr+size, total_size-size);
	} else
		map_addr = vm_mmap(filep, addr, size, prot, type, off);

	return map_addr;
}
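
/*
 * Editor's worked example (assuming ELF_MIN_ALIGN == 4096): for a phdr
 * with p_vaddr == 0x08048110, p_filesz == 0x300 and p_offset == 0x110,
 * elf_map() maps from addr 0x08048000 with size ELF_PAGEALIGN(0x300 +
 * 0x110) == 0x1000 at file offset 0, so the segment keeps the same
 * offset within its page as it has within the file.
 */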

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
	int i, first_idx = -1, last_idx = -1;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			last_idx = i;
			if (first_idx == -1)
				first_idx = i;
		}
	}
	if (first_idx == -1)
		return 0;

	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
				ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
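
/*
 * Editor's example: given PT_LOAD entries at p_vaddr 0x0 (p_memsz 0x1000)
 * and p_vaddr 0x200000 (p_memsz 0x800), the initial reservation must span
 * 0x200000 + 0x800 - ELF_PAGESTART(0x0) == 0x200800 bytes, holes included;
 * elf_map() trims the excess back off after the first mapping succeeds.
 */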


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_map_addr,
		unsigned long no_base)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (interp_elf_ex->e_phnum < 1 ||
		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out_close;
	}

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			total_size = 0;
			if (!*interp_map_addr)
				*interp_map_addr = map_addr;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	if (last_bss > elf_bss) {
		/*
		 * Now fill out the bss section.  First pad the last page up
		 * to the page boundary, and then perform a mmap to make sure
		 * that there are zero-mapped pages up to and including the
		 * last bss page.
		 */
		if (padzero(elf_bss)) {
			error = -EFAULT;
			goto out_close;
		}

		/* What we have mapped so far */
		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

		/* Map the last of the bss segment */
		error = vm_brk(elf_bss, last_bss - elf_bss);
		if (BAD_ADDR(error))
			goto out_close;
	}

	error = load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
		!(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
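
/*
 * Editor's worked example (assuming PAGE_SHIFT == 12): the default
 * STACK_RND_MASK of 0x7ff permits 0x800 == 2048 distinct page offsets;
 * shifted left by PAGE_SHIFT that is 2048 * 4 KiB == 8 MiB of stack-top
 * randomization, matching the "8MB of VA" note above.
 */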

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char * elf_interpreter = NULL;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc __maybe_unused = 0;
	int executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
		loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_ph;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_ph;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NUL terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;

			/*
			 * If the binary is not readable then enforce
			 * mm->dumpable = 0 regardless of the interpreter's
			 * permissions.
			 */
			would_dump(bprm, interpreter);

			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* OK, This is the point of no return */
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;

	setup_new_exec(bprm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory. */
	for(i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely (elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
			/* Memory randomization might have been switched off
			 * at runtime via sysctl.
			 * If that is the case, retain the original non-zero
			 * load_bias value in order to establish proper
			 * non-randomized mappings.
			 */
			if (current->flags & PF_RANDOMIZE)
				load_bias = 0;
			else
				load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
		}

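		/*
		 * Editor's note (illustrative): on i386, where TASK_SIZE is
		 * 0xC0000000, ELF_ET_DYN_BASE is TASK_SIZE / 3 * 2 ==
		 * 0x80000000, so a non-randomized PIE whose first PT_LOAD
		 * has p_vaddr 0 gets load_bias 0x80000000 from the branch
		 * above before the segment is mapped below.
		 */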
		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, 0);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void*)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		unsigned long uninitialized_var(interp_map_addr);

		elf_entry = load_elf_interp(&loc->interp_elf_ex,
					    interpreter,
					    &interp_map_addr,
					    load_bias);
		if (!IS_ERR((void *)elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += loc->interp_elf_ex.e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	install_exec_creds(bprm);
	retval = create_elf_tables(bprm, &loc->elf_ex,
			  load_addr, interp_load_addr);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
	/* N.B. passed_fileno might not be initialized? */
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
		current->mm->brk = current->mm->start_brk =
			arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
		current->brk_randomized = 1;
#endif
	}
#endif

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	error = vm_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len)
		vm_brk(len, bss - len);
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include the vDSO, vsyscall, and other
 * architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
	/* Any vsyscall mappings? */
	if (vma == get_gate_vma(vma->vm_mm))
		return true;
	/*
	 * arch_vma_name() returns non-NULL for special architecture mappings,
	 * such as vDSO sections.
	 */
	if (arch_vma_name(vma))
		return true;

	return false;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
				   unsigned long mm_flags)
{
#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))

	/* always dump the vdso and vsyscall sections */
	if (always_dump_vma(vma))
		goto whole;

	if (vma->vm_flags & VM_NODUMP)
		return 0;

	/* Hugetlb memory check */
	if (vma->vm_flags & VM_HUGETLB) {
		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
			goto whole;
		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
			goto whole;
	}

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & (VM_IO | VM_RESERVED))
		return 0;

	/* By default, dump shared memory if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED) {
		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
			goto whole;
		return 0;
	}

	/* Dump segments that have been written to.  */
	if (vma->anon_vma && FILTER(ANON_PRIVATE))
		goto whole;
	if (vma->vm_file == NULL)
		return 0;

	if (FILTER(MAPPED_PRIVATE))
		goto whole;

	/*
	 * If this looks like the beginning of a DSO or executable mapping,
	 * check for an ELF header.  If we find one, dump the first page to
	 * aid in determining what was mapped here.
	 */
	if (FILTER(ELF_HEADERS) &&
	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
		u32 __user *header = (u32 __user *) vma->vm_start;
		u32 word;
		mm_segment_t fs = get_fs();
		/*
		 * Doing it this way gets the constant folded by GCC.
		 */
		union {
			u32 cmp;
			char elfmag[SELFMAG];
		} magic;
		BUILD_BUG_ON(SELFMAG != sizeof word);
		magic.elfmag[EI_MAG0] = ELFMAG0;
		magic.elfmag[EI_MAG1] = ELFMAG1;
		magic.elfmag[EI_MAG2] = ELFMAG2;
		magic.elfmag[EI_MAG3] = ELFMAG3;
		/*
		 * Switch to the user "segment" for get_user(),
		 * then put back what elf_core_dump() had in place.
		 */
		set_fs(USER_DS);
		if (unlikely(get_user(word, header)))
			word = 0;
		set_fs(fs);
		if (word == magic.cmp)
			return PAGE_SIZE;
	}

#undef	FILTER

	return 0;

whole:
	return vma->vm_end - vma->vm_start;
}
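
/*
 * Editor's example: with only MMF_DUMP_ELF_HEADERS set in mm_flags, a
 * read-only private file mapping contributes nothing to the dump unless
 * its first page begins with the "\177ELF" magic, in which case exactly
 * one page is kept so the mapped file can be identified post-mortem.
 */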

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}
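
/*
 * Editor's worked example: a "CORE"/NT_PRSTATUS note with a 336-byte
 * payload (a common x86-64 struct elf_prstatus size) costs
 * sizeof(struct elf_note) (12) + roundup(strlen("CORE") + 1, 4) (8) +
 * roundup(336, 4) (336) == 356 bytes in the note segment.
 */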

#define DUMP_WRITE(addr, nr, foffset)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
	static const char buf[4] = { 0, };
	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
	return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE

static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags, u8 osabi)
{
	memset(elf, 0, sizeof(*elf));

	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;

	elf->e_type = ET_CORE;
	elf->e_machine = machine;
	elf->e_version = EV_CURRENT;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_flags = flags;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;

	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	rcu_read_lock();
	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		struct task_cputime cputime;

		/*
		 * This is the record for the group leader.  It shows the
		 * group-wide total, not its individual thread total.
		 */
		thread_group_cputime(p, &cputime);
		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
	} else {
		cputime_to_timeval(p->utime, &prstatus->pr_utime);
		cputime_to_timeval(p->stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	const struct cred *cred;
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
		           (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	rcu_read_lock();
	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	rcu_read_lock();
	cred = __task_cred(p);
	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
	rcu_read_unlock();
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
	int i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}
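
/*
 * Editor's note: saved_auxv holds id/value pairs, so the loop above counts
 * two entries at a time and stops only after consuming the terminating
 * AT_NULL pair; e.g. { AT_PAGESZ, 4096, AT_NULL, 0 } gives i == 4 and a
 * note of 4 * sizeof(elf_addr_t) bytes that includes AT_NULL itself.
 */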

#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>

struct elf_thread_core_info {
	struct elf_thread_core_info *next;
	struct task_struct *task;
	struct elf_prstatus prstatus;
	struct memelfnote notes[0];
};

struct elf_note_info {
	struct elf_thread_core_info *thread;
	struct memelfnote psinfo;
	struct memelfnote auxv;
	size_t size;
	int thread_notes;
};

/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory.  On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
				       const struct user_regset *regset)
{
	if (regset->writeback)
		regset->writeback(task, regset, 1);
}

#ifndef PR_REG_SIZE
#define PR_REG_SIZE(S) sizeof(S)
#endif

#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S) sizeof(S)
#endif

#ifndef PR_REG_PTR
#define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif

static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, size_t *total)
{
	unsigned int i;

	/*
	 * NT_PRSTATUS is the one special case, because the regset data
	 * goes into the pr_reg field inside the note contents, rather
	 * than being the whole note contents.  We fill the rest in here.
	 * We assume that regset 0 is NT_PRSTATUS.
	 */
	fill_prstatus(&t->prstatus, t->task, signr);
	(void) view->regsets[0].get(t->task, &view->regsets[0],
				    0, PR_REG_SIZE(t->prstatus.pr_reg),
				    PR_REG_PTR(&t->prstatus), NULL);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
		  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
	*total += notesize(&t->notes[0]);

	do_thread_regset_writeback(t->task, &view->regsets[0]);

	/*
	 * Each other regset might generate a note too.  For each regset
	 * that has no core_note_type or is inactive, we leave t->notes[i]
	 * all zero and we'll know to skip writing it later.
	 */
	for (i = 1; i < view->n; ++i) {
		const struct user_regset *regset = &view->regsets[i];
		do_thread_regset_writeback(t->task, regset);
		if (regset->core_note_type && regset->get &&
		    (!regset->active || regset->active(t->task, regset))) {
			int ret;
			size_t size = regset->n * regset->size;
			void *data = kmalloc(size, GFP_KERNEL);
			if (unlikely(!data))
				return 0;
			ret = regset->get(t->task, regset,
					  0, size, data, NULL);
			if (unlikely(ret))
				kfree(data);
			else {
				if (regset->core_note_type != NT_PRFPREG)
					fill_note(&t->notes[i], "LINUX",
						  regset->core_note_type,
						  size, data);
				else {
					SET_PR_FPVALID(&t->prstatus, 1);
					fill_note(&t->notes[i], "CORE",
						  NT_PRFPREG, size, data);
				}
				*total += notesize(&t->notes[i]);
			}
		}
	}

	return 1;
}

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  long signr, struct pt_regs *regs)
{
	struct task_struct *dump_task = current;
	const struct user_regset_view *view = task_user_regset_view(dump_task);
	struct elf_thread_core_info *t;
	struct elf_prpsinfo *psinfo;
	struct core_thread *ct;
	unsigned int i;

	info->size = 0;
	info->thread = NULL;

	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (psinfo == NULL)
		return 0;

	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	/*
	 * Figure out how many notes we're going to need for each thread.
	 */
	info->thread_notes = 0;
	for (i = 0; i < view->n; ++i)
		if (view->regsets[i].core_note_type != 0)
			++info->thread_notes;

	/*
	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
	 * since it is our one special case.
	 */
	if (unlikely(info->thread_notes == 0) ||
	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * Initialize the ELF file header.
	 */
	fill_elf_header(elf, phdrs,
			view->e_machine, view->e_flags, view->ei_osabi);

	/*
	 * Allocate a structure for each thread.
	 */
	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
		t = kzalloc(offsetof(struct elf_thread_core_info,
				     notes[info->thread_notes]),
			    GFP_KERNEL);
		if (unlikely(!t))
			return 0;

		t->task = ct->task;
		if (ct->task == dump_task || !info->thread) {
			t->next = info->thread;
			info->thread = t;
		} else {
			/*
			 * Make sure to keep the original task at
			 * the head of the list.
			 */
			t->next = info->thread->next;
			info->thread->next = t;
		}
	}

	/*
	 * Now fill in each thread's information.
	 */
	for (t = info->thread; t != NULL; t = t->next)
		if (!fill_thread_core_info(t, view, signr, &info->size))
			return 0;

	/*
	 * Fill in the two process-wide notes.
	 */
	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
	info->size += notesize(&info->psinfo);

	fill_auxv_note(&info->auxv, current->mm);
	info->size += notesize(&info->auxv);

	return 1;
}

static size_t get_note_info_size(struct elf_note_info *info)
{
	return info->size;
}

/*
 * Write all the notes for each thread.  When writing the first thread, the
 * process-wide notes are interleaved after the first thread-specific note.
 */
static int write_note_info(struct elf_note_info *info,
			   struct file *file, loff_t *foffset)
{
	bool first = 1;
	struct elf_thread_core_info *t = info->thread;

	do {
		int i;

		if (!writenote(&t->notes[0], file, foffset))
			return 0;

		if (first && !writenote(&info->psinfo, file, foffset))
			return 0;
		if (first && !writenote(&info->auxv, file, foffset))
			return 0;

		for (i = 1; i < info->thread_notes; ++i)
			if (t->notes[i].data &&
			    !writenote(&t->notes[i], file, foffset))
				return 0;

		first = 0;
		t = t->next;
	} while (t);

	return 1;
}

static void free_note_info(struct elf_note_info *info)
{
	struct elf_thread_core_info *threads = info->thread;
	while (threads) {
		unsigned int i;
		struct elf_thread_core_info *t = threads;
		threads = t->next;
		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
		for (i = 1; i < info->thread_notes; ++i)
			kfree(t->notes[i].data);
		kfree(t);
	}
	kfree(info->psinfo.data);
}

#else

/* Here is the structure in which status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
1642static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1643{
1644	int sz = 0;
1645	struct task_struct *p = t->thread;
1646	t->num_notes = 0;
1647
1648	fill_prstatus(&t->prstatus, p, signr);
1649	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);	
1650	
1651	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1652		  &(t->prstatus));
1653	t->num_notes++;
1654	sz += notesize(&t->notes[0]);
1655
1656	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1657								&t->fpu))) {
1658		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1659			  &(t->fpu));
1660		t->num_notes++;
1661		sz += notesize(&t->notes[1]);
1662	}
1663
1664#ifdef ELF_CORE_COPY_XFPREGS
1665	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1666		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1667			  sizeof(t->xfpu), &t->xfpu);
1668		t->num_notes++;
1669		sz += notesize(&t->notes[2]);
1670	}
1671#endif	
1672	return sz;
1673}
1674
1675struct elf_note_info {
1676	struct memelfnote *notes;
1677	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1678	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1679	struct list_head thread_list;
1680	elf_fpregset_t *fpu;
1681#ifdef ELF_CORE_COPY_XFPREGS
1682	elf_fpxregset_t *xfpu;
1683#endif
1684	int thread_status_size;
1685	int numnote;
1686};
1687
1688static int elf_note_info_init(struct elf_note_info *info)
1689{
1690	memset(info, 0, sizeof(*info));
1691	INIT_LIST_HEAD(&info->thread_list);
1692
1693	/* Allocate space for six ELF notes */
1694	info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1695	if (!info->notes)
1696		return 0;
1697	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1698	if (!info->psinfo)
1699		return 0;
1700	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1701	if (!info->prstatus)
1702		return 0;
1703	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1704	if (!info->fpu)
1705		return 0;
1706#ifdef ELF_CORE_COPY_XFPREGS
1707	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1708	if (!info->xfpu)
1709		return 0;
1710#endif
1711	return 1;
1712}
1713
1714static int fill_note_info(struct elfhdr *elf, int phdrs,
1715			  struct elf_note_info *info,
1716			  long signr, struct pt_regs *regs)
1717{
1718	struct list_head *t;
1719
1720	if (!elf_note_info_init(info))
1721		return 0;
1722
1723	if (signr) {
1724		struct core_thread *ct;
1725		struct elf_thread_status *ets;
1726
1727		for (ct = current->mm->core_state->dumper.next;
1728						ct; ct = ct->next) {
1729			ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1730			if (!ets)
1731				return 0;
1732
1733			ets->thread = ct->task;
1734			list_add(&ets->list, &info->thread_list);
1735		}
1736
1737		list_for_each(t, &info->thread_list) {
1738			int sz;
1739
1740			ets = list_entry(t, struct elf_thread_status, list);
1741			sz = elf_dump_thread_status(signr, ets);
1742			info->thread_status_size += sz;
1743		}
1744	}
1745	/* now collect the dump for the current */
1746	memset(info->prstatus, 0, sizeof(*info->prstatus));
1747	fill_prstatus(info->prstatus, current, signr);
1748	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1749
1750	/* Set up header */
1751	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1752
1753	/*
1754	 * Set up the notes in similar form to SVR4 core dumps made
1755	 * with info from their /proc.
1756	 */
1757
1758	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1759		  sizeof(*info->prstatus), info->prstatus);
1760	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1761	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1762		  sizeof(*info->psinfo), info->psinfo);
1763
1764	info->numnote = 2;
1765
1766	fill_auxv_note(&info->notes[info->numnote++], current->mm);
1767
1768	/* Try to dump the FPU. */
1769	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1770							       info->fpu);
1771	if (info->prstatus->pr_fpvalid)
1772		fill_note(info->notes + info->numnote++,
1773			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1774#ifdef ELF_CORE_COPY_XFPREGS
1775	if (elf_core_copy_task_xfpregs(current, info->xfpu))
1776		fill_note(info->notes + info->numnote++,
1777			  "LINUX", ELF_CORE_XFPREG_TYPE,
1778			  sizeof(*info->xfpu), info->xfpu);
1779#endif
1780
1781	return 1;
1782}
1783
1784static size_t get_note_info_size(struct elf_note_info *info)
1785{
1786	int sz = 0;
1787	int i;
1788
1789	for (i = 0; i < info->numnote; i++)
1790		sz += notesize(info->notes + i);
1791
1792	sz += info->thread_status_size;
1793
1794	return sz;
1795}
1796
1797static int write_note_info(struct elf_note_info *info,
1798			   struct file *file, loff_t *foffset)
1799{
1800	int i;
1801	struct list_head *t;
1802
1803	for (i = 0; i < info->numnote; i++)
1804		if (!writenote(info->notes + i, file, foffset))
1805			return 0;
1806
1807	/* write out the thread status notes section */
1808	list_for_each(t, &info->thread_list) {
1809		struct elf_thread_status *tmp =
1810				list_entry(t, struct elf_thread_status, list);
1811
1812		for (i = 0; i < tmp->num_notes; i++)
1813			if (!writenote(&tmp->notes[i], file, foffset))
1814				return 0;
1815	}
1816
1817	return 1;
1818}
1819
1820static void free_note_info(struct elf_note_info *info)
1821{
1822	while (!list_empty(&info->thread_list)) {
1823		struct list_head *tmp = info->thread_list.next;
1824		list_del(tmp);
1825		kfree(list_entry(tmp, struct elf_thread_status, list));
1826	}
1827
1828	kfree(info->prstatus);
1829	kfree(info->psinfo);
1830	kfree(info->notes);
1831	kfree(info->fpu);
1832#ifdef ELF_CORE_COPY_XFPREGS
1833	kfree(info->xfpu);
1834#endif
1835}
1836
1837#endif
1838
1839static struct vm_area_struct *first_vma(struct task_struct *tsk,
1840					struct vm_area_struct *gate_vma)
1841{
1842	struct vm_area_struct *ret = tsk->mm->mmap;
1843
1844	if (ret)
1845		return ret;
1846	return gate_vma;
1847}
1848/*
1849 * Helper function for iterating across a vma list.  It ensures that the caller
1850 * will visit `gate_vma' prior to terminating the search.
1851 */
1852static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1853					struct vm_area_struct *gate_vma)
1854{
1855	struct vm_area_struct *ret;
1856
1857	ret = this_vma->vm_next;
1858	if (ret)
1859		return ret;
1860	if (this_vma == gate_vma)
1861		return NULL;
1862	return gate_vma;
1863}
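/*
 * Together these give the canonical walk used by the dumper below,
 * visiting every mapped vma and then the gate vma (if any) exactly once:
 *
 *	for (vma = first_vma(current, gate_vma); vma != NULL;
 *	     vma = next_vma(vma, gate_vma))
 *		...
 */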
1864
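/*
 * ELF extended numbering: e_phnum cannot represent PN_XNUM (0xffff) or
 * more segments, so the real count is stored in the sh_info field of
 * section header 0 and e_phnum is set to PN_XNUM.  A consumer recovers
 * the count roughly like this (sketch, not kernel code):
 *
 *	phnum = ehdr->e_phnum;
 *	if (phnum == PN_XNUM)
 *		phnum = shdr0->sh_info;
 */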
1865static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1866			     elf_addr_t e_shoff, int segs)
1867{
1868	elf->e_shoff = e_shoff;
1869	elf->e_shentsize = sizeof(*shdr4extnum);
1870	elf->e_shnum = 1;
1871	elf->e_shstrndx = SHN_UNDEF;
1872
1873	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1874
1875	shdr4extnum->sh_type = SHT_NULL;
1876	shdr4extnum->sh_size = elf->e_shnum;
1877	shdr4extnum->sh_link = elf->e_shstrndx;
1878	shdr4extnum->sh_info = segs;
1879}
1880
1881static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
1882				     unsigned long mm_flags)
1883{
1884	struct vm_area_struct *vma;
1885	size_t size = 0;
1886
1887	for (vma = first_vma(current, gate_vma); vma != NULL;
1888	     vma = next_vma(vma, gate_vma))
1889		size += vma_dump_size(vma, mm_flags);
1890	return size;
1891}
1892
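/*
 * Rough layout of the file produced below (offsets are computed in the
 * first pass, data written in the second); the result can be inspected
 * with e.g. "readelf -l core" and "readelf -n core":
 *
 *	ELF header
 *	program headers (PT_NOTE, one PT_LOAD per vma, arch extras)
 *	note data
 *	<padding to ELF_EXEC_PAGESIZE>
 *	PT_LOAD segment contents
 *	arch-specific extra data
 *	extra section header (only when e_phnum == PN_XNUM)
 */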
1893/*
1894 * Actual dumper
1895 *
1896 * This is a two-pass process; first we find the offsets of the bits,
1897 * and then they are actually written out.  If we run out of core limit
1898 * we just truncate.
1899 */
1900static int elf_core_dump(struct coredump_params *cprm)
1901{
1902	int has_dumped = 0;
1903	mm_segment_t fs;
1904	int segs;
1905	size_t size = 0;
1906	struct vm_area_struct *vma, *gate_vma;
1907	struct elfhdr *elf = NULL;
1908	loff_t offset = 0, dataoff, foffset;
1909	struct elf_note_info info;
1910	struct elf_phdr *phdr4note = NULL;
1911	struct elf_shdr *shdr4extnum = NULL;
1912	Elf_Half e_phnum;
1913	elf_addr_t e_shoff;
1914
1915	/*
1916	 * We no longer stop all VM operations.
1917	 *
1918	 * This is because those processes that could possibly change map_count
1919	 * or the mmap / vma pages are now blocked in do_exit on current
1920	 * finishing this core dump.
1921	 *
1922	 * Only ptrace can touch these memory addresses, but it doesn't change
1923	 * the map_count or the pages allocated. So no possibility of crashing
1924	 * exists while dumping the mm->vm_next areas to the core file.
1925	 */
1926
1927	/* alloc memory for large data structures: too large to be on stack */
1928	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1929	if (!elf)
1930		goto out;
1931	/*
1932	 * The number of segs is recorded in the ELF header as a 16-bit value.
1933	 * Check the DEFAULT_MAX_MAP_COUNT definition if you modify this.
1934	 */
1935	segs = current->mm->map_count;
1936	segs += elf_core_extra_phdrs();
1937
1938	gate_vma = get_gate_vma(current->mm);
1939	if (gate_vma != NULL)
1940		segs++;
1941
1942	/* for notes section */
1943	segs++;
1944
1945	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
1946	 * this, the kernel supports extended numbering. Have a look at
1947	 * include/linux/elf.h for further information. */
1948	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
1949
1950	/*
1951	 * Collect all the non-memory information about the process for the
1952	 * notes.  This also sets up the file header.
1953	 */
1954	if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
1955		goto cleanup;
1956
1957	has_dumped = 1;
1958	current->flags |= PF_DUMPCORE;
1959
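	/*
	 * The headers and note buffers live in kernel space; widen the
	 * address limit so the dump_write() calls below accept them.
	 */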
1960	fs = get_fs();
1961	set_fs(KERNEL_DS);
1962
1963	offset += sizeof(*elf);				/* Elf header */
1964	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
1965	foffset = offset;
1966
1967	/* Write notes phdr entry */
1968	{
1969		size_t sz = get_note_info_size(&info);
1970
1971		sz += elf_coredump_extra_notes_size();
1972
1973		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
1974		if (!phdr4note)
1975			goto end_coredump;
1976
1977		fill_elf_note_phdr(phdr4note, sz, offset);
1978		offset += sz;
1979	}
1980
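	/*
	 * Page-align the data offset so every PT_LOAD segment starts on an
	 * ELF_EXEC_PAGESIZE boundary in the file, matching the p_align
	 * value written into the program headers below.
	 */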
1981	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1982
1983	offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
1984	offset += elf_core_extra_data_size();
1985	e_shoff = offset;
1986
1987	if (e_phnum == PN_XNUM) {
1988		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
1989		if (!shdr4extnum)
1990			goto end_coredump;
1991		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
1992	}
1993
1994	offset = dataoff;
1995
1996	size += sizeof(*elf);
1997	if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
1998		goto end_coredump;
1999
2000	size += sizeof(*phdr4note);
2001	if (size > cprm->limit
2002	    || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2003		goto end_coredump;
2004
2005	/* Write program headers for segments dump */
2006	for (vma = first_vma(current, gate_vma); vma != NULL;
2007			vma = next_vma(vma, gate_vma)) {
2008		struct elf_phdr phdr;
2009
2010		phdr.p_type = PT_LOAD;
2011		phdr.p_offset = offset;
2012		phdr.p_vaddr = vma->vm_start;
2013		phdr.p_paddr = 0;
2014		phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2015		phdr.p_memsz = vma->vm_end - vma->vm_start;
2016		offset += phdr.p_filesz;
2017		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2018		if (vma->vm_flags & VM_WRITE)
2019			phdr.p_flags |= PF_W;
2020		if (vma->vm_flags & VM_EXEC)
2021			phdr.p_flags |= PF_X;
2022		phdr.p_align = ELF_EXEC_PAGESIZE;
2023
2024		size += sizeof(phdr);
2025		if (size > cprm->limit
2026		    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2027			goto end_coredump;
2028	}
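	/*
	 * offset now accounts for all the vma data; the data-writing loop
	 * further below must emit the vmas in this same order, or the
	 * p_offset values just written would not match the file contents.
	 */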
2029
2030	if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2031		goto end_coredump;
2032
2033	/* Write out the notes section */
2034	if (!write_note_info(&info, cprm->file, &foffset))
2035		goto end_coredump;
2036
2037	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2038		goto end_coredump;
2039
2040	/* Align to page */
2041	if (!dump_seek(cprm->file, dataoff - foffset))
2042		goto end_coredump;
2043
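	/*
	 * Dump the memory contents: each vma is written one page at a time
	 * up to its dump size.  Pages that cannot be fetched (for instance
	 * untouched anonymous memory) are skipped with dump_seek(), leaving
	 * a hole that reads back as zeroes.
	 */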
2044	for (vma = first_vma(current, gate_vma); vma != NULL;
2045			vma = next_vma(vma, gate_vma)) {
2046		unsigned long addr;
2047		unsigned long end;
2048
2049		end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2050
2051		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2052			struct page *page;
2053			int stop;
2054
2055			page = get_dump_page(addr);
2056			if (page) {
2057				void *kaddr = kmap(page);
2058				stop = ((size += PAGE_SIZE) > cprm->limit) ||
2059					!dump_write(cprm->file, kaddr,
2060						    PAGE_SIZE);
2061				kunmap(page);
2062				page_cache_release(page);
2063			} else
2064				stop = !dump_seek(cprm->file, PAGE_SIZE);
2065			if (stop)
2066				goto end_coredump;
2067		}
2068	}
2069
2070	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2071		goto end_coredump;
2072
2073	if (e_phnum == PN_XNUM) {
2074		size += sizeof(*shdr4extnum);
2075		if (size > cprm->limit
2076		    || !dump_write(cprm->file, shdr4extnum,
2077				   sizeof(*shdr4extnum)))
2078			goto end_coredump;
2079	}
2080
2081end_coredump:
2082	set_fs(fs);
2083
2084cleanup:
2085	free_note_info(&info);
2086	kfree(shdr4extnum);
2087	kfree(phdr4note);
2088	kfree(elf);
2089out:
2090	return has_dumped;
2091}
2092
2093#endif		/* CONFIG_ELF_CORE */
2094
2095static int __init init_elf_binfmt(void)
2096{
2097	register_binfmt(&elf_format);
2098	return 0;
2099}
2100
2101static void __exit exit_elf_binfmt(void)
2102{
2103	/* Remove the ELF loader. */
2104	unregister_binfmt(&elf_format);
2105}
2106
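/*
 * Register at core_initcall time, before any initcall that might exec a
 * userspace helper, so ELF binaries are loadable as early as possible.
 */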
2107core_initcall(init_elf_binfmt);
2108module_exit(exit_elf_binfmt);
2109MODULE_LICENSE("GPL");