Linux Audio

Check our new training course

Loading...
v6.2
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * sysctl.c: General linux system control interface
   4 *
   5 * Begun 24 March 1995, Stephen Tweedie
   6 * Added /proc support, Dec 1995
   7 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
   8 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
   9 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
  10 * Dynamic registration fixes, Stephen Tweedie.
  11 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
  12 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
  13 *  Horn.
  14 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
  15 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
  16 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
  17 *  Wendling.
  18 * The list_for_each() macro wasn't appropriate for the sysctl loop.
  19 *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
  20 */
  21
  22#include <linux/module.h>
  23#include <linux/mm.h>
  24#include <linux/swap.h>
  25#include <linux/slab.h>
  26#include <linux/sysctl.h>
  27#include <linux/bitmap.h>
  28#include <linux/signal.h>
  29#include <linux/panic.h>
  30#include <linux/printk.h>
  31#include <linux/proc_fs.h>
  32#include <linux/security.h>
  33#include <linux/ctype.h>
  34#include <linux/kmemleak.h>
  35#include <linux/filter.h>
  36#include <linux/fs.h>
  37#include <linux/init.h>
  38#include <linux/kernel.h>
  39#include <linux/kobject.h>
  40#include <linux/net.h>
  41#include <linux/sysrq.h>
  42#include <linux/highuid.h>
  43#include <linux/writeback.h>
  44#include <linux/ratelimit.h>
  45#include <linux/compaction.h>
  46#include <linux/hugetlb.h>
  47#include <linux/initrd.h>
  48#include <linux/key.h>
  49#include <linux/times.h>
  50#include <linux/limits.h>
  51#include <linux/dcache.h>
  52#include <linux/syscalls.h>
  53#include <linux/vmstat.h>
  54#include <linux/nfs_fs.h>
  55#include <linux/acpi.h>
  56#include <linux/reboot.h>
  57#include <linux/ftrace.h>
  58#include <linux/perf_event.h>
  59#include <linux/oom.h>
  60#include <linux/kmod.h>
  61#include <linux/capability.h>
  62#include <linux/binfmts.h>
  63#include <linux/sched/sysctl.h>
  64#include <linux/mount.h>
  65#include <linux/userfaultfd_k.h>
  66#include <linux/pid.h>
  67
  68#include "../lib/kstrtox.h"
  69
  70#include <linux/uaccess.h>
  71#include <asm/processor.h>
  72
  73#ifdef CONFIG_X86
  74#include <asm/nmi.h>
  75#include <asm/stacktrace.h>
  76#include <asm/io.h>
  77#endif
  78#ifdef CONFIG_SPARC
  79#include <asm/setup.h>
  80#endif
  81#ifdef CONFIG_RT_MUTEXES
  82#include <linux/rtmutex.h>
  83#endif
  84
  85/* shared constants to be used in various sysctls */
  86const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
  87EXPORT_SYMBOL(sysctl_vals);
  88
  89const unsigned long sysctl_long_vals[] = { 0, 1, LONG_MAX };
  90EXPORT_SYMBOL_GPL(sysctl_long_vals);
  91
  92#if defined(CONFIG_SYSCTL)
  93
  94/* Constants used for minimum and maximum */
  95
  96#ifdef CONFIG_PERF_EVENTS
  97static const int six_hundred_forty_kb = 640 * 1024;
  98#endif
  99
 100
 101static const int ngroups_max = NGROUPS_MAX;
 102static const int cap_last_cap = CAP_LAST_CAP;
 103
 104#ifdef CONFIG_PROC_SYSCTL
 105
 106/**
 107 * enum sysctl_writes_mode - supported sysctl write modes
 108 *
 109 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
 110 *	to be written, and multiple writes on the same sysctl file descriptor
 111 *	will rewrite the sysctl value, regardless of file position. No warning
 112 *	is issued when the initial position is not 0.
 113 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
 114 *	not 0.
 115 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
 116 *	file position 0 and the value must be fully contained in the buffer
 117 *	sent to the write syscall. If dealing with strings respect the file
 118 *	position, but restrict this to the max length of the buffer, anything
  119 *	past the max length will be ignored. Multiple writes will append
 120 *	to the buffer.
 121 *
 122 * These write modes control how current file position affects the behavior of
 123 * updating sysctl values through the proc interface on each write.
 124 */
 125enum sysctl_writes_mode {
 126	SYSCTL_WRITES_LEGACY		= -1,
 127	SYSCTL_WRITES_WARN		= 0,
 128	SYSCTL_WRITES_STRICT		= 1,
 129};
 130
 131static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
 132#endif /* CONFIG_PROC_SYSCTL */
 133
 134#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
 135    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
 136int sysctl_legacy_va_layout;
 137#endif
 138
 139#endif /* CONFIG_SYSCTL */
 140
 141/*
 142 * /proc/sys support
 143 */
 144
 145#ifdef CONFIG_PROC_SYSCTL
 146
 147static int _proc_do_string(char *data, int maxlen, int write,
 148		char *buffer, size_t *lenp, loff_t *ppos)
 149{
 150	size_t len;
 151	char c, *p;
 152
 153	if (!data || !maxlen || !*lenp) {
 154		*lenp = 0;
 155		return 0;
 156	}
 157
 158	if (write) {
 159		if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
 160			/* Only continue writes not past the end of buffer. */
 161			len = strlen(data);
 162			if (len > maxlen - 1)
 163				len = maxlen - 1;
 164
 165			if (*ppos > len)
 166				return 0;
 167			len = *ppos;
 168		} else {
 169			/* Start writing from beginning of buffer. */
 170			len = 0;
 171		}
 172
 173		*ppos += *lenp;
 174		p = buffer;
 175		while ((p - buffer) < *lenp && len < maxlen - 1) {
 176			c = *(p++);
 177			if (c == 0 || c == '\n')
 178				break;
 179			data[len++] = c;
 180		}
 181		data[len] = 0;
 182	} else {
 183		len = strlen(data);
 184		if (len > maxlen)
 185			len = maxlen;
 186
 187		if (*ppos > len) {
 188			*lenp = 0;
 189			return 0;
 190		}
 191
 192		data += *ppos;
 193		len  -= *ppos;
 194
 195		if (len > *lenp)
 196			len = *lenp;
 197		if (len)
 198			memcpy(buffer, data, len);
 199		if (len < *lenp) {
 200			buffer[len] = '\n';
 201			len++;
 202		}
 203		*lenp = len;
 204		*ppos += len;
 205	}
 206	return 0;
 207}
 208
 209static void warn_sysctl_write(struct ctl_table *table)
 210{
 211	pr_warn_once("%s wrote to %s when file position was not 0!\n"
 212		"This will not be supported in the future. To silence this\n"
 213		"warning, set kernel.sysctl_writes_strict = -1\n",
 214		current->comm, table->procname);
 215}
 216
 217/**
 218 * proc_first_pos_non_zero_ignore - check if first position is allowed
 219 * @ppos: file position
 220 * @table: the sysctl table
 221 *
 222 * Returns true if the first position is non-zero and the sysctl_writes_strict
 223 * mode indicates this is not allowed for numeric input types. String proc
 224 * handlers can ignore the return value.
 225 */
 226static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
 227					   struct ctl_table *table)
 228{
 229	if (!*ppos)
 230		return false;
 231
 232	switch (sysctl_writes_strict) {
 233	case SYSCTL_WRITES_STRICT:
 234		return true;
 235	case SYSCTL_WRITES_WARN:
 236		warn_sysctl_write(table);
 237		return false;
 238	default:
 239		return false;
 240	}
 241}
 242
 243/**
 244 * proc_dostring - read a string sysctl
 245 * @table: the sysctl table
 246 * @write: %TRUE if this is a write to the sysctl file
 247 * @buffer: the user buffer
 248 * @lenp: the size of the user buffer
 249 * @ppos: file position
 250 *
 251 * Reads/writes a string from/to the user buffer. If the kernel
 252 * buffer provided is not large enough to hold the string, the
 253 * string is truncated. The copied string is %NULL-terminated.
 254 * If the string is being read by the user process, it is copied
 255 * and a newline '\n' is added. It is truncated if the buffer is
 256 * not large enough.
 257 *
 258 * Returns 0 on success.
 259 */
 260int proc_dostring(struct ctl_table *table, int write,
 261		  void *buffer, size_t *lenp, loff_t *ppos)
 262{
 263	if (write)
 264		proc_first_pos_non_zero_ignore(ppos, table);
 265
 266	return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
 267			ppos);
 268}
 269
 270static void proc_skip_spaces(char **buf, size_t *size)
 271{
 272	while (*size) {
 273		if (!isspace(**buf))
 274			break;
 275		(*size)--;
 276		(*buf)++;
 277	}
 278}
 279
 280static void proc_skip_char(char **buf, size_t *size, const char v)
 281{
 282	while (*size) {
 283		if (**buf != v)
 284			break;
 285		(*size)--;
 286		(*buf)++;
 287	}
 288}
 289
 290/**
 291 * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
 292 *                   fail on overflow
 293 *
 294 * @cp: kernel buffer containing the string to parse
 295 * @endp: pointer to store the trailing characters
 296 * @base: the base to use
 297 * @res: where the parsed integer will be stored
 298 *
 299 * In case of success 0 is returned and @res will contain the parsed integer,
 300 * @endp will hold any trailing characters.
 301 * This function will fail the parse on overflow. If there wasn't an overflow
 302 * the function will defer the decision what characters count as invalid to the
 303 * caller.
 304 */
 305static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
 306			   unsigned long *res)
 307{
 308	unsigned long long result;
 309	unsigned int rv;
 310
 311	cp = _parse_integer_fixup_radix(cp, &base);
 312	rv = _parse_integer(cp, base, &result);
 313	if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
 314		return -ERANGE;
 315
 316	cp += rv;
 317
 318	if (endp)
 319		*endp = (char *)cp;
 320
 321	*res = (unsigned long)result;
 322	return 0;
 323}
 324
 325#define TMPBUFLEN 22
 326/**
 327 * proc_get_long - reads an ASCII formatted integer from a user buffer
 328 *
 329 * @buf: a kernel buffer
 330 * @size: size of the kernel buffer
 331 * @val: this is where the number will be stored
 332 * @neg: set to %TRUE if number is negative
 333 * @perm_tr: a vector which contains the allowed trailers
 334 * @perm_tr_len: size of the perm_tr vector
 335 * @tr: pointer to store the trailer character
 336 *
 337 * In case of success %0 is returned and @buf and @size are updated with
 338 * the amount of bytes read. If @tr is non-NULL and a trailing
 339 * character exists (size is non-zero after returning from this
 340 * function), @tr is updated with the trailing character.
 341 */
 342static int proc_get_long(char **buf, size_t *size,
 343			  unsigned long *val, bool *neg,
 344			  const char *perm_tr, unsigned perm_tr_len, char *tr)
 345{
 346	char *p, tmp[TMPBUFLEN];
 347	ssize_t len = *size;
 348
 349	if (len <= 0)
 350		return -EINVAL;
 351
 352	if (len > TMPBUFLEN - 1)
 353		len = TMPBUFLEN - 1;
 354
 355	memcpy(tmp, *buf, len);
 356
 357	tmp[len] = 0;
 358	p = tmp;
 359	if (*p == '-' && *size > 1) {
 360		*neg = true;
 361		p++;
 362	} else
 363		*neg = false;
 364	if (!isdigit(*p))
 365		return -EINVAL;
 366
 367	if (strtoul_lenient(p, &p, 0, val))
 368		return -EINVAL;
 369
 370	len = p - tmp;
 371
 372	/* We don't know if the next char is whitespace thus we may accept
 373	 * invalid integers (e.g. 1234...a) or two integers instead of one
 374	 * (e.g. 123...1). So lets not allow such large numbers. */
 375	if (len == TMPBUFLEN - 1)
 376		return -EINVAL;
 377
 378	if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
 379		return -EINVAL;
 380
 381	if (tr && (len < *size))
 382		*tr = *p;
 383
 384	*buf += len;
 385	*size -= len;
 386
 387	return 0;
 388}
 389
 390/**
 391 * proc_put_long - converts an integer to a decimal ASCII formatted string
 392 *
 393 * @buf: the user buffer
 394 * @size: the size of the user buffer
 395 * @val: the integer to be converted
 396 * @neg: sign of the number, %TRUE for negative
 397 *
 398 * In case of success @buf and @size are updated with the amount of bytes
 399 * written.
 400 */
 401static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
 402{
 403	int len;
 404	char tmp[TMPBUFLEN], *p = tmp;
 405
 406	sprintf(p, "%s%lu", neg ? "-" : "", val);
 407	len = strlen(tmp);
 408	if (len > *size)
 409		len = *size;
 410	memcpy(*buf, tmp, len);
 411	*size -= len;
 412	*buf += len;
 413}
 414#undef TMPBUFLEN
 415
 416static void proc_put_char(void **buf, size_t *size, char c)
 417{
 418	if (*size) {
 419		char **buffer = (char **)buf;
 420		**buffer = c;
 421
 422		(*size)--;
 423		(*buffer)++;
 424		*buf = *buffer;
 425	}
 426}
 427
 428static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp,
 429				int *valp,
 430				int write, void *data)
 431{
 432	if (write) {
 433		*(bool *)valp = *lvalp;
 434	} else {
 435		int val = *(bool *)valp;
 436
 437		*lvalp = (unsigned long)val;
 438		*negp = false;
 439	}
 440	return 0;
 441}
 442
 443static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
 444				 int *valp,
 445				 int write, void *data)
 446{
 447	if (write) {
 448		if (*negp) {
 449			if (*lvalp > (unsigned long) INT_MAX + 1)
 450				return -EINVAL;
 451			WRITE_ONCE(*valp, -*lvalp);
 452		} else {
 453			if (*lvalp > (unsigned long) INT_MAX)
 454				return -EINVAL;
 455			WRITE_ONCE(*valp, *lvalp);
 456		}
 457	} else {
 458		int val = READ_ONCE(*valp);
 459		if (val < 0) {
 460			*negp = true;
 461			*lvalp = -(unsigned long)val;
 462		} else {
 463			*negp = false;
 464			*lvalp = (unsigned long)val;
 465		}
 466	}
 467	return 0;
 468}
 469
 470static int do_proc_douintvec_conv(unsigned long *lvalp,
 471				  unsigned int *valp,
 472				  int write, void *data)
 473{
 474	if (write) {
 475		if (*lvalp > UINT_MAX)
 476			return -EINVAL;
 477		WRITE_ONCE(*valp, *lvalp);
 478	} else {
 479		unsigned int val = READ_ONCE(*valp);
 480		*lvalp = (unsigned long)val;
 481	}
 482	return 0;
 483}
 484
 485static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
 486
 487static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
 488		  int write, void *buffer,
 489		  size_t *lenp, loff_t *ppos,
 490		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
 491			      int write, void *data),
 492		  void *data)
 493{
 494	int *i, vleft, first = 1, err = 0;
 495	size_t left;
 496	char *p;
 497
 498	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
 499		*lenp = 0;
 500		return 0;
 501	}
 502
 503	i = (int *) tbl_data;
 504	vleft = table->maxlen / sizeof(*i);
 505	left = *lenp;
 506
 507	if (!conv)
 508		conv = do_proc_dointvec_conv;
 509
 510	if (write) {
 511		if (proc_first_pos_non_zero_ignore(ppos, table))
 512			goto out;
 513
 514		if (left > PAGE_SIZE - 1)
 515			left = PAGE_SIZE - 1;
 516		p = buffer;
 517	}
 518
 519	for (; left && vleft--; i++, first=0) {
 520		unsigned long lval;
 521		bool neg;
 522
 523		if (write) {
 524			proc_skip_spaces(&p, &left);
 525
 526			if (!left)
 527				break;
 528			err = proc_get_long(&p, &left, &lval, &neg,
 529					     proc_wspace_sep,
 530					     sizeof(proc_wspace_sep), NULL);
 531			if (err)
 532				break;
 533			if (conv(&neg, &lval, i, 1, data)) {
 534				err = -EINVAL;
 535				break;
 536			}
 537		} else {
 538			if (conv(&neg, &lval, i, 0, data)) {
 539				err = -EINVAL;
 540				break;
 541			}
 542			if (!first)
 543				proc_put_char(&buffer, &left, '\t');
 544			proc_put_long(&buffer, &left, lval, neg);
 545		}
 546	}
 547
 548	if (!write && !first && left && !err)
 549		proc_put_char(&buffer, &left, '\n');
 550	if (write && !err && left)
 551		proc_skip_spaces(&p, &left);
 552	if (write && first)
 553		return err ? : -EINVAL;
 554	*lenp -= left;
 555out:
 556	*ppos += *lenp;
 557	return err;
 558}
 559
 560static int do_proc_dointvec(struct ctl_table *table, int write,
 561		  void *buffer, size_t *lenp, loff_t *ppos,
 562		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
 563			      int write, void *data),
 564		  void *data)
 565{
 566	return __do_proc_dointvec(table->data, table, write,
 567			buffer, lenp, ppos, conv, data);
 568}
 569
 570static int do_proc_douintvec_w(unsigned int *tbl_data,
 571			       struct ctl_table *table,
 572			       void *buffer,
 573			       size_t *lenp, loff_t *ppos,
 574			       int (*conv)(unsigned long *lvalp,
 575					   unsigned int *valp,
 576					   int write, void *data),
 577			       void *data)
 578{
 579	unsigned long lval;
 580	int err = 0;
 581	size_t left;
 582	bool neg;
 583	char *p = buffer;
 584
 585	left = *lenp;
 586
 587	if (proc_first_pos_non_zero_ignore(ppos, table))
 588		goto bail_early;
 589
 590	if (left > PAGE_SIZE - 1)
 591		left = PAGE_SIZE - 1;
 592
 593	proc_skip_spaces(&p, &left);
 594	if (!left) {
 595		err = -EINVAL;
 596		goto out_free;
 597	}
 598
 599	err = proc_get_long(&p, &left, &lval, &neg,
 600			     proc_wspace_sep,
 601			     sizeof(proc_wspace_sep), NULL);
 602	if (err || neg) {
 603		err = -EINVAL;
 604		goto out_free;
 605	}
 606
 607	if (conv(&lval, tbl_data, 1, data)) {
 608		err = -EINVAL;
 609		goto out_free;
 610	}
 611
 612	if (!err && left)
 613		proc_skip_spaces(&p, &left);
 614
 615out_free:
 616	if (err)
 617		return -EINVAL;
 618
 619	return 0;
 620
 621	/* This is in keeping with old __do_proc_dointvec() */
 622bail_early:
 623	*ppos += *lenp;
 624	return err;
 625}
 626
 627static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
 628			       size_t *lenp, loff_t *ppos,
 629			       int (*conv)(unsigned long *lvalp,
 630					   unsigned int *valp,
 631					   int write, void *data),
 632			       void *data)
 633{
 634	unsigned long lval;
 635	int err = 0;
 636	size_t left;
 637
 638	left = *lenp;
 639
 640	if (conv(&lval, tbl_data, 0, data)) {
 641		err = -EINVAL;
 642		goto out;
 643	}
 644
 645	proc_put_long(&buffer, &left, lval, false);
 646	if (!left)
 647		goto out;
 648
 649	proc_put_char(&buffer, &left, '\n');
 650
 651out:
 652	*lenp -= left;
 653	*ppos += *lenp;
 654
 655	return err;
 656}
 657
 658static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
 659			       int write, void *buffer,
 660			       size_t *lenp, loff_t *ppos,
 661			       int (*conv)(unsigned long *lvalp,
 662					   unsigned int *valp,
 663					   int write, void *data),
 664			       void *data)
 665{
 666	unsigned int *i, vleft;
 667
 668	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
 669		*lenp = 0;
 670		return 0;
 671	}
 672
 673	i = (unsigned int *) tbl_data;
 674	vleft = table->maxlen / sizeof(*i);
 675
 676	/*
 677	 * Arrays are not supported, keep this simple. *Do not* add
 678	 * support for them.
 679	 */
 680	if (vleft != 1) {
 681		*lenp = 0;
 682		return -EINVAL;
 683	}
 684
 685	if (!conv)
 686		conv = do_proc_douintvec_conv;
 687
 688	if (write)
 689		return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
 690					   conv, data);
 691	return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
 692}
 693
 694int do_proc_douintvec(struct ctl_table *table, int write,
 695		      void *buffer, size_t *lenp, loff_t *ppos,
 696		      int (*conv)(unsigned long *lvalp,
 697				  unsigned int *valp,
 698				  int write, void *data),
 699		      void *data)
 700{
 701	return __do_proc_douintvec(table->data, table, write,
 702				   buffer, lenp, ppos, conv, data);
 703}
 704
 705/**
 706 * proc_dobool - read/write a bool
 707 * @table: the sysctl table
 708 * @write: %TRUE if this is a write to the sysctl file
 709 * @buffer: the user buffer
 710 * @lenp: the size of the user buffer
 711 * @ppos: file position
 712 *
 713 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 714 * values from/to the user buffer, treated as an ASCII string.
 
 
 
 715 *
 716 * Returns 0 on success.
 717 */
 718int proc_dobool(struct ctl_table *table, int write, void *buffer,
 719		size_t *lenp, loff_t *ppos)
 720{
 721	return do_proc_dointvec(table, write, buffer, lenp, ppos,
 722				do_proc_dobool_conv, NULL);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 723}
 724
 725/**
 726 * proc_dointvec - read a vector of integers
 727 * @table: the sysctl table
 728 * @write: %TRUE if this is a write to the sysctl file
 729 * @buffer: the user buffer
 730 * @lenp: the size of the user buffer
 731 * @ppos: file position
 732 *
 733 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 734 * values from/to the user buffer, treated as an ASCII string.
 735 *
 736 * Returns 0 on success.
 737 */
 738int proc_dointvec(struct ctl_table *table, int write, void *buffer,
 739		  size_t *lenp, loff_t *ppos)
 740{
 741	return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
 742}
 743
 744#ifdef CONFIG_COMPACTION
 745static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
 746		int write, void *buffer, size_t *lenp, loff_t *ppos)
 747{
 748	int ret, old;
 749
 750	if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write)
 751		return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 752
 753	old = *(int *)table->data;
 754	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 755	if (ret)
 756		return ret;
 757	if (old != *(int *)table->data)
 758		pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
 759			     table->procname, current->comm,
 760			     task_pid_nr(current));
 761	return ret;
 762}
 763#endif
 764
 765/**
 766 * proc_douintvec - read a vector of unsigned integers
 767 * @table: the sysctl table
 768 * @write: %TRUE if this is a write to the sysctl file
 769 * @buffer: the user buffer
 770 * @lenp: the size of the user buffer
 771 * @ppos: file position
 772 *
 773 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
 774 * values from/to the user buffer, treated as an ASCII string.
 775 *
 776 * Returns 0 on success.
 777 */
 778int proc_douintvec(struct ctl_table *table, int write, void *buffer,
 779		size_t *lenp, loff_t *ppos)
 780{
 781	return do_proc_douintvec(table, write, buffer, lenp, ppos,
 782				 do_proc_douintvec_conv, NULL);
 783}
 784
 785/*
 786 * Taint values can only be increased
 787 * This means we can safely use a temporary.
 788 */
 789static int proc_taint(struct ctl_table *table, int write,
 790			       void *buffer, size_t *lenp, loff_t *ppos)
 791{
 792	struct ctl_table t;
 793	unsigned long tmptaint = get_taint();
 794	int err;
 795
 796	if (write && !capable(CAP_SYS_ADMIN))
 797		return -EPERM;
 798
 799	t = *table;
 800	t.data = &tmptaint;
 801	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
 802	if (err < 0)
 803		return err;
 804
 805	if (write) {
 806		int i;
 807
 808		/*
 809		 * If we are relying on panic_on_taint not producing
 810		 * false positives due to userspace input, bail out
 811		 * before setting the requested taint flags.
 812		 */
 813		if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
 814			return -EINVAL;
 815
 816		/*
 817		 * Poor man's atomic or. Not worth adding a primitive
 818		 * to everyone's atomic.h for this
 819		 */
 820		for (i = 0; i < TAINT_FLAGS_COUNT; i++)
 821			if ((1UL << i) & tmptaint)
 822				add_taint(i, LOCKDEP_STILL_OK);
 823	}
 824
 825	return err;
 826}
 827
 828/**
 829 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 830 * @min: pointer to minimum allowable value
 831 * @max: pointer to maximum allowable value
 832 *
 833 * The do_proc_dointvec_minmax_conv_param structure provides the
 834 * minimum and maximum values for doing range checking for those sysctl
 835 * parameters that use the proc_dointvec_minmax() handler.
 836 */
 837struct do_proc_dointvec_minmax_conv_param {
 838	int *min;
 839	int *max;
 840};
 841
 842static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
 843					int *valp,
 844					int write, void *data)
 845{
 846	int tmp, ret;
 847	struct do_proc_dointvec_minmax_conv_param *param = data;
 848	/*
 849	 * If writing, first do so via a temporary local int so we can
 850	 * bounds-check it before touching *valp.
 851	 */
 852	int *ip = write ? &tmp : valp;
 853
 854	ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
 855	if (ret)
 856		return ret;
 857
 858	if (write) {
 859		if ((param->min && *param->min > tmp) ||
 860		    (param->max && *param->max < tmp))
 861			return -EINVAL;
 862		WRITE_ONCE(*valp, tmp);
 863	}
 864
 865	return 0;
 866}
 867
 868/**
 869 * proc_dointvec_minmax - read a vector of integers with min/max values
 870 * @table: the sysctl table
 871 * @write: %TRUE if this is a write to the sysctl file
 872 * @buffer: the user buffer
 873 * @lenp: the size of the user buffer
 874 * @ppos: file position
 875 *
 876 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 877 * values from/to the user buffer, treated as an ASCII string.
 878 *
 879 * This routine will ensure the values are within the range specified by
 880 * table->extra1 (min) and table->extra2 (max).
 881 *
 882 * Returns 0 on success or -EINVAL on write when the range check fails.
 883 */
 884int proc_dointvec_minmax(struct ctl_table *table, int write,
 885		  void *buffer, size_t *lenp, loff_t *ppos)
 886{
 887	struct do_proc_dointvec_minmax_conv_param param = {
 888		.min = (int *) table->extra1,
 889		.max = (int *) table->extra2,
 890	};
 891	return do_proc_dointvec(table, write, buffer, lenp, ppos,
 892				do_proc_dointvec_minmax_conv, &param);
 893}
 894
 895/**
 896 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 897 * @min: pointer to minimum allowable value
 898 * @max: pointer to maximum allowable value
 899 *
 900 * The do_proc_douintvec_minmax_conv_param structure provides the
 901 * minimum and maximum values for doing range checking for those sysctl
 902 * parameters that use the proc_douintvec_minmax() handler.
 903 */
 904struct do_proc_douintvec_minmax_conv_param {
 905	unsigned int *min;
 906	unsigned int *max;
 907};
 908
 909static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
 910					 unsigned int *valp,
 911					 int write, void *data)
 912{
 913	int ret;
 914	unsigned int tmp;
 915	struct do_proc_douintvec_minmax_conv_param *param = data;
 916	/* write via temporary local uint for bounds-checking */
 917	unsigned int *up = write ? &tmp : valp;
 918
 919	ret = do_proc_douintvec_conv(lvalp, up, write, data);
 920	if (ret)
 921		return ret;
 922
 923	if (write) {
 924		if ((param->min && *param->min > tmp) ||
 925		    (param->max && *param->max < tmp))
 926			return -ERANGE;
 927
 928		WRITE_ONCE(*valp, tmp);
 929	}
 930
 931	return 0;
 932}
 933
 934/**
 935 * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
 936 * @table: the sysctl table
 937 * @write: %TRUE if this is a write to the sysctl file
 938 * @buffer: the user buffer
 939 * @lenp: the size of the user buffer
 940 * @ppos: file position
 941 *
 942 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
 943 * values from/to the user buffer, treated as an ASCII string. Negative
 944 * strings are not allowed.
 945 *
 946 * This routine will ensure the values are within the range specified by
 947 * table->extra1 (min) and table->extra2 (max). There is a final sanity
 948 * check for UINT_MAX to avoid having to support wrap around uses from
 949 * userspace.
 950 *
 951 * Returns 0 on success or -ERANGE on write when the range check fails.
 952 */
 953int proc_douintvec_minmax(struct ctl_table *table, int write,
 954			  void *buffer, size_t *lenp, loff_t *ppos)
 955{
 956	struct do_proc_douintvec_minmax_conv_param param = {
 957		.min = (unsigned int *) table->extra1,
 958		.max = (unsigned int *) table->extra2,
 959	};
 960	return do_proc_douintvec(table, write, buffer, lenp, ppos,
 961				 do_proc_douintvec_minmax_conv, &param);
 962}
 963
 964/**
 965 * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
 966 * @table: the sysctl table
 967 * @write: %TRUE if this is a write to the sysctl file
 968 * @buffer: the user buffer
 969 * @lenp: the size of the user buffer
 970 * @ppos: file position
 971 *
 972 * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
 973 * values from/to the user buffer, treated as an ASCII string. Negative
 974 * strings are not allowed.
 975 *
 976 * This routine will ensure the values are within the range specified by
 977 * table->extra1 (min) and table->extra2 (max).
 978 *
 979 * Returns 0 on success or an error on write when the range check fails.
 980 */
 981int proc_dou8vec_minmax(struct ctl_table *table, int write,
 982			void *buffer, size_t *lenp, loff_t *ppos)
 983{
 984	struct ctl_table tmp;
 985	unsigned int min = 0, max = 255U, val;
 986	u8 *data = table->data;
 987	struct do_proc_douintvec_minmax_conv_param param = {
 988		.min = &min,
 989		.max = &max,
 990	};
 991	int res;
 992
 993	/* Do not support arrays yet. */
 994	if (table->maxlen != sizeof(u8))
 995		return -EINVAL;
 996
 997	if (table->extra1) {
 998		min = *(unsigned int *) table->extra1;
 999		if (min > 255U)
1000			return -EINVAL;
1001	}
1002	if (table->extra2) {
1003		max = *(unsigned int *) table->extra2;
1004		if (max > 255U)
1005			return -EINVAL;
1006	}
1007
1008	tmp = *table;
1009
1010	tmp.maxlen = sizeof(val);
1011	tmp.data = &val;
1012	val = READ_ONCE(*data);
1013	res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
1014				do_proc_douintvec_minmax_conv, &param);
1015	if (res)
1016		return res;
1017	if (write)
1018		WRITE_ONCE(*data, val);
1019	return 0;
1020}
1021EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
1022
1023#ifdef CONFIG_MAGIC_SYSRQ
1024static int sysrq_sysctl_handler(struct ctl_table *table, int write,
1025				void *buffer, size_t *lenp, loff_t *ppos)
1026{
1027	int tmp, ret;
1028
1029	tmp = sysrq_mask();
1030
1031	ret = __do_proc_dointvec(&tmp, table, write, buffer,
1032			       lenp, ppos, NULL, NULL);
1033	if (ret || !write)
1034		return ret;
1035
1036	if (write)
1037		sysrq_toggle_support(tmp);
1038
1039	return 0;
1040}
1041#endif
1042
1043static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
1044		int write, void *buffer, size_t *lenp, loff_t *ppos,
 
1045		unsigned long convmul, unsigned long convdiv)
1046{
1047	unsigned long *i, *min, *max;
1048	int vleft, first = 1, err = 0;
1049	size_t left;
1050	char *p;
1051
1052	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
1053		*lenp = 0;
1054		return 0;
1055	}
1056
1057	i = data;
1058	min = table->extra1;
1059	max = table->extra2;
1060	vleft = table->maxlen / sizeof(unsigned long);
1061	left = *lenp;
1062
1063	if (write) {
1064		if (proc_first_pos_non_zero_ignore(ppos, table))
1065			goto out;
1066
1067		if (left > PAGE_SIZE - 1)
1068			left = PAGE_SIZE - 1;
1069		p = buffer;
1070	}
1071
1072	for (; left && vleft--; i++, first = 0) {
1073		unsigned long val;
1074
1075		if (write) {
1076			bool neg;
1077
1078			proc_skip_spaces(&p, &left);
1079			if (!left)
1080				break;
1081
1082			err = proc_get_long(&p, &left, &val, &neg,
1083					     proc_wspace_sep,
1084					     sizeof(proc_wspace_sep), NULL);
1085			if (err || neg) {
1086				err = -EINVAL;
1087				break;
1088			}
1089
1090			val = convmul * val / convdiv;
1091			if ((min && val < *min) || (max && val > *max)) {
1092				err = -EINVAL;
1093				break;
1094			}
1095			WRITE_ONCE(*i, val);
1096		} else {
1097			val = convdiv * READ_ONCE(*i) / convmul;
1098			if (!first)
1099				proc_put_char(&buffer, &left, '\t');
1100			proc_put_long(&buffer, &left, val, false);
1101		}
1102	}
1103
1104	if (!write && !first && left && !err)
1105		proc_put_char(&buffer, &left, '\n');
1106	if (write && !err)
1107		proc_skip_spaces(&p, &left);
1108	if (write && first)
1109		return err ? : -EINVAL;
1110	*lenp -= left;
1111out:
1112	*ppos += *lenp;
1113	return err;
1114}
1115
1116static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
1117		void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
1118		unsigned long convdiv)
1119{
1120	return __do_proc_doulongvec_minmax(table->data, table, write,
1121			buffer, lenp, ppos, convmul, convdiv);
1122}
1123
1124/**
1125 * proc_doulongvec_minmax - read a vector of long integers with min/max values
1126 * @table: the sysctl table
1127 * @write: %TRUE if this is a write to the sysctl file
1128 * @buffer: the user buffer
1129 * @lenp: the size of the user buffer
1130 * @ppos: file position
1131 *
1132 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1133 * values from/to the user buffer, treated as an ASCII string.
1134 *
1135 * This routine will ensure the values are within the range specified by
1136 * table->extra1 (min) and table->extra2 (max).
1137 *
1138 * Returns 0 on success.
1139 */
1140int proc_doulongvec_minmax(struct ctl_table *table, int write,
1141			   void *buffer, size_t *lenp, loff_t *ppos)
1142{
1143    return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1144}
1145
1146/**
1147 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1148 * @table: the sysctl table
1149 * @write: %TRUE if this is a write to the sysctl file
1150 * @buffer: the user buffer
1151 * @lenp: the size of the user buffer
1152 * @ppos: file position
1153 *
1154 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1155 * values from/to the user buffer, treated as an ASCII string. The values
1156 * are treated as milliseconds, and converted to jiffies when they are stored.
1157 *
1158 * This routine will ensure the values are within the range specified by
1159 * table->extra1 (min) and table->extra2 (max).
1160 *
1161 * Returns 0 on success.
1162 */
1163int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1164				      void *buffer, size_t *lenp, loff_t *ppos)
1165{
1166    return do_proc_doulongvec_minmax(table, write, buffer,
1167				     lenp, ppos, HZ, 1000l);
1168}
1169
1170
1171static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
1172					 int *valp,
1173					 int write, void *data)
1174{
1175	if (write) {
1176		if (*lvalp > INT_MAX / HZ)
1177			return 1;
1178		if (*negp)
1179			WRITE_ONCE(*valp, -*lvalp * HZ);
1180		else
1181			WRITE_ONCE(*valp, *lvalp * HZ);
1182	} else {
1183		int val = READ_ONCE(*valp);
1184		unsigned long lval;
1185		if (val < 0) {
1186			*negp = true;
1187			lval = -(unsigned long)val;
1188		} else {
1189			*negp = false;
1190			lval = (unsigned long)val;
1191		}
1192		*lvalp = lval / HZ;
1193	}
1194	return 0;
1195}
1196
1197static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
1198						int *valp,
1199						int write, void *data)
1200{
1201	if (write) {
1202		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
1203			return 1;
1204		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
1205	} else {
1206		int val = *valp;
1207		unsigned long lval;
1208		if (val < 0) {
1209			*negp = true;
1210			lval = -(unsigned long)val;
1211		} else {
1212			*negp = false;
1213			lval = (unsigned long)val;
1214		}
1215		*lvalp = jiffies_to_clock_t(lval);
1216	}
1217	return 0;
1218}
1219
/*
 * Convert between a user-visible value in milliseconds (*negp / *lvalp)
 * and the jiffies count stored in *valp.
 *
 * On write, returns 1 when msecs_to_jiffies() yields more than INT_MAX;
 * otherwise stores the result and returns 0.  On read, splits *valp into
 * sign and magnitude and reports jiffies_to_msecs() of the magnitude.
 * @data is unused.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long ms, jif;
	int stored;

	if (write) {
		ms = *negp ? -*lvalp : *lvalp;
		jif = msecs_to_jiffies(ms);
		if (jif > INT_MAX)
			return 1;
		WRITE_ONCE(*valp, (int)jif);
		return 0;
	}

	stored = READ_ONCE(*valp);
	*negp = stored < 0;
	jif = stored < 0 ? -(unsigned long)stored : (unsigned long)stored;
	*lvalp = jiffies_to_msecs(jif);
	return 0;
}
1244
1245static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lvalp,
1246						int *valp, int write, void *data)
1247{
1248	int tmp, ret;
1249	struct do_proc_dointvec_minmax_conv_param *param = data;
1250	/*
1251	 * If writing, first do so via a temporary local int so we can
1252	 * bounds-check it before touching *valp.
1253	 */
1254	int *ip = write ? &tmp : valp;
1255
1256	ret = do_proc_dointvec_ms_jiffies_conv(negp, lvalp, ip, write, data);
1257	if (ret)
1258		return ret;
1259
1260	if (write) {
1261		if ((param->min && *param->min > tmp) ||
1262				(param->max && *param->max < tmp))
1263			return -EINVAL;
1264		*valp = tmp;
1265	}
1266	return 0;
1267}
1268
1269/**
1270 * proc_dointvec_jiffies - read a vector of integers as seconds
1271 * @table: the sysctl table
1272 * @write: %TRUE if this is a write to the sysctl file
1273 * @buffer: the user buffer
1274 * @lenp: the size of the user buffer
1275 * @ppos: file position
1276 *
1277 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1278 * values from/to the user buffer, treated as an ASCII string.
1279 * The values read are assumed to be in seconds, and are converted into
1280 * jiffies.
1281 *
1282 * Returns 0 on success.
1283 */
1284int proc_dointvec_jiffies(struct ctl_table *table, int write,
1285			  void *buffer, size_t *lenp, loff_t *ppos)
1286{
1287    return do_proc_dointvec(table,write,buffer,lenp,ppos,
1288		    	    do_proc_dointvec_jiffies_conv,NULL);
1289}
1290
1291int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1292			  void *buffer, size_t *lenp, loff_t *ppos)
1293{
1294	struct do_proc_dointvec_minmax_conv_param param = {
1295		.min = (int *) table->extra1,
1296		.max = (int *) table->extra2,
1297	};
1298	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1299			do_proc_dointvec_ms_jiffies_minmax_conv, &param);
1300}
1301
1302/**
1303 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
1304 * @table: the sysctl table
1305 * @write: %TRUE if this is a write to the sysctl file
1306 * @buffer: the user buffer
1307 * @lenp: the size of the user buffer
1308 * @ppos: pointer to the file position
1309 *
1310 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1311 * values from/to the user buffer, treated as an ASCII string.
1312 * The values read are assumed to be in 1/USER_HZ seconds, and
1313 * are converted into jiffies.
1314 *
1315 * Returns 0 on success.
1316 */
1317int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1318				 void *buffer, size_t *lenp, loff_t *ppos)
1319{
1320	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1321				do_proc_dointvec_userhz_jiffies_conv, NULL);
1322}
1323
1324/**
1325 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
1326 * @table: the sysctl table
1327 * @write: %TRUE if this is a write to the sysctl file
1328 * @buffer: the user buffer
1329 * @lenp: the size of the user buffer
1330 * @ppos: file position
1331 * @ppos: the current position in the file
1332 *
1333 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1334 * values from/to the user buffer, treated as an ASCII string.
1335 * The values read are assumed to be in 1/1000 seconds, and
1336 * are converted into jiffies.
1337 *
1338 * Returns 0 on success.
1339 */
1340int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
1341		size_t *lenp, loff_t *ppos)
1342{
1343	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1344				do_proc_dointvec_ms_jiffies_conv, NULL);
1345}
1346
1347static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
1348		size_t *lenp, loff_t *ppos)
1349{
1350	struct pid *new_pid;
1351	pid_t tmp;
1352	int r;
1353
1354	tmp = pid_vnr(cad_pid);
1355
1356	r = __do_proc_dointvec(&tmp, table, write, buffer,
1357			       lenp, ppos, NULL, NULL);
1358	if (r || !write)
1359		return r;
1360
1361	new_pid = find_get_pid(tmp);
1362	if (!new_pid)
1363		return -ESRCH;
1364
1365	put_pid(xchg(&cad_pid, new_pid));
1366	return 0;
1367}
1368
1369/**
1370 * proc_do_large_bitmap - read/write from/to a large bitmap
1371 * @table: the sysctl table
1372 * @write: %TRUE if this is a write to the sysctl file
1373 * @buffer: the user buffer
1374 * @lenp: the size of the user buffer
1375 * @ppos: file position
1376 *
1377 * The bitmap is stored at table->data and the bitmap length (in bits)
1378 * in table->maxlen.
1379 *
1380 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
1381 * large bitmaps may be represented in a compact manner. Writing into
1382 * the file will clear the bitmap then update it with the given input.
1383 *
1384 * Returns 0 on success.
1385 */
1386int proc_do_large_bitmap(struct ctl_table *table, int write,
1387			 void *buffer, size_t *lenp, loff_t *ppos)
1388{
1389	int err = 0;
1390	size_t left = *lenp;
1391	unsigned long bitmap_len = table->maxlen;
1392	unsigned long *bitmap = *(unsigned long **) table->data;
1393	unsigned long *tmp_bitmap = NULL;
1394	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
1395
1396	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
1397		*lenp = 0;
1398		return 0;
1399	}
1400
1401	if (write) {
1402		char *p = buffer;
1403		size_t skipped = 0;
1404
1405		if (left > PAGE_SIZE - 1) {
1406			left = PAGE_SIZE - 1;
1407			/* How much of the buffer we'll skip this pass */
1408			skipped = *lenp - left;
1409		}
1410
1411		tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
1412		if (!tmp_bitmap)
1413			return -ENOMEM;
1414		proc_skip_char(&p, &left, '\n');
1415		while (!err && left) {
1416			unsigned long val_a, val_b;
1417			bool neg;
1418			size_t saved_left;
1419
1420			/* In case we stop parsing mid-number, we can reset */
1421			saved_left = left;
1422			err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
1423					     sizeof(tr_a), &c);
1424			/*
1425			 * If we consumed the entirety of a truncated buffer or
1426			 * only one char is left (may be a "-"), then stop here,
1427			 * reset, & come back for more.
1428			 */
1429			if ((left <= 1) && skipped) {
1430				left = saved_left;
1431				break;
1432			}
1433
1434			if (err)
1435				break;
1436			if (val_a >= bitmap_len || neg) {
1437				err = -EINVAL;
1438				break;
1439			}
1440
1441			val_b = val_a;
1442			if (left) {
1443				p++;
1444				left--;
1445			}
1446
1447			if (c == '-') {
1448				err = proc_get_long(&p, &left, &val_b,
1449						     &neg, tr_b, sizeof(tr_b),
1450						     &c);
1451				/*
1452				 * If we consumed all of a truncated buffer or
1453				 * then stop here, reset, & come back for more.
1454				 */
1455				if (!left && skipped) {
1456					left = saved_left;
1457					break;
1458				}
1459
1460				if (err)
1461					break;
1462				if (val_b >= bitmap_len || neg ||
1463				    val_a > val_b) {
1464					err = -EINVAL;
1465					break;
1466				}
1467				if (left) {
1468					p++;
1469					left--;
1470				}
1471			}
1472
1473			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
1474			proc_skip_char(&p, &left, '\n');
1475		}
1476		left += skipped;
1477	} else {
1478		unsigned long bit_a, bit_b = 0;
1479		bool first = 1;
1480
1481		while (left) {
1482			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
1483			if (bit_a >= bitmap_len)
1484				break;
1485			bit_b = find_next_zero_bit(bitmap, bitmap_len,
1486						   bit_a + 1) - 1;
1487
1488			if (!first)
1489				proc_put_char(&buffer, &left, ',');
1490			proc_put_long(&buffer, &left, bit_a, false);
1491			if (bit_a != bit_b) {
1492				proc_put_char(&buffer, &left, '-');
1493				proc_put_long(&buffer, &left, bit_b, false);
1494			}
1495
1496			first = 0; bit_b++;
1497		}
1498		proc_put_char(&buffer, &left, '\n');
1499	}
1500
1501	if (!err) {
1502		if (write) {
1503			if (*ppos)
1504				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
1505			else
1506				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
1507		}
1508		*lenp -= left;
1509		*ppos += *lenp;
1510	}
1511
1512	bitmap_free(tmp_bitmap);
1513	return err;
1514}
1515
1516#else /* CONFIG_PROC_SYSCTL */
1517
1518int proc_dostring(struct ctl_table *table, int write,
1519		  void *buffer, size_t *lenp, loff_t *ppos)
1520{
1521	return -ENOSYS;
1522}
1523
1524int proc_dobool(struct ctl_table *table, int write,
1525		void *buffer, size_t *lenp, loff_t *ppos)
1526{
1527	return -ENOSYS;
1528}
1529
1530int proc_dointvec(struct ctl_table *table, int write,
1531		  void *buffer, size_t *lenp, loff_t *ppos)
1532{
1533	return -ENOSYS;
1534}
1535
1536int proc_douintvec(struct ctl_table *table, int write,
1537		  void *buffer, size_t *lenp, loff_t *ppos)
1538{
1539	return -ENOSYS;
1540}
1541
1542int proc_dointvec_minmax(struct ctl_table *table, int write,
1543		    void *buffer, size_t *lenp, loff_t *ppos)
1544{
1545	return -ENOSYS;
1546}
1547
1548int proc_douintvec_minmax(struct ctl_table *table, int write,
1549			  void *buffer, size_t *lenp, loff_t *ppos)
1550{
1551	return -ENOSYS;
1552}
1553
1554int proc_dou8vec_minmax(struct ctl_table *table, int write,
1555			void *buffer, size_t *lenp, loff_t *ppos)
1556{
1557	return -ENOSYS;
1558}
1559
1560int proc_dointvec_jiffies(struct ctl_table *table, int write,
1561		    void *buffer, size_t *lenp, loff_t *ppos)
1562{
1563	return -ENOSYS;
1564}
1565
1566int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1567				    void *buffer, size_t *lenp, loff_t *ppos)
1568{
1569	return -ENOSYS;
1570}
1571
1572int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1573		    void *buffer, size_t *lenp, loff_t *ppos)
1574{
1575	return -ENOSYS;
1576}
1577
1578int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
1579			     void *buffer, size_t *lenp, loff_t *ppos)
1580{
1581	return -ENOSYS;
1582}
1583
1584int proc_doulongvec_minmax(struct ctl_table *table, int write,
1585		    void *buffer, size_t *lenp, loff_t *ppos)
1586{
1587	return -ENOSYS;
1588}
1589
1590int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1591				      void *buffer, size_t *lenp, loff_t *ppos)
1592{
1593	return -ENOSYS;
1594}
1595
1596int proc_do_large_bitmap(struct ctl_table *table, int write,
1597			 void *buffer, size_t *lenp, loff_t *ppos)
1598{
1599	return -ENOSYS;
1600}
1601
1602#endif /* CONFIG_PROC_SYSCTL */
1603
1604#if defined(CONFIG_SYSCTL)
1605int proc_do_static_key(struct ctl_table *table, int write,
1606		       void *buffer, size_t *lenp, loff_t *ppos)
1607{
1608	struct static_key *key = (struct static_key *)table->data;
1609	static DEFINE_MUTEX(static_key_mutex);
1610	int val, ret;
1611	struct ctl_table tmp = {
1612		.data   = &val,
1613		.maxlen = sizeof(val),
1614		.mode   = table->mode,
1615		.extra1 = SYSCTL_ZERO,
1616		.extra2 = SYSCTL_ONE,
1617	};
1618
1619	if (write && !capable(CAP_SYS_ADMIN))
1620		return -EPERM;
1621
1622	mutex_lock(&static_key_mutex);
1623	val = static_key_enabled(key);
1624	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
1625	if (write && !ret) {
1626		if (val)
1627			static_key_enable(key);
1628		else
1629			static_key_disable(key);
1630	}
1631	mutex_unlock(&static_key_mutex);
1632	return ret;
1633}
1634
1635static struct ctl_table kern_table[] = {
1636	{
1637		.procname	= "panic",
1638		.data		= &panic_timeout,
1639		.maxlen		= sizeof(int),
1640		.mode		= 0644,
1641		.proc_handler	= proc_dointvec,
1642	},
1643#ifdef CONFIG_PROC_SYSCTL
1644	{
1645		.procname	= "tainted",
1646		.maxlen 	= sizeof(long),
1647		.mode		= 0644,
1648		.proc_handler	= proc_taint,
1649	},
1650	{
1651		.procname	= "sysctl_writes_strict",
1652		.data		= &sysctl_writes_strict,
1653		.maxlen		= sizeof(int),
1654		.mode		= 0644,
1655		.proc_handler	= proc_dointvec_minmax,
1656		.extra1		= SYSCTL_NEG_ONE,
1657		.extra2		= SYSCTL_ONE,
1658	},
1659#endif
1660	{
1661		.procname	= "print-fatal-signals",
1662		.data		= &print_fatal_signals,
1663		.maxlen		= sizeof(int),
1664		.mode		= 0644,
1665		.proc_handler	= proc_dointvec,
1666	},
1667#ifdef CONFIG_SPARC
1668	{
1669		.procname	= "reboot-cmd",
1670		.data		= reboot_command,
1671		.maxlen		= 256,
1672		.mode		= 0644,
1673		.proc_handler	= proc_dostring,
1674	},
1675	{
1676		.procname	= "stop-a",
1677		.data		= &stop_a_enabled,
1678		.maxlen		= sizeof (int),
1679		.mode		= 0644,
1680		.proc_handler	= proc_dointvec,
1681	},
1682	{
1683		.procname	= "scons-poweroff",
1684		.data		= &scons_pwroff,
1685		.maxlen		= sizeof (int),
1686		.mode		= 0644,
1687		.proc_handler	= proc_dointvec,
1688	},
1689#endif
1690#ifdef CONFIG_SPARC64
1691	{
1692		.procname	= "tsb-ratio",
1693		.data		= &sysctl_tsb_ratio,
1694		.maxlen		= sizeof (int),
1695		.mode		= 0644,
1696		.proc_handler	= proc_dointvec,
1697	},
1698#endif
1699#ifdef CONFIG_PARISC
1700	{
1701		.procname	= "soft-power",
1702		.data		= &pwrsw_enabled,
1703		.maxlen		= sizeof (int),
1704		.mode		= 0644,
1705		.proc_handler	= proc_dointvec,
1706	},
1707#endif
1708#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
1709	{
1710		.procname	= "unaligned-trap",
1711		.data		= &unaligned_enabled,
1712		.maxlen		= sizeof (int),
1713		.mode		= 0644,
1714		.proc_handler	= proc_dointvec,
1715	},
1716#endif
1717#ifdef CONFIG_STACK_TRACER
1718	{
1719		.procname	= "stack_tracer_enabled",
1720		.data		= &stack_tracer_enabled,
1721		.maxlen		= sizeof(int),
1722		.mode		= 0644,
1723		.proc_handler	= stack_trace_sysctl,
1724	},
1725#endif
1726#ifdef CONFIG_TRACING
1727	{
1728		.procname	= "ftrace_dump_on_oops",
1729		.data		= &ftrace_dump_on_oops,
1730		.maxlen		= sizeof(int),
1731		.mode		= 0644,
1732		.proc_handler	= proc_dointvec,
1733	},
1734	{
1735		.procname	= "traceoff_on_warning",
1736		.data		= &__disable_trace_on_warning,
1737		.maxlen		= sizeof(__disable_trace_on_warning),
1738		.mode		= 0644,
1739		.proc_handler	= proc_dointvec,
1740	},
1741	{
1742		.procname	= "tracepoint_printk",
1743		.data		= &tracepoint_printk,
1744		.maxlen		= sizeof(tracepoint_printk),
1745		.mode		= 0644,
1746		.proc_handler	= tracepoint_printk_sysctl,
1747	},
1748#endif
1749#ifdef CONFIG_MODULES
1750	{
1751		.procname	= "modprobe",
1752		.data		= &modprobe_path,
1753		.maxlen		= KMOD_PATH_LEN,
1754		.mode		= 0644,
1755		.proc_handler	= proc_dostring,
1756	},
1757	{
1758		.procname	= "modules_disabled",
1759		.data		= &modules_disabled,
1760		.maxlen		= sizeof(int),
1761		.mode		= 0644,
1762		/* only handle a transition from default "0" to "1" */
1763		.proc_handler	= proc_dointvec_minmax,
1764		.extra1		= SYSCTL_ONE,
1765		.extra2		= SYSCTL_ONE,
1766	},
1767#endif
1768#ifdef CONFIG_UEVENT_HELPER
1769	{
1770		.procname	= "hotplug",
1771		.data		= &uevent_helper,
1772		.maxlen		= UEVENT_HELPER_PATH_LEN,
1773		.mode		= 0644,
1774		.proc_handler	= proc_dostring,
1775	},
1776#endif
1777#ifdef CONFIG_MAGIC_SYSRQ
1778	{
1779		.procname	= "sysrq",
1780		.data		= NULL,
1781		.maxlen		= sizeof (int),
1782		.mode		= 0644,
1783		.proc_handler	= sysrq_sysctl_handler,
1784	},
1785#endif
1786#ifdef CONFIG_PROC_SYSCTL
1787	{
1788		.procname	= "cad_pid",
1789		.data		= NULL,
1790		.maxlen		= sizeof (int),
1791		.mode		= 0600,
1792		.proc_handler	= proc_do_cad_pid,
1793	},
1794#endif
1795	{
1796		.procname	= "threads-max",
1797		.data		= NULL,
1798		.maxlen		= sizeof(int),
1799		.mode		= 0644,
1800		.proc_handler	= sysctl_max_threads,
1801	},
1802	{
1803		.procname	= "usermodehelper",
1804		.mode		= 0555,
1805		.child		= usermodehelper_table,
1806	},
1807	{
1808		.procname	= "overflowuid",
1809		.data		= &overflowuid,
1810		.maxlen		= sizeof(int),
1811		.mode		= 0644,
1812		.proc_handler	= proc_dointvec_minmax,
1813		.extra1		= SYSCTL_ZERO,
1814		.extra2		= SYSCTL_MAXOLDUID,
1815	},
1816	{
1817		.procname	= "overflowgid",
1818		.data		= &overflowgid,
1819		.maxlen		= sizeof(int),
1820		.mode		= 0644,
1821		.proc_handler	= proc_dointvec_minmax,
1822		.extra1		= SYSCTL_ZERO,
1823		.extra2		= SYSCTL_MAXOLDUID,
1824	},
1825#ifdef CONFIG_S390
1826	{
1827		.procname	= "userprocess_debug",
1828		.data		= &show_unhandled_signals,
1829		.maxlen		= sizeof(int),
1830		.mode		= 0644,
1831		.proc_handler	= proc_dointvec,
1832	},
1833#endif
1834	{
1835		.procname	= "pid_max",
1836		.data		= &pid_max,
1837		.maxlen		= sizeof (int),
1838		.mode		= 0644,
1839		.proc_handler	= proc_dointvec_minmax,
1840		.extra1		= &pid_max_min,
1841		.extra2		= &pid_max_max,
1842	},
1843	{
1844		.procname	= "panic_on_oops",
1845		.data		= &panic_on_oops,
1846		.maxlen		= sizeof(int),
1847		.mode		= 0644,
1848		.proc_handler	= proc_dointvec,
1849	},
1850	{
1851		.procname	= "panic_print",
1852		.data		= &panic_print,
1853		.maxlen		= sizeof(unsigned long),
1854		.mode		= 0644,
1855		.proc_handler	= proc_doulongvec_minmax,
1856	},
1857	{
1858		.procname	= "ngroups_max",
1859		.data		= (void *)&ngroups_max,
1860		.maxlen		= sizeof (int),
1861		.mode		= 0444,
1862		.proc_handler	= proc_dointvec,
1863	},
1864	{
1865		.procname	= "cap_last_cap",
1866		.data		= (void *)&cap_last_cap,
1867		.maxlen		= sizeof(int),
1868		.mode		= 0444,
1869		.proc_handler	= proc_dointvec,
1870	},
1871#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
1872	{
1873		.procname       = "unknown_nmi_panic",
1874		.data           = &unknown_nmi_panic,
1875		.maxlen         = sizeof (int),
1876		.mode           = 0644,
1877		.proc_handler   = proc_dointvec,
1878	},
1879#endif
1880
1881#if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
1882	defined(CONFIG_DEBUG_STACKOVERFLOW)
1883	{
1884		.procname	= "panic_on_stackoverflow",
1885		.data		= &sysctl_panic_on_stackoverflow,
1886		.maxlen		= sizeof(int),
1887		.mode		= 0644,
1888		.proc_handler	= proc_dointvec,
1889	},
1890#endif
1891#if defined(CONFIG_X86)
1892	{
1893		.procname	= "panic_on_unrecovered_nmi",
1894		.data		= &panic_on_unrecovered_nmi,
1895		.maxlen		= sizeof(int),
1896		.mode		= 0644,
1897		.proc_handler	= proc_dointvec,
1898	},
1899	{
1900		.procname	= "panic_on_io_nmi",
1901		.data		= &panic_on_io_nmi,
1902		.maxlen		= sizeof(int),
1903		.mode		= 0644,
1904		.proc_handler	= proc_dointvec,
1905	},
1906	{
1907		.procname	= "bootloader_type",
1908		.data		= &bootloader_type,
1909		.maxlen		= sizeof (int),
1910		.mode		= 0444,
1911		.proc_handler	= proc_dointvec,
1912	},
1913	{
1914		.procname	= "bootloader_version",
1915		.data		= &bootloader_version,
1916		.maxlen		= sizeof (int),
1917		.mode		= 0444,
1918		.proc_handler	= proc_dointvec,
1919	},
1920	{
1921		.procname	= "io_delay_type",
1922		.data		= &io_delay_type,
1923		.maxlen		= sizeof(int),
1924		.mode		= 0644,
1925		.proc_handler	= proc_dointvec,
1926	},
1927#endif
1928#if defined(CONFIG_MMU)
1929	{
1930		.procname	= "randomize_va_space",
1931		.data		= &randomize_va_space,
1932		.maxlen		= sizeof(int),
1933		.mode		= 0644,
1934		.proc_handler	= proc_dointvec,
1935	},
1936#endif
1937#if defined(CONFIG_S390) && defined(CONFIG_SMP)
1938	{
1939		.procname	= "spin_retry",
1940		.data		= &spin_retry,
1941		.maxlen		= sizeof (int),
1942		.mode		= 0644,
1943		.proc_handler	= proc_dointvec,
1944	},
1945#endif
1946#if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1947	{
1948		.procname	= "acpi_video_flags",
1949		.data		= &acpi_realmode_flags,
1950		.maxlen		= sizeof (unsigned long),
1951		.mode		= 0644,
1952		.proc_handler	= proc_doulongvec_minmax,
1953	},
1954#endif
1955#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1956	{
1957		.procname	= "ignore-unaligned-usertrap",
1958		.data		= &no_unaligned_warning,
1959		.maxlen		= sizeof (int),
1960		.mode		= 0644,
1961		.proc_handler	= proc_dointvec,
1962	},
1963#endif
1964#ifdef CONFIG_IA64
1965	{
1966		.procname	= "unaligned-dump-stack",
1967		.data		= &unaligned_dump_stack,
1968		.maxlen		= sizeof (int),
1969		.mode		= 0644,
1970		.proc_handler	= proc_dointvec,
1971	},
1972#endif
1973#ifdef CONFIG_RT_MUTEXES
1974	{
1975		.procname	= "max_lock_depth",
1976		.data		= &max_lock_depth,
1977		.maxlen		= sizeof(int),
1978		.mode		= 0644,
1979		.proc_handler	= proc_dointvec,
1980	},
1981#endif
1982#ifdef CONFIG_KEYS
1983	{
1984		.procname	= "keys",
1985		.mode		= 0555,
1986		.child		= key_sysctls,
1987	},
1988#endif
1989#ifdef CONFIG_PERF_EVENTS
1990	/*
1991	 * User-space scripts rely on the existence of this file
1992	 * as a feature check for perf_events being enabled.
1993	 *
1994	 * So it's an ABI, do not remove!
1995	 */
1996	{
1997		.procname	= "perf_event_paranoid",
1998		.data		= &sysctl_perf_event_paranoid,
1999		.maxlen		= sizeof(sysctl_perf_event_paranoid),
2000		.mode		= 0644,
2001		.proc_handler	= proc_dointvec,
2002	},
2003	{
2004		.procname	= "perf_event_mlock_kb",
2005		.data		= &sysctl_perf_event_mlock,
2006		.maxlen		= sizeof(sysctl_perf_event_mlock),
2007		.mode		= 0644,
2008		.proc_handler	= proc_dointvec,
2009	},
2010	{
2011		.procname	= "perf_event_max_sample_rate",
2012		.data		= &sysctl_perf_event_sample_rate,
2013		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
2014		.mode		= 0644,
2015		.proc_handler	= perf_proc_update_handler,
2016		.extra1		= SYSCTL_ONE,
2017	},
2018	{
2019		.procname	= "perf_cpu_time_max_percent",
2020		.data		= &sysctl_perf_cpu_time_max_percent,
2021		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
2022		.mode		= 0644,
2023		.proc_handler	= perf_cpu_time_max_percent_handler,
2024		.extra1		= SYSCTL_ZERO,
2025		.extra2		= SYSCTL_ONE_HUNDRED,
2026	},
2027	{
2028		.procname	= "perf_event_max_stack",
2029		.data		= &sysctl_perf_event_max_stack,
2030		.maxlen		= sizeof(sysctl_perf_event_max_stack),
2031		.mode		= 0644,
2032		.proc_handler	= perf_event_max_stack_handler,
2033		.extra1		= SYSCTL_ZERO,
2034		.extra2		= (void *)&six_hundred_forty_kb,
2035	},
2036	{
2037		.procname	= "perf_event_max_contexts_per_stack",
2038		.data		= &sysctl_perf_event_max_contexts_per_stack,
2039		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
2040		.mode		= 0644,
2041		.proc_handler	= perf_event_max_stack_handler,
2042		.extra1		= SYSCTL_ZERO,
2043		.extra2		= SYSCTL_ONE_THOUSAND,
2044	},
2045#endif
2046	{
2047		.procname	= "panic_on_warn",
2048		.data		= &panic_on_warn,
2049		.maxlen		= sizeof(int),
2050		.mode		= 0644,
2051		.proc_handler	= proc_dointvec_minmax,
2052		.extra1		= SYSCTL_ZERO,
2053		.extra2		= SYSCTL_ONE,
2054	},
2055#ifdef CONFIG_TREE_RCU
2056	{
2057		.procname	= "panic_on_rcu_stall",
2058		.data		= &sysctl_panic_on_rcu_stall,
2059		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
2060		.mode		= 0644,
2061		.proc_handler	= proc_dointvec_minmax,
2062		.extra1		= SYSCTL_ZERO,
2063		.extra2		= SYSCTL_ONE,
2064	},
2065	{
2066		.procname	= "max_rcu_stall_to_panic",
2067		.data		= &sysctl_max_rcu_stall_to_panic,
2068		.maxlen		= sizeof(sysctl_max_rcu_stall_to_panic),
2069		.mode		= 0644,
2070		.proc_handler	= proc_dointvec_minmax,
2071		.extra1		= SYSCTL_ONE,
2072		.extra2		= SYSCTL_INT_MAX,
2073	},
2074#endif
2075	{ }
2076};
2077
2078static struct ctl_table vm_table[] = {
2079	{
2080		.procname	= "overcommit_memory",
2081		.data		= &sysctl_overcommit_memory,
2082		.maxlen		= sizeof(sysctl_overcommit_memory),
2083		.mode		= 0644,
2084		.proc_handler	= overcommit_policy_handler,
2085		.extra1		= SYSCTL_ZERO,
2086		.extra2		= SYSCTL_TWO,
2087	},
2088	{
2089		.procname	= "overcommit_ratio",
2090		.data		= &sysctl_overcommit_ratio,
2091		.maxlen		= sizeof(sysctl_overcommit_ratio),
2092		.mode		= 0644,
2093		.proc_handler	= overcommit_ratio_handler,
2094	},
2095	{
2096		.procname	= "overcommit_kbytes",
2097		.data		= &sysctl_overcommit_kbytes,
2098		.maxlen		= sizeof(sysctl_overcommit_kbytes),
2099		.mode		= 0644,
2100		.proc_handler	= overcommit_kbytes_handler,
2101	},
2102	{
2103		.procname	= "page-cluster",
2104		.data		= &page_cluster,
2105		.maxlen		= sizeof(int),
2106		.mode		= 0644,
2107		.proc_handler	= proc_dointvec_minmax,
2108		.extra1		= SYSCTL_ZERO,
2109		.extra2		= (void *)&page_cluster_max,
2110	},
2111	{
2112		.procname	= "dirtytime_expire_seconds",
2113		.data		= &dirtytime_expire_interval,
2114		.maxlen		= sizeof(dirtytime_expire_interval),
2115		.mode		= 0644,
2116		.proc_handler	= dirtytime_interval_handler,
2117		.extra1		= SYSCTL_ZERO,
2118	},
2119	{
2120		.procname	= "swappiness",
2121		.data		= &vm_swappiness,
2122		.maxlen		= sizeof(vm_swappiness),
2123		.mode		= 0644,
2124		.proc_handler	= proc_dointvec_minmax,
2125		.extra1		= SYSCTL_ZERO,
2126		.extra2		= SYSCTL_TWO_HUNDRED,
2127	},
2128#ifdef CONFIG_NUMA
2129	{
2130		.procname	= "numa_stat",
2131		.data		= &sysctl_vm_numa_stat,
2132		.maxlen		= sizeof(int),
2133		.mode		= 0644,
2134		.proc_handler	= sysctl_vm_numa_stat_handler,
2135		.extra1		= SYSCTL_ZERO,
2136		.extra2		= SYSCTL_ONE,
2137	},
2138#endif
2139#ifdef CONFIG_HUGETLB_PAGE
2140	{
2141		.procname	= "nr_hugepages",
2142		.data		= NULL,
2143		.maxlen		= sizeof(unsigned long),
2144		.mode		= 0644,
2145		.proc_handler	= hugetlb_sysctl_handler,
2146	},
2147#ifdef CONFIG_NUMA
2148	{
2149		.procname       = "nr_hugepages_mempolicy",
2150		.data           = NULL,
2151		.maxlen         = sizeof(unsigned long),
2152		.mode           = 0644,
2153		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
2154	},
2155#endif
2156	 {
2157		.procname	= "hugetlb_shm_group",
2158		.data		= &sysctl_hugetlb_shm_group,
2159		.maxlen		= sizeof(gid_t),
2160		.mode		= 0644,
2161		.proc_handler	= proc_dointvec,
2162	 },
2163	{
2164		.procname	= "nr_overcommit_hugepages",
2165		.data		= NULL,
2166		.maxlen		= sizeof(unsigned long),
2167		.mode		= 0644,
2168		.proc_handler	= hugetlb_overcommit_handler,
2169	},
2170#endif
2171	{
2172		.procname	= "lowmem_reserve_ratio",
2173		.data		= &sysctl_lowmem_reserve_ratio,
2174		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
2175		.mode		= 0644,
2176		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
2177	},
2178	{
2179		.procname	= "drop_caches",
2180		.data		= &sysctl_drop_caches,
2181		.maxlen		= sizeof(int),
2182		.mode		= 0200,
2183		.proc_handler	= drop_caches_sysctl_handler,
2184		.extra1		= SYSCTL_ONE,
2185		.extra2		= SYSCTL_FOUR,
2186	},
2187#ifdef CONFIG_COMPACTION
2188	{
2189		.procname	= "compact_memory",
2190		.data		= NULL,
2191		.maxlen		= sizeof(int),
2192		.mode		= 0200,
2193		.proc_handler	= sysctl_compaction_handler,
2194	},
2195	{
2196		.procname	= "compaction_proactiveness",
2197		.data		= &sysctl_compaction_proactiveness,
2198		.maxlen		= sizeof(sysctl_compaction_proactiveness),
2199		.mode		= 0644,
2200		.proc_handler	= compaction_proactiveness_sysctl_handler,
2201		.extra1		= SYSCTL_ZERO,
2202		.extra2		= SYSCTL_ONE_HUNDRED,
2203	},
2204	{
2205		.procname	= "extfrag_threshold",
2206		.data		= &sysctl_extfrag_threshold,
2207		.maxlen		= sizeof(int),
2208		.mode		= 0644,
2209		.proc_handler	= proc_dointvec_minmax,
2210		.extra1		= SYSCTL_ZERO,
2211		.extra2		= SYSCTL_ONE_THOUSAND,
2212	},
2213	{
2214		.procname	= "compact_unevictable_allowed",
2215		.data		= &sysctl_compact_unevictable_allowed,
2216		.maxlen		= sizeof(int),
2217		.mode		= 0644,
2218		.proc_handler	= proc_dointvec_minmax_warn_RT_change,
2219		.extra1		= SYSCTL_ZERO,
2220		.extra2		= SYSCTL_ONE,
2221	},
2222
2223#endif /* CONFIG_COMPACTION */
2224	{
2225		.procname	= "min_free_kbytes",
2226		.data		= &min_free_kbytes,
2227		.maxlen		= sizeof(min_free_kbytes),
2228		.mode		= 0644,
2229		.proc_handler	= min_free_kbytes_sysctl_handler,
2230		.extra1		= SYSCTL_ZERO,
2231	},
2232	{
2233		.procname	= "watermark_boost_factor",
2234		.data		= &watermark_boost_factor,
2235		.maxlen		= sizeof(watermark_boost_factor),
2236		.mode		= 0644,
2237		.proc_handler	= proc_dointvec_minmax,
2238		.extra1		= SYSCTL_ZERO,
2239	},
2240	{
2241		.procname	= "watermark_scale_factor",
2242		.data		= &watermark_scale_factor,
2243		.maxlen		= sizeof(watermark_scale_factor),
2244		.mode		= 0644,
2245		.proc_handler	= watermark_scale_factor_sysctl_handler,
2246		.extra1		= SYSCTL_ONE,
2247		.extra2		= SYSCTL_THREE_THOUSAND,
2248	},
2249	{
2250		.procname	= "percpu_pagelist_high_fraction",
2251		.data		= &percpu_pagelist_high_fraction,
2252		.maxlen		= sizeof(percpu_pagelist_high_fraction),
2253		.mode		= 0644,
2254		.proc_handler	= percpu_pagelist_high_fraction_sysctl_handler,
2255		.extra1		= SYSCTL_ZERO,
2256	},
2257	{
2258		.procname	= "page_lock_unfairness",
2259		.data		= &sysctl_page_lock_unfairness,
2260		.maxlen		= sizeof(sysctl_page_lock_unfairness),
2261		.mode		= 0644,
2262		.proc_handler	= proc_dointvec_minmax,
2263		.extra1		= SYSCTL_ZERO,
2264	},
2265#ifdef CONFIG_MMU
2266	{
2267		.procname	= "max_map_count",
2268		.data		= &sysctl_max_map_count,
2269		.maxlen		= sizeof(sysctl_max_map_count),
2270		.mode		= 0644,
2271		.proc_handler	= proc_dointvec_minmax,
2272		.extra1		= SYSCTL_ZERO,
2273	},
2274#else
2275	{
2276		.procname	= "nr_trim_pages",
2277		.data		= &sysctl_nr_trim_pages,
2278		.maxlen		= sizeof(sysctl_nr_trim_pages),
2279		.mode		= 0644,
2280		.proc_handler	= proc_dointvec_minmax,
2281		.extra1		= SYSCTL_ZERO,
2282	},
2283#endif
2284	{
2285		.procname	= "vfs_cache_pressure",
2286		.data		= &sysctl_vfs_cache_pressure,
2287		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
2288		.mode		= 0644,
2289		.proc_handler	= proc_dointvec_minmax,
2290		.extra1		= SYSCTL_ZERO,
2291	},
2292#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
2293    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
2294	{
2295		.procname	= "legacy_va_layout",
2296		.data		= &sysctl_legacy_va_layout,
2297		.maxlen		= sizeof(sysctl_legacy_va_layout),
2298		.mode		= 0644,
2299		.proc_handler	= proc_dointvec_minmax,
2300		.extra1		= SYSCTL_ZERO,
2301	},
2302#endif
2303#ifdef CONFIG_NUMA
2304	{
2305		.procname	= "zone_reclaim_mode",
2306		.data		= &node_reclaim_mode,
2307		.maxlen		= sizeof(node_reclaim_mode),
2308		.mode		= 0644,
2309		.proc_handler	= proc_dointvec_minmax,
2310		.extra1		= SYSCTL_ZERO,
2311	},
2312	{
2313		.procname	= "min_unmapped_ratio",
2314		.data		= &sysctl_min_unmapped_ratio,
2315		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
2316		.mode		= 0644,
2317		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
2318		.extra1		= SYSCTL_ZERO,
2319		.extra2		= SYSCTL_ONE_HUNDRED,
2320	},
2321	{
2322		.procname	= "min_slab_ratio",
2323		.data		= &sysctl_min_slab_ratio,
2324		.maxlen		= sizeof(sysctl_min_slab_ratio),
2325		.mode		= 0644,
2326		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
2327		.extra1		= SYSCTL_ZERO,
2328		.extra2		= SYSCTL_ONE_HUNDRED,
2329	},
2330#endif
2331#ifdef CONFIG_SMP
2332	{
2333		.procname	= "stat_interval",
2334		.data		= &sysctl_stat_interval,
2335		.maxlen		= sizeof(sysctl_stat_interval),
2336		.mode		= 0644,
2337		.proc_handler	= proc_dointvec_jiffies,
2338	},
2339	{
2340		.procname	= "stat_refresh",
2341		.data		= NULL,
2342		.maxlen		= 0,
2343		.mode		= 0600,
2344		.proc_handler	= vmstat_refresh,
2345	},
2346#endif
2347#ifdef CONFIG_MMU
2348	{
2349		.procname	= "mmap_min_addr",
2350		.data		= &dac_mmap_min_addr,
2351		.maxlen		= sizeof(unsigned long),
2352		.mode		= 0644,
2353		.proc_handler	= mmap_min_addr_handler,
2354	},
2355#endif
2356#ifdef CONFIG_NUMA
2357	{
2358		.procname	= "numa_zonelist_order",
2359		.data		= &numa_zonelist_order,
2360		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
2361		.mode		= 0644,
2362		.proc_handler	= numa_zonelist_order_handler,
2363	},
2364#endif
2365#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
2366   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
2367	{
2368		.procname	= "vdso_enabled",
2369#ifdef CONFIG_X86_32
2370		.data		= &vdso32_enabled,
2371		.maxlen		= sizeof(vdso32_enabled),
2372#else
2373		.data		= &vdso_enabled,
2374		.maxlen		= sizeof(vdso_enabled),
2375#endif
2376		.mode		= 0644,
2377		.proc_handler	= proc_dointvec,
2378		.extra1		= SYSCTL_ZERO,
2379	},
2380#endif
2381#ifdef CONFIG_MEMORY_FAILURE
2382	{
2383		.procname	= "memory_failure_early_kill",
2384		.data		= &sysctl_memory_failure_early_kill,
2385		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
2386		.mode		= 0644,
2387		.proc_handler	= proc_dointvec_minmax,
2388		.extra1		= SYSCTL_ZERO,
2389		.extra2		= SYSCTL_ONE,
2390	},
2391	{
2392		.procname	= "memory_failure_recovery",
2393		.data		= &sysctl_memory_failure_recovery,
2394		.maxlen		= sizeof(sysctl_memory_failure_recovery),
2395		.mode		= 0644,
2396		.proc_handler	= proc_dointvec_minmax,
2397		.extra1		= SYSCTL_ZERO,
2398		.extra2		= SYSCTL_ONE,
2399	},
2400#endif
2401	{
2402		.procname	= "user_reserve_kbytes",
2403		.data		= &sysctl_user_reserve_kbytes,
2404		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
2405		.mode		= 0644,
2406		.proc_handler	= proc_doulongvec_minmax,
2407	},
2408	{
2409		.procname	= "admin_reserve_kbytes",
2410		.data		= &sysctl_admin_reserve_kbytes,
2411		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
2412		.mode		= 0644,
2413		.proc_handler	= proc_doulongvec_minmax,
2414	},
2415#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
2416	{
2417		.procname	= "mmap_rnd_bits",
2418		.data		= &mmap_rnd_bits,
2419		.maxlen		= sizeof(mmap_rnd_bits),
2420		.mode		= 0600,
2421		.proc_handler	= proc_dointvec_minmax,
2422		.extra1		= (void *)&mmap_rnd_bits_min,
2423		.extra2		= (void *)&mmap_rnd_bits_max,
2424	},
2425#endif
2426#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
2427	{
2428		.procname	= "mmap_rnd_compat_bits",
2429		.data		= &mmap_rnd_compat_bits,
2430		.maxlen		= sizeof(mmap_rnd_compat_bits),
2431		.mode		= 0600,
2432		.proc_handler	= proc_dointvec_minmax,
2433		.extra1		= (void *)&mmap_rnd_compat_bits_min,
2434		.extra2		= (void *)&mmap_rnd_compat_bits_max,
2435	},
2436#endif
2437#ifdef CONFIG_USERFAULTFD
2438	{
2439		.procname	= "unprivileged_userfaultfd",
2440		.data		= &sysctl_unprivileged_userfaultfd,
2441		.maxlen		= sizeof(sysctl_unprivileged_userfaultfd),
2442		.mode		= 0644,
2443		.proc_handler	= proc_dointvec_minmax,
2444		.extra1		= SYSCTL_ZERO,
2445		.extra2		= SYSCTL_ONE,
2446	},
2447#endif
2448	{ }
2449};
2450
/*
 * Sysctl entries for the "debug" base; this array is what
 * DECLARE_SYSCTL_BASE(debug, debug_table) further down in this file refers to.
 */
2451static struct ctl_table debug_table[] = {
2452#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
	/* One int, mode 0644, read and written through proc_dointvec. */
2453	{
2454		.procname	= "exception-trace",
2455		.data		= &show_unhandled_signals,
2456		.maxlen		= sizeof(int),
2457		.mode		= 0644,
2458		.proc_handler	= proc_dointvec
2459	},
2460#endif
	/* Empty entry closing the table (presumably the terminator the sysctl core looks for). */
2461	{ }
2462};
2463
/*
 * Table for the "dev" base. It has no entries of its own in this file, only
 * the closing empty entry.
 */
2464static struct ctl_table dev_table[] = {
2465	{ }
2466};
2467
/*
 * One DECLARE_SYSCTL_BASE() per base table defined in this file. The macro is
 * defined elsewhere; presumably it emits the objects that
 * register_sysctl_base() picks up in sysctl_init_bases() -- TODO confirm.
 */
2468DECLARE_SYSCTL_BASE(kernel, kern_table);
2469DECLARE_SYSCTL_BASE(vm, vm_table);
2470DECLARE_SYSCTL_BASE(debug, debug_table);
2471DECLARE_SYSCTL_BASE(dev, dev_table);
2472
/*
 * sysctl_init_bases - register the kernel, vm, debug and dev sysctl bases
 *
 * Marked __init. Nothing is checked after the individual
 * register_sysctl_base() invocations.
 *
 * Always returns 0.
 */
2473int __init sysctl_init_bases(void)
2474{
2475	register_sysctl_base(kernel);
2476	register_sysctl_base(vm);
2477	register_sysctl_base(debug);
2478	register_sysctl_base(dev);
2479
2480	return 0;
2481}
2482#endif /* CONFIG_SYSCTL */
2483/*
2484 * No sense putting this after each symbol definition, twice,
2485 * exception granted :-)
 *
 * In other words: the proc handler exports are collected here instead of
 * sitting next to each definition. NOTE(review): "twice" presumably refers
 * to each handler having two definitions (a real one and a stub) -- confirm
 * against the full file. Of the symbols below only proc_douintvec_minmax is
 * exported GPL-only.
2486 */
2487EXPORT_SYMBOL(proc_dobool);
2488EXPORT_SYMBOL(proc_dointvec);
2489EXPORT_SYMBOL(proc_douintvec);
2490EXPORT_SYMBOL(proc_dointvec_jiffies);
2491EXPORT_SYMBOL(proc_dointvec_minmax);
2492EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
2493EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
2494EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
2495EXPORT_SYMBOL(proc_dostring);
2496EXPORT_SYMBOL(proc_doulongvec_minmax);
2497EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
2498EXPORT_SYMBOL(proc_do_large_bitmap);
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * sysctl.c: General linux system control interface
   4 *
   5 * Begun 24 March 1995, Stephen Tweedie
   6 * Added /proc support, Dec 1995
   7 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
   8 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
   9 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
  10 * Dynamic registration fixes, Stephen Tweedie.
  11 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
  12 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
  13 *  Horn.
  14 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
  15 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
  16 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
  17 *  Wendling.
  18 * The list_for_each() macro wasn't appropriate for the sysctl loop.
  19 *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
  20 */
  21
  22#include <linux/module.h>
  23#include <linux/mm.h>
  24#include <linux/swap.h>
  25#include <linux/slab.h>
  26#include <linux/sysctl.h>
  27#include <linux/bitmap.h>
  28#include <linux/signal.h>
  29#include <linux/panic.h>
  30#include <linux/printk.h>
  31#include <linux/proc_fs.h>
  32#include <linux/security.h>
  33#include <linux/ctype.h>
  34#include <linux/kmemleak.h>
  35#include <linux/filter.h>
  36#include <linux/fs.h>
  37#include <linux/init.h>
  38#include <linux/kernel.h>
  39#include <linux/kobject.h>
  40#include <linux/net.h>
  41#include <linux/sysrq.h>
  42#include <linux/highuid.h>
  43#include <linux/writeback.h>
  44#include <linux/ratelimit.h>
 
  45#include <linux/hugetlb.h>
  46#include <linux/initrd.h>
  47#include <linux/key.h>
  48#include <linux/times.h>
  49#include <linux/limits.h>
  50#include <linux/dcache.h>
  51#include <linux/syscalls.h>
  52#include <linux/vmstat.h>
  53#include <linux/nfs_fs.h>
  54#include <linux/acpi.h>
  55#include <linux/reboot.h>
  56#include <linux/ftrace.h>
  57#include <linux/perf_event.h>
  58#include <linux/oom.h>
  59#include <linux/kmod.h>
  60#include <linux/capability.h>
  61#include <linux/binfmts.h>
  62#include <linux/sched/sysctl.h>
  63#include <linux/mount.h>
  64#include <linux/userfaultfd_k.h>
  65#include <linux/pid.h>
  66
  67#include "../lib/kstrtox.h"
  68
  69#include <linux/uaccess.h>
  70#include <asm/processor.h>
  71
  72#ifdef CONFIG_X86
  73#include <asm/nmi.h>
  74#include <asm/stacktrace.h>
  75#include <asm/io.h>
  76#endif
  77#ifdef CONFIG_SPARC
  78#include <asm/setup.h>
  79#endif
  80#ifdef CONFIG_RT_MUTEXES
  81#include <linux/rtmutex.h>
  82#endif
  83
  84/* shared constants to be used in various sysctls */
/*
 * NOTE(review): the SYSCTL_ZERO, SYSCTL_ONE, ... names used for .extra1 and
 * .extra2 in sysctl tables are presumably defined elsewhere as pointers into
 * sysctl_vals -- confirm before reordering or inserting entries.
 * sysctl_vals is exported to all modules, sysctl_long_vals to GPL modules only.
 */
  85const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
  86EXPORT_SYMBOL(sysctl_vals);
  87
  88const unsigned long sysctl_long_vals[] = { 0, 1, LONG_MAX };
  89EXPORT_SYMBOL_GPL(sysctl_long_vals);
  90
  91#if defined(CONFIG_SYSCTL)
  92
  93/* Constants used for minimum and maximum */
  94
  95#ifdef CONFIG_PERF_EVENTS
/* 640 * 1024; only built with CONFIG_PERF_EVENTS. Its user is outside this excerpt. */
  96static const int six_hundred_forty_kb = 640 * 1024;
  97#endif
  98
  99
/*
 * Addressable copies of NGROUPS_MAX and CAP_LAST_CAP. Presumably they exist
 * so that a table entry can point at them -- their users are outside this
 * excerpt.
 */
 100static const int ngroups_max = NGROUPS_MAX;
 101static const int cap_last_cap = CAP_LAST_CAP;
 102
 103#ifdef CONFIG_PROC_SYSCTL
 104
 105/**
 106 * enum sysctl_writes_mode - supported sysctl write modes
 107 *
 108 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
 109 *	to be written, and multiple writes on the same sysctl file descriptor
 110 *	will rewrite the sysctl value, regardless of file position. No warning
 111 *	is issued when the initial position is not 0.
 112 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
 113 *	not 0.
 114 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
 115 *	file position 0 and the value must be fully contained in the buffer
 116 *	sent to the write syscall. If dealing with strings respect the file
 117 *	position, but restrict this to the max length of the buffer, anything
 118 *	past the max length will be ignored. Multiple writes will append
 119 *	to the buffer.
 120 *
 121 * These write modes control how current file position affects the behavior of
 122 * updating sysctl values through the proc interface on each write.
 123 */
 124enum sysctl_writes_mode {
 125	SYSCTL_WRITES_LEGACY		= -1,
 126	SYSCTL_WRITES_WARN		= 0,
 127	SYSCTL_WRITES_STRICT		= 1,
 128};
 129
/*
 * Write mode consulted by the proc handlers in this file; starts out strict.
 * The warning text in warn_sysctl_write() names it kernel.sysctl_writes_strict.
 */
 130static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
 131#endif /* CONFIG_PROC_SYSCTL */
 132
 133#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
 134    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
/*
 * File-scope int with no initializer, so it starts at 0.
 * NOTE(review): presumably the storage behind a "legacy_va_layout" sysctl
 * entry -- the entry itself is not in this excerpt, confirm.
 */
 135int sysctl_legacy_va_layout;
 136#endif
 137
 138#endif /* CONFIG_SYSCTL */
 139
 140/*
 141 * /proc/sys support
 142 */
 143
 144#ifdef CONFIG_PROC_SYSCTL
 145
 146static int _proc_do_string(char *data, int maxlen, int write,
 147		char *buffer, size_t *lenp, loff_t *ppos)
 148{
 149	size_t len;
 150	char c, *p;
 151
 152	if (!data || !maxlen || !*lenp) {
 153		*lenp = 0;
 154		return 0;
 155	}
 156
 157	if (write) {
 158		if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
 159			/* Only continue writes not past the end of buffer. */
 160			len = strlen(data);
 161			if (len > maxlen - 1)
 162				len = maxlen - 1;
 163
 164			if (*ppos > len)
 165				return 0;
 166			len = *ppos;
 167		} else {
 168			/* Start writing from beginning of buffer. */
 169			len = 0;
 170		}
 171
 172		*ppos += *lenp;
 173		p = buffer;
 174		while ((p - buffer) < *lenp && len < maxlen - 1) {
 175			c = *(p++);
 176			if (c == 0 || c == '\n')
 177				break;
 178			data[len++] = c;
 179		}
 180		data[len] = 0;
 181	} else {
 182		len = strlen(data);
 183		if (len > maxlen)
 184			len = maxlen;
 185
 186		if (*ppos > len) {
 187			*lenp = 0;
 188			return 0;
 189		}
 190
 191		data += *ppos;
 192		len  -= *ppos;
 193
 194		if (len > *lenp)
 195			len = *lenp;
 196		if (len)
 197			memcpy(buffer, data, len);
 198		if (len < *lenp) {
 199			buffer[len] = '\n';
 200			len++;
 201		}
 202		*lenp = len;
 203		*ppos += len;
 204	}
 205	return 0;
 206}
 207
 208static void warn_sysctl_write(const struct ctl_table *table)
 209{
 210	pr_warn_once("%s wrote to %s when file position was not 0!\n"
 211		"This will not be supported in the future. To silence this\n"
 212		"warning, set kernel.sysctl_writes_strict = -1\n",
 213		current->comm, table->procname);
 214}
 215
 216/**
 217 * proc_first_pos_non_zero_ignore - check if first position is allowed
 218 * @ppos: file position
 219 * @table: the sysctl table
 220 *
 221 * Returns true if the first position is non-zero and the sysctl_writes_strict
 222 * mode indicates this is not allowed for numeric input types. String proc
 223 * handlers can ignore the return value.
 224 */
 225static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
 226					   const struct ctl_table *table)
 227{
 228	if (!*ppos)
 229		return false;
 230
 231	switch (sysctl_writes_strict) {
 232	case SYSCTL_WRITES_STRICT:
 233		return true;
 234	case SYSCTL_WRITES_WARN:
 235		warn_sysctl_write(table);
 236		return false;
 237	default:
 238		return false;
 239	}
 240}
 241
 242/**
 243 * proc_dostring - read a string sysctl
 244 * @table: the sysctl table
 245 * @write: %TRUE if this is a write to the sysctl file
 246 * @buffer: the user buffer
 247 * @lenp: the size of the user buffer
 248 * @ppos: file position
 249 *
 250 * Reads/writes a string from/to the user buffer. If the kernel
 251 * buffer provided is not large enough to hold the string, the
 252 * string is truncated. The copied string is %NULL-terminated.
 253 * If the string is being read by the user process, it is copied
 254 * and a newline '\n' is added. It is truncated if the buffer is
 255 * not large enough.
 256 *
 257 * Returns 0 on success.
 258 */
 259int proc_dostring(const struct ctl_table *table, int write,
 260		  void *buffer, size_t *lenp, loff_t *ppos)
 261{
 262	if (write)
 263		proc_first_pos_non_zero_ignore(ppos, table);
 264
 265	return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
 266			ppos);
 267}
 268
 269static void proc_skip_spaces(char **buf, size_t *size)
 270{
 271	while (*size) {
 272		if (!isspace(**buf))
 273			break;
 274		(*size)--;
 275		(*buf)++;
 276	}
 277}
 278
 279static void proc_skip_char(char **buf, size_t *size, const char v)
 280{
 281	while (*size) {
 282		if (**buf != v)
 283			break;
 284		(*size)--;
 285		(*buf)++;
 286	}
 287}
 288
 289/**
 290 * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
 291 *                   fail on overflow
 292 *
 293 * @cp: kernel buffer containing the string to parse
 294 * @endp: pointer to store the trailing characters
 295 * @base: the base to use
 296 * @res: where the parsed integer will be stored
 297 *
 298 * In case of success 0 is returned and @res will contain the parsed integer,
 299 * @endp will hold any trailing characters.
 300 * This function will fail the parse on overflow. If there wasn't an overflow
 301 * the function will defer the decision what characters count as invalid to the
 302 * caller.
 303 */
 304static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
 305			   unsigned long *res)
 306{
 307	unsigned long long result;
 308	unsigned int rv;
 309
 310	cp = _parse_integer_fixup_radix(cp, &base);
 311	rv = _parse_integer(cp, base, &result);
 312	if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
 313		return -ERANGE;
 314
 315	cp += rv;
 316
 317	if (endp)
 318		*endp = (char *)cp;
 319
 320	*res = (unsigned long)result;
 321	return 0;
 322}
 323
 324#define TMPBUFLEN 22
 325/**
 326 * proc_get_long - reads an ASCII formatted integer from a user buffer
 327 *
 328 * @buf: a kernel buffer
 329 * @size: size of the kernel buffer
 330 * @val: this is where the number will be stored
 331 * @neg: set to %TRUE if number is negative
 332 * @perm_tr: a vector which contains the allowed trailers
 333 * @perm_tr_len: size of the perm_tr vector
 334 * @tr: pointer to store the trailer character
 335 *
 336 * In case of success %0 is returned and @buf and @size are updated with
 337 * the amount of bytes read. If @tr is non-NULL and a trailing
 338 * character exists (size is non-zero after returning from this
 339 * function), @tr is updated with the trailing character.
 340 */
 341static int proc_get_long(char **buf, size_t *size,
 342			  unsigned long *val, bool *neg,
 343			  const char *perm_tr, unsigned perm_tr_len, char *tr)
 344{
 345	char *p, tmp[TMPBUFLEN];
 346	ssize_t len = *size;
 347
 348	if (len <= 0)
 349		return -EINVAL;
 350
 351	if (len > TMPBUFLEN - 1)
 352		len = TMPBUFLEN - 1;
 353
 354	memcpy(tmp, *buf, len);
 355
 356	tmp[len] = 0;
 357	p = tmp;
 358	if (*p == '-' && *size > 1) {
 359		*neg = true;
 360		p++;
 361	} else
 362		*neg = false;
 363	if (!isdigit(*p))
 364		return -EINVAL;
 365
 366	if (strtoul_lenient(p, &p, 0, val))
 367		return -EINVAL;
 368
 369	len = p - tmp;
 370
 371	/* We don't know if the next char is whitespace thus we may accept
 372	 * invalid integers (e.g. 1234...a) or two integers instead of one
 373	 * (e.g. 123...1). So lets not allow such large numbers. */
 374	if (len == TMPBUFLEN - 1)
 375		return -EINVAL;
 376
 377	if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
 378		return -EINVAL;
 379
 380	if (tr && (len < *size))
 381		*tr = *p;
 382
 383	*buf += len;
 384	*size -= len;
 385
 386	return 0;
 387}
 388
 389/**
 390 * proc_put_long - converts an integer to a decimal ASCII formatted string
 391 *
 392 * @buf: the user buffer
 393 * @size: the size of the user buffer
 394 * @val: the integer to be converted
 395 * @neg: sign of the number, %TRUE for negative
 396 *
 397 * In case of success @buf and @size are updated with the amount of bytes
 398 * written.
 399 */
 400static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
 401{
 402	int len;
 403	char tmp[TMPBUFLEN], *p = tmp;
 404
 405	sprintf(p, "%s%lu", neg ? "-" : "", val);
 406	len = strlen(tmp);
 407	if (len > *size)
 408		len = *size;
 409	memcpy(*buf, tmp, len);
 410	*size -= len;
 411	*buf += len;
 412}
 413#undef TMPBUFLEN
 414
 415static void proc_put_char(void **buf, size_t *size, char c)
 416{
 417	if (*size) {
 418		char **buffer = (char **)buf;
 419		**buffer = c;
 420
 421		(*size)--;
 422		(*buffer)++;
 423		*buf = *buffer;
 424	}
 425}
 426
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 427static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
 428				 int *valp,
 429				 int write, void *data)
 430{
 431	if (write) {
 432		if (*negp) {
 433			if (*lvalp > (unsigned long) INT_MAX + 1)
 434				return -EINVAL;
 435			WRITE_ONCE(*valp, -*lvalp);
 436		} else {
 437			if (*lvalp > (unsigned long) INT_MAX)
 438				return -EINVAL;
 439			WRITE_ONCE(*valp, *lvalp);
 440		}
 441	} else {
 442		int val = READ_ONCE(*valp);
 443		if (val < 0) {
 444			*negp = true;
 445			*lvalp = -(unsigned long)val;
 446		} else {
 447			*negp = false;
 448			*lvalp = (unsigned long)val;
 449		}
 450	}
 451	return 0;
 452}
 453
 454static int do_proc_douintvec_conv(unsigned long *lvalp,
 455				  unsigned int *valp,
 456				  int write, void *data)
 457{
 458	if (write) {
 459		if (*lvalp > UINT_MAX)
 460			return -EINVAL;
 461		WRITE_ONCE(*valp, *lvalp);
 462	} else {
 463		unsigned int val = READ_ONCE(*valp);
 464		*lvalp = (unsigned long)val;
 465	}
 466	return 0;
 467}
 468
 469static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
 470
 471static int __do_proc_dointvec(void *tbl_data, const struct ctl_table *table,
 472		  int write, void *buffer,
 473		  size_t *lenp, loff_t *ppos,
 474		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
 475			      int write, void *data),
 476		  void *data)
 477{
 478	int *i, vleft, first = 1, err = 0;
 479	size_t left;
 480	char *p;
 481
 482	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
 483		*lenp = 0;
 484		return 0;
 485	}
 486
 487	i = (int *) tbl_data;
 488	vleft = table->maxlen / sizeof(*i);
 489	left = *lenp;
 490
 491	if (!conv)
 492		conv = do_proc_dointvec_conv;
 493
 494	if (write) {
 495		if (proc_first_pos_non_zero_ignore(ppos, table))
 496			goto out;
 497
 498		if (left > PAGE_SIZE - 1)
 499			left = PAGE_SIZE - 1;
 500		p = buffer;
 501	}
 502
 503	for (; left && vleft--; i++, first=0) {
 504		unsigned long lval;
 505		bool neg;
 506
 507		if (write) {
 508			proc_skip_spaces(&p, &left);
 509
 510			if (!left)
 511				break;
 512			err = proc_get_long(&p, &left, &lval, &neg,
 513					     proc_wspace_sep,
 514					     sizeof(proc_wspace_sep), NULL);
 515			if (err)
 516				break;
 517			if (conv(&neg, &lval, i, 1, data)) {
 518				err = -EINVAL;
 519				break;
 520			}
 521		} else {
 522			if (conv(&neg, &lval, i, 0, data)) {
 523				err = -EINVAL;
 524				break;
 525			}
 526			if (!first)
 527				proc_put_char(&buffer, &left, '\t');
 528			proc_put_long(&buffer, &left, lval, neg);
 529		}
 530	}
 531
 532	if (!write && !first && left && !err)
 533		proc_put_char(&buffer, &left, '\n');
 534	if (write && !err && left)
 535		proc_skip_spaces(&p, &left);
 536	if (write && first)
 537		return err ? : -EINVAL;
 538	*lenp -= left;
 539out:
 540	*ppos += *lenp;
 541	return err;
 542}
 543
 544static int do_proc_dointvec(const struct ctl_table *table, int write,
 545		  void *buffer, size_t *lenp, loff_t *ppos,
 546		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
 547			      int write, void *data),
 548		  void *data)
 549{
 550	return __do_proc_dointvec(table->data, table, write,
 551			buffer, lenp, ppos, conv, data);
 552}
 553
 554static int do_proc_douintvec_w(unsigned int *tbl_data,
 555			       const struct ctl_table *table,
 556			       void *buffer,
 557			       size_t *lenp, loff_t *ppos,
 558			       int (*conv)(unsigned long *lvalp,
 559					   unsigned int *valp,
 560					   int write, void *data),
 561			       void *data)
 562{
 563	unsigned long lval;
 564	int err = 0;
 565	size_t left;
 566	bool neg;
 567	char *p = buffer;
 568
 569	left = *lenp;
 570
 571	if (proc_first_pos_non_zero_ignore(ppos, table))
 572		goto bail_early;
 573
 574	if (left > PAGE_SIZE - 1)
 575		left = PAGE_SIZE - 1;
 576
 577	proc_skip_spaces(&p, &left);
 578	if (!left) {
 579		err = -EINVAL;
 580		goto out_free;
 581	}
 582
 583	err = proc_get_long(&p, &left, &lval, &neg,
 584			     proc_wspace_sep,
 585			     sizeof(proc_wspace_sep), NULL);
 586	if (err || neg) {
 587		err = -EINVAL;
 588		goto out_free;
 589	}
 590
 591	if (conv(&lval, tbl_data, 1, data)) {
 592		err = -EINVAL;
 593		goto out_free;
 594	}
 595
 596	if (!err && left)
 597		proc_skip_spaces(&p, &left);
 598
 599out_free:
 600	if (err)
 601		return -EINVAL;
 602
 603	return 0;
 604
 605	/* This is in keeping with old __do_proc_dointvec() */
 606bail_early:
 607	*ppos += *lenp;
 608	return err;
 609}
 610
 611static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
 612			       size_t *lenp, loff_t *ppos,
 613			       int (*conv)(unsigned long *lvalp,
 614					   unsigned int *valp,
 615					   int write, void *data),
 616			       void *data)
 617{
 618	unsigned long lval;
 619	int err = 0;
 620	size_t left;
 621
 622	left = *lenp;
 623
 624	if (conv(&lval, tbl_data, 0, data)) {
 625		err = -EINVAL;
 626		goto out;
 627	}
 628
 629	proc_put_long(&buffer, &left, lval, false);
 630	if (!left)
 631		goto out;
 632
 633	proc_put_char(&buffer, &left, '\n');
 634
 635out:
 636	*lenp -= left;
 637	*ppos += *lenp;
 638
 639	return err;
 640}
 641
 642static int __do_proc_douintvec(void *tbl_data, const struct ctl_table *table,
 643			       int write, void *buffer,
 644			       size_t *lenp, loff_t *ppos,
 645			       int (*conv)(unsigned long *lvalp,
 646					   unsigned int *valp,
 647					   int write, void *data),
 648			       void *data)
 649{
 650	unsigned int *i, vleft;
 651
 652	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
 653		*lenp = 0;
 654		return 0;
 655	}
 656
 657	i = (unsigned int *) tbl_data;
 658	vleft = table->maxlen / sizeof(*i);
 659
 660	/*
 661	 * Arrays are not supported, keep this simple. *Do not* add
 662	 * support for them.
 663	 */
 664	if (vleft != 1) {
 665		*lenp = 0;
 666		return -EINVAL;
 667	}
 668
 669	if (!conv)
 670		conv = do_proc_douintvec_conv;
 671
 672	if (write)
 673		return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
 674					   conv, data);
 675	return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
 676}
 677
 678int do_proc_douintvec(const struct ctl_table *table, int write,
 679		      void *buffer, size_t *lenp, loff_t *ppos,
 680		      int (*conv)(unsigned long *lvalp,
 681				  unsigned int *valp,
 682				  int write, void *data),
 683		      void *data)
 684{
 685	return __do_proc_douintvec(table->data, table, write,
 686				   buffer, lenp, ppos, conv, data);
 687}
 688
 689/**
 690 * proc_dobool - read/write a bool
 691 * @table: the sysctl table
 692 * @write: %TRUE if this is a write to the sysctl file
 693 * @buffer: the user buffer
 694 * @lenp: the size of the user buffer
 695 * @ppos: file position
 696 *
 697 * Reads/writes one integer value from/to the user buffer,
 698 * treated as an ASCII string.
 699 *
 700 * table->data must point to a bool variable and table->maxlen must
 701 * be sizeof(bool).
 702 *
 703 * Returns 0 on success.
 704 */
 705int proc_dobool(const struct ctl_table *table, int write, void *buffer,
 706		size_t *lenp, loff_t *ppos)
 707{
 708	struct ctl_table tmp;
 709	bool *data = table->data;
 710	int res, val;
 711
 712	/* Do not support arrays yet. */
 713	if (table->maxlen != sizeof(bool))
 714		return -EINVAL;
 715
 716	tmp = *table;
 717	tmp.maxlen = sizeof(val);
 718	tmp.data = &val;
 719
 720	val = READ_ONCE(*data);
 721	res = proc_dointvec(&tmp, write, buffer, lenp, ppos);
 722	if (res)
 723		return res;
 724	if (write)
 725		WRITE_ONCE(*data, val);
 726	return 0;
 727}
 728
 729/**
 730 * proc_dointvec - read a vector of integers
 731 * @table: the sysctl table
 732 * @write: %TRUE if this is a write to the sysctl file
 733 * @buffer: the user buffer
 734 * @lenp: the size of the user buffer
 735 * @ppos: file position
 736 *
 737 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 738 * values from/to the user buffer, treated as an ASCII string.
 739 *
 740 * Returns 0 on success.
 741 */
 742int proc_dointvec(const struct ctl_table *table, int write, void *buffer,
 743		  size_t *lenp, loff_t *ppos)
 744{
 745	return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
 746}
 747
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 748/**
 749 * proc_douintvec - read a vector of unsigned integers
 750 * @table: the sysctl table
 751 * @write: %TRUE if this is a write to the sysctl file
 752 * @buffer: the user buffer
 753 * @lenp: the size of the user buffer
 754 * @ppos: file position
 755 *
 756 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
 757 * values from/to the user buffer, treated as an ASCII string.
 758 *
 759 * Returns 0 on success.
 760 */
 761int proc_douintvec(const struct ctl_table *table, int write, void *buffer,
 762		size_t *lenp, loff_t *ppos)
 763{
 764	return do_proc_douintvec(table, write, buffer, lenp, ppos,
 765				 do_proc_douintvec_conv, NULL);
 766}
 767
 768/*
 769 * Taint values can only be increased
 770 * This means we can safely use a temporary.
 771 */
 772static int proc_taint(const struct ctl_table *table, int write,
 773			       void *buffer, size_t *lenp, loff_t *ppos)
 774{
 775	struct ctl_table t;
 776	unsigned long tmptaint = get_taint();
 777	int err;
 778
 779	if (write && !capable(CAP_SYS_ADMIN))
 780		return -EPERM;
 781
 782	t = *table;
 783	t.data = &tmptaint;
 784	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
 785	if (err < 0)
 786		return err;
 787
 788	if (write) {
 789		int i;
 790
 791		/*
 792		 * If we are relying on panic_on_taint not producing
 793		 * false positives due to userspace input, bail out
 794		 * before setting the requested taint flags.
 795		 */
 796		if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
 797			return -EINVAL;
 798
 799		/*
 800		 * Poor man's atomic or. Not worth adding a primitive
 801		 * to everyone's atomic.h for this
 802		 */
 803		for (i = 0; i < TAINT_FLAGS_COUNT; i++)
 804			if ((1UL << i) & tmptaint)
 805				add_taint(i, LOCKDEP_STILL_OK);
 806	}
 807
 808	return err;
 809}
 810
 811/**
 812 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 813 * @min: pointer to minimum allowable value
 814 * @max: pointer to maximum allowable value
 815 *
 816 * The do_proc_dointvec_minmax_conv_param structure provides the
 817 * minimum and maximum values for doing range checking for those sysctl
 818 * parameters that use the proc_dointvec_minmax() handler.
 819 */
 820struct do_proc_dointvec_minmax_conv_param {
 821	int *min;
 822	int *max;
 823};
 824
 825static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
 826					int *valp,
 827					int write, void *data)
 828{
 829	int tmp, ret;
 830	struct do_proc_dointvec_minmax_conv_param *param = data;
 831	/*
 832	 * If writing, first do so via a temporary local int so we can
 833	 * bounds-check it before touching *valp.
 834	 */
 835	int *ip = write ? &tmp : valp;
 836
 837	ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
 838	if (ret)
 839		return ret;
 840
 841	if (write) {
 842		if ((param->min && *param->min > tmp) ||
 843		    (param->max && *param->max < tmp))
 844			return -EINVAL;
 845		WRITE_ONCE(*valp, tmp);
 846	}
 847
 848	return 0;
 849}
 850
 851/**
 852 * proc_dointvec_minmax - read a vector of integers with min/max values
 853 * @table: the sysctl table
 854 * @write: %TRUE if this is a write to the sysctl file
 855 * @buffer: the user buffer
 856 * @lenp: the size of the user buffer
 857 * @ppos: file position
 858 *
 859 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 860 * values from/to the user buffer, treated as an ASCII string.
 861 *
 862 * This routine will ensure the values are within the range specified by
 863 * table->extra1 (min) and table->extra2 (max).
 864 *
 865 * Returns 0 on success or -EINVAL on write when the range check fails.
 866 */
 867int proc_dointvec_minmax(const struct ctl_table *table, int write,
 868		  void *buffer, size_t *lenp, loff_t *ppos)
 869{
 870	struct do_proc_dointvec_minmax_conv_param param = {
 871		.min = (int *) table->extra1,
 872		.max = (int *) table->extra2,
 873	};
 874	return do_proc_dointvec(table, write, buffer, lenp, ppos,
 875				do_proc_dointvec_minmax_conv, &param);
 876}
 877
 878/**
 879 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 880 * @min: pointer to minimum allowable value
 881 * @max: pointer to maximum allowable value
 882 *
 883 * The do_proc_douintvec_minmax_conv_param structure provides the
 884 * minimum and maximum values for doing range checking for those sysctl
 885 * parameters that use the proc_douintvec_minmax() handler.
 886 */
 887struct do_proc_douintvec_minmax_conv_param {
 888	unsigned int *min;
 889	unsigned int *max;
 890};
 891
 892static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
 893					 unsigned int *valp,
 894					 int write, void *data)
 895{
 896	int ret;
 897	unsigned int tmp;
 898	struct do_proc_douintvec_minmax_conv_param *param = data;
 899	/* write via temporary local uint for bounds-checking */
 900	unsigned int *up = write ? &tmp : valp;
 901
 902	ret = do_proc_douintvec_conv(lvalp, up, write, data);
 903	if (ret)
 904		return ret;
 905
 906	if (write) {
 907		if ((param->min && *param->min > tmp) ||
 908		    (param->max && *param->max < tmp))
 909			return -ERANGE;
 910
 911		WRITE_ONCE(*valp, tmp);
 912	}
 913
 914	return 0;
 915}
 916
 917/**
 918 * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
 919 * @table: the sysctl table
 920 * @write: %TRUE if this is a write to the sysctl file
 921 * @buffer: the user buffer
 922 * @lenp: the size of the user buffer
 923 * @ppos: file position
 924 *
 925 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
 926 * values from/to the user buffer, treated as an ASCII string. Negative
 927 * strings are not allowed.
 928 *
 929 * This routine will ensure the values are within the range specified by
 930 * table->extra1 (min) and table->extra2 (max). There is a final sanity
 931 * check for UINT_MAX to avoid having to support wrap around uses from
 932 * userspace.
 933 *
 934 * Returns 0 on success or -ERANGE on write when the range check fails.
 935 */
 936int proc_douintvec_minmax(const struct ctl_table *table, int write,
 937			  void *buffer, size_t *lenp, loff_t *ppos)
 938{
 939	struct do_proc_douintvec_minmax_conv_param param = {
 940		.min = (unsigned int *) table->extra1,
 941		.max = (unsigned int *) table->extra2,
 942	};
 943	return do_proc_douintvec(table, write, buffer, lenp, ppos,
 944				 do_proc_douintvec_minmax_conv, &param);
 945}
 946
 947/**
 948 * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
 949 * @table: the sysctl table
 950 * @write: %TRUE if this is a write to the sysctl file
 951 * @buffer: the user buffer
 952 * @lenp: the size of the user buffer
 953 * @ppos: file position
 954 *
 955 * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
 956 * values from/to the user buffer, treated as an ASCII string. Negative
 957 * strings are not allowed.
 958 *
 959 * This routine will ensure the values are within the range specified by
 960 * table->extra1 (min) and table->extra2 (max).
 961 *
 962 * Returns 0 on success or an error on write when the range check fails.
 963 */
 964int proc_dou8vec_minmax(const struct ctl_table *table, int write,
 965			void *buffer, size_t *lenp, loff_t *ppos)
 966{
 967	struct ctl_table tmp;
 968	unsigned int min = 0, max = 255U, val;
 969	u8 *data = table->data;
 970	struct do_proc_douintvec_minmax_conv_param param = {
 971		.min = &min,
 972		.max = &max,
 973	};
 974	int res;
 975
 976	/* Do not support arrays yet. */
 977	if (table->maxlen != sizeof(u8))
 978		return -EINVAL;
 979
 980	if (table->extra1)
 981		min = *(unsigned int *) table->extra1;
 982	if (table->extra2)
 
 
 
 983		max = *(unsigned int *) table->extra2;
 
 
 
 984
 985	tmp = *table;
 986
 987	tmp.maxlen = sizeof(val);
 988	tmp.data = &val;
 989	val = READ_ONCE(*data);
 990	res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
 991				do_proc_douintvec_minmax_conv, &param);
 992	if (res)
 993		return res;
 994	if (write)
 995		WRITE_ONCE(*data, val);
 996	return 0;
 997}
 998EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
 999
1000#ifdef CONFIG_MAGIC_SYSRQ
1001static int sysrq_sysctl_handler(const struct ctl_table *table, int write,
1002				void *buffer, size_t *lenp, loff_t *ppos)
1003{
1004	int tmp, ret;
1005
1006	tmp = sysrq_mask();
1007
1008	ret = __do_proc_dointvec(&tmp, table, write, buffer,
1009			       lenp, ppos, NULL, NULL);
1010	if (ret || !write)
1011		return ret;
1012
1013	if (write)
1014		sysrq_toggle_support(tmp);
1015
1016	return 0;
1017}
1018#endif
1019
1020static int __do_proc_doulongvec_minmax(void *data,
1021		const struct ctl_table *table, int write,
1022		void *buffer, size_t *lenp, loff_t *ppos,
1023		unsigned long convmul, unsigned long convdiv)
1024{
1025	unsigned long *i, *min, *max;
1026	int vleft, first = 1, err = 0;
1027	size_t left;
1028	char *p;
1029
1030	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
1031		*lenp = 0;
1032		return 0;
1033	}
1034
1035	i = data;
1036	min = table->extra1;
1037	max = table->extra2;
1038	vleft = table->maxlen / sizeof(unsigned long);
1039	left = *lenp;
1040
1041	if (write) {
1042		if (proc_first_pos_non_zero_ignore(ppos, table))
1043			goto out;
1044
1045		if (left > PAGE_SIZE - 1)
1046			left = PAGE_SIZE - 1;
1047		p = buffer;
1048	}
1049
1050	for (; left && vleft--; i++, first = 0) {
1051		unsigned long val;
1052
1053		if (write) {
1054			bool neg;
1055
1056			proc_skip_spaces(&p, &left);
1057			if (!left)
1058				break;
1059
1060			err = proc_get_long(&p, &left, &val, &neg,
1061					     proc_wspace_sep,
1062					     sizeof(proc_wspace_sep), NULL);
1063			if (err || neg) {
1064				err = -EINVAL;
1065				break;
1066			}
1067
1068			val = convmul * val / convdiv;
1069			if ((min && val < *min) || (max && val > *max)) {
1070				err = -EINVAL;
1071				break;
1072			}
1073			WRITE_ONCE(*i, val);
1074		} else {
1075			val = convdiv * READ_ONCE(*i) / convmul;
1076			if (!first)
1077				proc_put_char(&buffer, &left, '\t');
1078			proc_put_long(&buffer, &left, val, false);
1079		}
1080	}
1081
1082	if (!write && !first && left && !err)
1083		proc_put_char(&buffer, &left, '\n');
1084	if (write && !err)
1085		proc_skip_spaces(&p, &left);
1086	if (write && first)
1087		return err ? : -EINVAL;
1088	*lenp -= left;
1089out:
1090	*ppos += *lenp;
1091	return err;
1092}
1093
1094static int do_proc_doulongvec_minmax(const struct ctl_table *table, int write,
1095		void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
1096		unsigned long convdiv)
1097{
1098	return __do_proc_doulongvec_minmax(table->data, table, write,
1099			buffer, lenp, ppos, convmul, convdiv);
1100}
1101
1102/**
1103 * proc_doulongvec_minmax - read a vector of long integers with min/max values
1104 * @table: the sysctl table
1105 * @write: %TRUE if this is a write to the sysctl file
1106 * @buffer: the user buffer
1107 * @lenp: the size of the user buffer
1108 * @ppos: file position
1109 *
1110 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1111 * values from/to the user buffer, treated as an ASCII string.
1112 *
1113 * This routine will ensure the values are within the range specified by
1114 * table->extra1 (min) and table->extra2 (max).
1115 *
1116 * Returns 0 on success.
1117 */
1118int proc_doulongvec_minmax(const struct ctl_table *table, int write,
1119			   void *buffer, size_t *lenp, loff_t *ppos)
1120{
1121    return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1122}
1123
1124/**
1125 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1126 * @table: the sysctl table
1127 * @write: %TRUE if this is a write to the sysctl file
1128 * @buffer: the user buffer
1129 * @lenp: the size of the user buffer
1130 * @ppos: file position
1131 *
1132 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1133 * values from/to the user buffer, treated as an ASCII string. The values
1134 * are treated as milliseconds, and converted to jiffies when they are stored.
1135 *
1136 * This routine will ensure the values are within the range specified by
1137 * table->extra1 (min) and table->extra2 (max).
1138 *
1139 * Returns 0 on success.
1140 */
1141int proc_doulongvec_ms_jiffies_minmax(const struct ctl_table *table, int write,
1142				      void *buffer, size_t *lenp, loff_t *ppos)
1143{
1144    return do_proc_doulongvec_minmax(table, write, buffer,
1145				     lenp, ppos, HZ, 1000l);
1146}
1147
1148
1149static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
1150					 int *valp,
1151					 int write, void *data)
1152{
1153	if (write) {
1154		if (*lvalp > INT_MAX / HZ)
1155			return 1;
1156		if (*negp)
1157			WRITE_ONCE(*valp, -*lvalp * HZ);
1158		else
1159			WRITE_ONCE(*valp, *lvalp * HZ);
1160	} else {
1161		int val = READ_ONCE(*valp);
1162		unsigned long lval;
1163		if (val < 0) {
1164			*negp = true;
1165			lval = -(unsigned long)val;
1166		} else {
1167			*negp = false;
1168			lval = (unsigned long)val;
1169		}
1170		*lvalp = lval / HZ;
1171	}
1172	return 0;
1173}
1174
1175static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
1176						int *valp,
1177						int write, void *data)
1178{
1179	if (write) {
1180		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
1181			return 1;
1182		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
1183	} else {
1184		int val = *valp;
1185		unsigned long lval;
1186		if (val < 0) {
1187			*negp = true;
1188			lval = -(unsigned long)val;
1189		} else {
1190			*negp = false;
1191			lval = (unsigned long)val;
1192		}
1193		*lvalp = jiffies_to_clock_t(lval);
1194	}
1195	return 0;
1196}
1197
/*
 * Conversion callback between user-visible milliseconds and the jiffies
 * value stored in *valp.
 *
 * Write: converts the signed input with msecs_to_jiffies(); returns 1 when
 * the result exceeds INT_MAX, otherwise stores it.
 * Read: reports jiffies_to_msecs() of the stored magnitude in *lvalp and
 * the sign in *negp.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long magnitude;
	int stored;

	if (write) {
		unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

		if (jif > INT_MAX)
			return 1;
		WRITE_ONCE(*valp, (int)jif);
		return 0;
	}

	stored = READ_ONCE(*valp);
	if (stored < 0) {
		*negp = true;
		magnitude = -(unsigned long)stored;
	} else {
		*negp = false;
		magnitude = (unsigned long)stored;
	}
	*lvalp = jiffies_to_msecs(magnitude);

	return 0;
}
1222
1223static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lvalp,
1224						int *valp, int write, void *data)
1225{
1226	int tmp, ret;
1227	struct do_proc_dointvec_minmax_conv_param *param = data;
1228	/*
1229	 * If writing, first do so via a temporary local int so we can
1230	 * bounds-check it before touching *valp.
1231	 */
1232	int *ip = write ? &tmp : valp;
1233
1234	ret = do_proc_dointvec_ms_jiffies_conv(negp, lvalp, ip, write, data);
1235	if (ret)
1236		return ret;
1237
1238	if (write) {
1239		if ((param->min && *param->min > tmp) ||
1240				(param->max && *param->max < tmp))
1241			return -EINVAL;
1242		*valp = tmp;
1243	}
1244	return 0;
1245}
1246
1247/**
1248 * proc_dointvec_jiffies - read a vector of integers as seconds
1249 * @table: the sysctl table
1250 * @write: %TRUE if this is a write to the sysctl file
1251 * @buffer: the user buffer
1252 * @lenp: the size of the user buffer
1253 * @ppos: file position
1254 *
1255 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1256 * values from/to the user buffer, treated as an ASCII string.
1257 * The values read are assumed to be in seconds, and are converted into
1258 * jiffies.
1259 *
1260 * Returns 0 on success.
1261 */
1262int proc_dointvec_jiffies(const struct ctl_table *table, int write,
1263			  void *buffer, size_t *lenp, loff_t *ppos)
1264{
1265    return do_proc_dointvec(table,write,buffer,lenp,ppos,
1266		    	    do_proc_dointvec_jiffies_conv,NULL);
1267}
1268
1269int proc_dointvec_ms_jiffies_minmax(const struct ctl_table *table, int write,
1270			  void *buffer, size_t *lenp, loff_t *ppos)
1271{
1272	struct do_proc_dointvec_minmax_conv_param param = {
1273		.min = (int *) table->extra1,
1274		.max = (int *) table->extra2,
1275	};
1276	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1277			do_proc_dointvec_ms_jiffies_minmax_conv, &param);
1278}
1279
1280/**
1281 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
1282 * @table: the sysctl table
1283 * @write: %TRUE if this is a write to the sysctl file
1284 * @buffer: the user buffer
1285 * @lenp: the size of the user buffer
1286 * @ppos: pointer to the file position
1287 *
1288 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1289 * values from/to the user buffer, treated as an ASCII string.
1290 * The values read are assumed to be in 1/USER_HZ seconds, and
1291 * are converted into jiffies.
1292 *
1293 * Returns 0 on success.
1294 */
1295int proc_dointvec_userhz_jiffies(const struct ctl_table *table, int write,
1296				 void *buffer, size_t *lenp, loff_t *ppos)
1297{
1298	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1299				do_proc_dointvec_userhz_jiffies_conv, NULL);
1300}
1301
1302/**
1303 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
1304 * @table: the sysctl table
1305 * @write: %TRUE if this is a write to the sysctl file
1306 * @buffer: the user buffer
1307 * @lenp: the size of the user buffer
 
1308 * @ppos: the current position in the file
1309 *
1310 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1311 * values from/to the user buffer, treated as an ASCII string.
1312 * The values read are assumed to be in 1/1000 seconds, and
1313 * are converted into jiffies.
1314 *
1315 * Returns 0 on success.
1316 */
1317int proc_dointvec_ms_jiffies(const struct ctl_table *table, int write, void *buffer,
1318		size_t *lenp, loff_t *ppos)
1319{
1320	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1321				do_proc_dointvec_ms_jiffies_conv, NULL);
1322}
1323
1324static int proc_do_cad_pid(const struct ctl_table *table, int write, void *buffer,
1325		size_t *lenp, loff_t *ppos)
1326{
1327	struct pid *new_pid;
1328	pid_t tmp;
1329	int r;
1330
1331	tmp = pid_vnr(cad_pid);
1332
1333	r = __do_proc_dointvec(&tmp, table, write, buffer,
1334			       lenp, ppos, NULL, NULL);
1335	if (r || !write)
1336		return r;
1337
1338	new_pid = find_get_pid(tmp);
1339	if (!new_pid)
1340		return -ESRCH;
1341
1342	put_pid(xchg(&cad_pid, new_pid));
1343	return 0;
1344}
1345
1346/**
1347 * proc_do_large_bitmap - read/write from/to a large bitmap
1348 * @table: the sysctl table
1349 * @write: %TRUE if this is a write to the sysctl file
1350 * @buffer: the user buffer
1351 * @lenp: the size of the user buffer
1352 * @ppos: file position
1353 *
1354 * The bitmap is stored at table->data and the bitmap length (in bits)
1355 * in table->maxlen.
1356 *
1357 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
1358 * large bitmaps may be represented in a compact manner. Writing into
1359 * the file will clear the bitmap then update it with the given input.
1360 *
1361 * Returns 0 on success.
1362 */
1363int proc_do_large_bitmap(const struct ctl_table *table, int write,
1364			 void *buffer, size_t *lenp, loff_t *ppos)
1365{
1366	int err = 0;
1367	size_t left = *lenp;
1368	unsigned long bitmap_len = table->maxlen;
1369	unsigned long *bitmap = *(unsigned long **) table->data;
1370	unsigned long *tmp_bitmap = NULL;
1371	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
1372
1373	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
1374		*lenp = 0;
1375		return 0;
1376	}
1377
1378	if (write) {
1379		char *p = buffer;
1380		size_t skipped = 0;
1381
1382		if (left > PAGE_SIZE - 1) {
1383			left = PAGE_SIZE - 1;
1384			/* How much of the buffer we'll skip this pass */
1385			skipped = *lenp - left;
1386		}
1387
1388		tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
1389		if (!tmp_bitmap)
1390			return -ENOMEM;
1391		proc_skip_char(&p, &left, '\n');
1392		while (!err && left) {
1393			unsigned long val_a, val_b;
1394			bool neg;
1395			size_t saved_left;
1396
1397			/* In case we stop parsing mid-number, we can reset */
1398			saved_left = left;
1399			err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
1400					     sizeof(tr_a), &c);
1401			/*
1402			 * If we consumed the entirety of a truncated buffer or
1403			 * only one char is left (may be a "-"), then stop here,
1404			 * reset, & come back for more.
1405			 */
1406			if ((left <= 1) && skipped) {
1407				left = saved_left;
1408				break;
1409			}
1410
1411			if (err)
1412				break;
1413			if (val_a >= bitmap_len || neg) {
1414				err = -EINVAL;
1415				break;
1416			}
1417
1418			val_b = val_a;
1419			if (left) {
1420				p++;
1421				left--;
1422			}
1423
1424			if (c == '-') {
1425				err = proc_get_long(&p, &left, &val_b,
1426						     &neg, tr_b, sizeof(tr_b),
1427						     &c);
1428				/*
1429				 * If we consumed all of a truncated buffer or
1430				 * then stop here, reset, & come back for more.
1431				 */
1432				if (!left && skipped) {
1433					left = saved_left;
1434					break;
1435				}
1436
1437				if (err)
1438					break;
1439				if (val_b >= bitmap_len || neg ||
1440				    val_a > val_b) {
1441					err = -EINVAL;
1442					break;
1443				}
1444				if (left) {
1445					p++;
1446					left--;
1447				}
1448			}
1449
1450			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
1451			proc_skip_char(&p, &left, '\n');
1452		}
1453		left += skipped;
1454	} else {
1455		unsigned long bit_a, bit_b = 0;
1456		bool first = 1;
1457
1458		while (left) {
1459			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
1460			if (bit_a >= bitmap_len)
1461				break;
1462			bit_b = find_next_zero_bit(bitmap, bitmap_len,
1463						   bit_a + 1) - 1;
1464
1465			if (!first)
1466				proc_put_char(&buffer, &left, ',');
1467			proc_put_long(&buffer, &left, bit_a, false);
1468			if (bit_a != bit_b) {
1469				proc_put_char(&buffer, &left, '-');
1470				proc_put_long(&buffer, &left, bit_b, false);
1471			}
1472
1473			first = 0; bit_b++;
1474		}
1475		proc_put_char(&buffer, &left, '\n');
1476	}
1477
1478	if (!err) {
1479		if (write) {
1480			if (*ppos)
1481				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
1482			else
1483				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
1484		}
1485		*lenp -= left;
1486		*ppos += *lenp;
1487	}
1488
1489	bitmap_free(tmp_bitmap);
1490	return err;
1491}
1492
1493#else /* CONFIG_PROC_SYSCTL */
1494
1495int proc_dostring(const struct ctl_table *table, int write,
1496		  void *buffer, size_t *lenp, loff_t *ppos)
1497{
1498	return -ENOSYS;
1499}
1500
1501int proc_dobool(const struct ctl_table *table, int write,
1502		void *buffer, size_t *lenp, loff_t *ppos)
1503{
1504	return -ENOSYS;
1505}
1506
1507int proc_dointvec(const struct ctl_table *table, int write,
1508		  void *buffer, size_t *lenp, loff_t *ppos)
1509{
1510	return -ENOSYS;
1511}
1512
1513int proc_douintvec(const struct ctl_table *table, int write,
1514		  void *buffer, size_t *lenp, loff_t *ppos)
1515{
1516	return -ENOSYS;
1517}
1518
1519int proc_dointvec_minmax(const struct ctl_table *table, int write,
1520		    void *buffer, size_t *lenp, loff_t *ppos)
1521{
1522	return -ENOSYS;
1523}
1524
1525int proc_douintvec_minmax(const struct ctl_table *table, int write,
1526			  void *buffer, size_t *lenp, loff_t *ppos)
1527{
1528	return -ENOSYS;
1529}
1530
1531int proc_dou8vec_minmax(const struct ctl_table *table, int write,
1532			void *buffer, size_t *lenp, loff_t *ppos)
1533{
1534	return -ENOSYS;
1535}
1536
1537int proc_dointvec_jiffies(const struct ctl_table *table, int write,
1538		    void *buffer, size_t *lenp, loff_t *ppos)
1539{
1540	return -ENOSYS;
1541}
1542
1543int proc_dointvec_ms_jiffies_minmax(const struct ctl_table *table, int write,
1544				    void *buffer, size_t *lenp, loff_t *ppos)
1545{
1546	return -ENOSYS;
1547}
1548
1549int proc_dointvec_userhz_jiffies(const struct ctl_table *table, int write,
1550		    void *buffer, size_t *lenp, loff_t *ppos)
1551{
1552	return -ENOSYS;
1553}
1554
1555int proc_dointvec_ms_jiffies(const struct ctl_table *table, int write,
1556			     void *buffer, size_t *lenp, loff_t *ppos)
1557{
1558	return -ENOSYS;
1559}
1560
1561int proc_doulongvec_minmax(const struct ctl_table *table, int write,
1562		    void *buffer, size_t *lenp, loff_t *ppos)
1563{
1564	return -ENOSYS;
1565}
1566
1567int proc_doulongvec_ms_jiffies_minmax(const struct ctl_table *table, int write,
1568				      void *buffer, size_t *lenp, loff_t *ppos)
1569{
1570	return -ENOSYS;
1571}
1572
1573int proc_do_large_bitmap(const struct ctl_table *table, int write,
1574			 void *buffer, size_t *lenp, loff_t *ppos)
1575{
1576	return -ENOSYS;
1577}
1578
1579#endif /* CONFIG_PROC_SYSCTL */
1580
1581#if defined(CONFIG_SYSCTL)
1582int proc_do_static_key(const struct ctl_table *table, int write,
1583		       void *buffer, size_t *lenp, loff_t *ppos)
1584{
1585	struct static_key *key = (struct static_key *)table->data;
1586	static DEFINE_MUTEX(static_key_mutex);
1587	int val, ret;
1588	struct ctl_table tmp = {
1589		.data   = &val,
1590		.maxlen = sizeof(val),
1591		.mode   = table->mode,
1592		.extra1 = SYSCTL_ZERO,
1593		.extra2 = SYSCTL_ONE,
1594	};
1595
1596	if (write && !capable(CAP_SYS_ADMIN))
1597		return -EPERM;
1598
1599	mutex_lock(&static_key_mutex);
1600	val = static_key_enabled(key);
1601	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
1602	if (write && !ret) {
1603		if (val)
1604			static_key_enable(key);
1605		else
1606			static_key_disable(key);
1607	}
1608	mutex_unlock(&static_key_mutex);
1609	return ret;
1610}
1611
1612static struct ctl_table kern_table[] = {
1613	{
1614		.procname	= "panic",
1615		.data		= &panic_timeout,
1616		.maxlen		= sizeof(int),
1617		.mode		= 0644,
1618		.proc_handler	= proc_dointvec,
1619	},
1620#ifdef CONFIG_PROC_SYSCTL
1621	{
1622		.procname	= "tainted",
1623		.maxlen 	= sizeof(long),
1624		.mode		= 0644,
1625		.proc_handler	= proc_taint,
1626	},
1627	{
1628		.procname	= "sysctl_writes_strict",
1629		.data		= &sysctl_writes_strict,
1630		.maxlen		= sizeof(int),
1631		.mode		= 0644,
1632		.proc_handler	= proc_dointvec_minmax,
1633		.extra1		= SYSCTL_NEG_ONE,
1634		.extra2		= SYSCTL_ONE,
1635	},
1636#endif
1637	{
1638		.procname	= "print-fatal-signals",
1639		.data		= &print_fatal_signals,
1640		.maxlen		= sizeof(int),
1641		.mode		= 0644,
1642		.proc_handler	= proc_dointvec,
1643	},
1644#ifdef CONFIG_SPARC
1645	{
1646		.procname	= "reboot-cmd",
1647		.data		= reboot_command,
1648		.maxlen		= 256,
1649		.mode		= 0644,
1650		.proc_handler	= proc_dostring,
1651	},
1652	{
1653		.procname	= "stop-a",
1654		.data		= &stop_a_enabled,
1655		.maxlen		= sizeof (int),
1656		.mode		= 0644,
1657		.proc_handler	= proc_dointvec,
1658	},
1659	{
1660		.procname	= "scons-poweroff",
1661		.data		= &scons_pwroff,
1662		.maxlen		= sizeof (int),
1663		.mode		= 0644,
1664		.proc_handler	= proc_dointvec,
1665	},
1666#endif
1667#ifdef CONFIG_SPARC64
1668	{
1669		.procname	= "tsb-ratio",
1670		.data		= &sysctl_tsb_ratio,
1671		.maxlen		= sizeof (int),
1672		.mode		= 0644,
1673		.proc_handler	= proc_dointvec,
1674	},
1675#endif
1676#ifdef CONFIG_PARISC
1677	{
1678		.procname	= "soft-power",
1679		.data		= &pwrsw_enabled,
1680		.maxlen		= sizeof (int),
1681		.mode		= 0644,
1682		.proc_handler	= proc_dointvec,
1683	},
1684#endif
1685#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
1686	{
1687		.procname	= "unaligned-trap",
1688		.data		= &unaligned_enabled,
1689		.maxlen		= sizeof (int),
1690		.mode		= 0644,
1691		.proc_handler	= proc_dointvec,
1692	},
1693#endif
1694#ifdef CONFIG_STACK_TRACER
1695	{
1696		.procname	= "stack_tracer_enabled",
1697		.data		= &stack_tracer_enabled,
1698		.maxlen		= sizeof(int),
1699		.mode		= 0644,
1700		.proc_handler	= stack_trace_sysctl,
1701	},
1702#endif
1703#ifdef CONFIG_TRACING
1704	{
1705		.procname	= "ftrace_dump_on_oops",
1706		.data		= &ftrace_dump_on_oops,
1707		.maxlen		= MAX_TRACER_SIZE,
1708		.mode		= 0644,
1709		.proc_handler	= proc_dostring,
1710	},
1711	{
1712		.procname	= "traceoff_on_warning",
1713		.data		= &__disable_trace_on_warning,
1714		.maxlen		= sizeof(__disable_trace_on_warning),
1715		.mode		= 0644,
1716		.proc_handler	= proc_dointvec,
1717	},
1718	{
1719		.procname	= "tracepoint_printk",
1720		.data		= &tracepoint_printk,
1721		.maxlen		= sizeof(tracepoint_printk),
1722		.mode		= 0644,
1723		.proc_handler	= tracepoint_printk_sysctl,
1724	},
1725#endif
1726#ifdef CONFIG_MODULES
1727	{
1728		.procname	= "modprobe",
1729		.data		= &modprobe_path,
1730		.maxlen		= KMOD_PATH_LEN,
1731		.mode		= 0644,
1732		.proc_handler	= proc_dostring,
1733	},
1734	{
1735		.procname	= "modules_disabled",
1736		.data		= &modules_disabled,
1737		.maxlen		= sizeof(int),
1738		.mode		= 0644,
1739		/* only handle a transition from default "0" to "1" */
1740		.proc_handler	= proc_dointvec_minmax,
1741		.extra1		= SYSCTL_ONE,
1742		.extra2		= SYSCTL_ONE,
1743	},
1744#endif
1745#ifdef CONFIG_UEVENT_HELPER
1746	{
1747		.procname	= "hotplug",
1748		.data		= &uevent_helper,
1749		.maxlen		= UEVENT_HELPER_PATH_LEN,
1750		.mode		= 0644,
1751		.proc_handler	= proc_dostring,
1752	},
1753#endif
1754#ifdef CONFIG_MAGIC_SYSRQ
1755	{
1756		.procname	= "sysrq",
1757		.data		= NULL,
1758		.maxlen		= sizeof (int),
1759		.mode		= 0644,
1760		.proc_handler	= sysrq_sysctl_handler,
1761	},
1762#endif
1763#ifdef CONFIG_PROC_SYSCTL
1764	{
1765		.procname	= "cad_pid",
1766		.data		= NULL,
1767		.maxlen		= sizeof (int),
1768		.mode		= 0600,
1769		.proc_handler	= proc_do_cad_pid,
1770	},
1771#endif
1772	{
1773		.procname	= "threads-max",
1774		.data		= NULL,
1775		.maxlen		= sizeof(int),
1776		.mode		= 0644,
1777		.proc_handler	= sysctl_max_threads,
1778	},
1779	{
 
 
 
 
 
1780		.procname	= "overflowuid",
1781		.data		= &overflowuid,
1782		.maxlen		= sizeof(int),
1783		.mode		= 0644,
1784		.proc_handler	= proc_dointvec_minmax,
1785		.extra1		= SYSCTL_ZERO,
1786		.extra2		= SYSCTL_MAXOLDUID,
1787	},
1788	{
1789		.procname	= "overflowgid",
1790		.data		= &overflowgid,
1791		.maxlen		= sizeof(int),
1792		.mode		= 0644,
1793		.proc_handler	= proc_dointvec_minmax,
1794		.extra1		= SYSCTL_ZERO,
1795		.extra2		= SYSCTL_MAXOLDUID,
1796	},
1797#ifdef CONFIG_S390
1798	{
1799		.procname	= "userprocess_debug",
1800		.data		= &show_unhandled_signals,
1801		.maxlen		= sizeof(int),
1802		.mode		= 0644,
1803		.proc_handler	= proc_dointvec,
1804	},
1805#endif
1806	{
1807		.procname	= "pid_max",
1808		.data		= &pid_max,
1809		.maxlen		= sizeof (int),
1810		.mode		= 0644,
1811		.proc_handler	= proc_dointvec_minmax,
1812		.extra1		= &pid_max_min,
1813		.extra2		= &pid_max_max,
1814	},
1815	{
1816		.procname	= "panic_on_oops",
1817		.data		= &panic_on_oops,
1818		.maxlen		= sizeof(int),
1819		.mode		= 0644,
1820		.proc_handler	= proc_dointvec,
1821	},
1822	{
1823		.procname	= "panic_print",
1824		.data		= &panic_print,
1825		.maxlen		= sizeof(unsigned long),
1826		.mode		= 0644,
1827		.proc_handler	= proc_doulongvec_minmax,
1828	},
1829	{
1830		.procname	= "ngroups_max",
1831		.data		= (void *)&ngroups_max,
1832		.maxlen		= sizeof (int),
1833		.mode		= 0444,
1834		.proc_handler	= proc_dointvec,
1835	},
1836	{
1837		.procname	= "cap_last_cap",
1838		.data		= (void *)&cap_last_cap,
1839		.maxlen		= sizeof(int),
1840		.mode		= 0444,
1841		.proc_handler	= proc_dointvec,
1842	},
1843#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
1844	{
1845		.procname       = "unknown_nmi_panic",
1846		.data           = &unknown_nmi_panic,
1847		.maxlen         = sizeof (int),
1848		.mode           = 0644,
1849		.proc_handler   = proc_dointvec,
1850	},
1851#endif
1852
1853#if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
1854	defined(CONFIG_DEBUG_STACKOVERFLOW)
1855	{
1856		.procname	= "panic_on_stackoverflow",
1857		.data		= &sysctl_panic_on_stackoverflow,
1858		.maxlen		= sizeof(int),
1859		.mode		= 0644,
1860		.proc_handler	= proc_dointvec,
1861	},
1862#endif
1863#if defined(CONFIG_X86)
1864	{
1865		.procname	= "panic_on_unrecovered_nmi",
1866		.data		= &panic_on_unrecovered_nmi,
1867		.maxlen		= sizeof(int),
1868		.mode		= 0644,
1869		.proc_handler	= proc_dointvec,
1870	},
1871	{
1872		.procname	= "panic_on_io_nmi",
1873		.data		= &panic_on_io_nmi,
1874		.maxlen		= sizeof(int),
1875		.mode		= 0644,
1876		.proc_handler	= proc_dointvec,
1877	},
1878	{
1879		.procname	= "bootloader_type",
1880		.data		= &bootloader_type,
1881		.maxlen		= sizeof (int),
1882		.mode		= 0444,
1883		.proc_handler	= proc_dointvec,
1884	},
1885	{
1886		.procname	= "bootloader_version",
1887		.data		= &bootloader_version,
1888		.maxlen		= sizeof (int),
1889		.mode		= 0444,
1890		.proc_handler	= proc_dointvec,
1891	},
1892	{
1893		.procname	= "io_delay_type",
1894		.data		= &io_delay_type,
1895		.maxlen		= sizeof(int),
1896		.mode		= 0644,
1897		.proc_handler	= proc_dointvec,
1898	},
1899#endif
1900#if defined(CONFIG_MMU)
1901	{
1902		.procname	= "randomize_va_space",
1903		.data		= &randomize_va_space,
1904		.maxlen		= sizeof(int),
1905		.mode		= 0644,
1906		.proc_handler	= proc_dointvec,
1907	},
1908#endif
1909#if defined(CONFIG_S390) && defined(CONFIG_SMP)
1910	{
1911		.procname	= "spin_retry",
1912		.data		= &spin_retry,
1913		.maxlen		= sizeof (int),
1914		.mode		= 0644,
1915		.proc_handler	= proc_dointvec,
1916	},
1917#endif
1918#if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1919	{
1920		.procname	= "acpi_video_flags",
1921		.data		= &acpi_realmode_flags,
1922		.maxlen		= sizeof (unsigned long),
1923		.mode		= 0644,
1924		.proc_handler	= proc_doulongvec_minmax,
1925	},
1926#endif
1927#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1928	{
1929		.procname	= "ignore-unaligned-usertrap",
1930		.data		= &no_unaligned_warning,
1931		.maxlen		= sizeof (int),
1932		.mode		= 0644,
1933		.proc_handler	= proc_dointvec,
1934	},
1935#endif
 
 
 
 
 
 
 
 
 
1936#ifdef CONFIG_RT_MUTEXES
1937	{
1938		.procname	= "max_lock_depth",
1939		.data		= &max_lock_depth,
1940		.maxlen		= sizeof(int),
1941		.mode		= 0644,
1942		.proc_handler	= proc_dointvec,
1943	},
1944#endif
 
 
 
 
 
 
 
1945#ifdef CONFIG_PERF_EVENTS
1946	/*
1947	 * User-space scripts rely on the existence of this file
1948	 * as a feature check for perf_events being enabled.
1949	 *
1950	 * So it's an ABI, do not remove!
1951	 */
1952	{
1953		.procname	= "perf_event_paranoid",
1954		.data		= &sysctl_perf_event_paranoid,
1955		.maxlen		= sizeof(sysctl_perf_event_paranoid),
1956		.mode		= 0644,
1957		.proc_handler	= proc_dointvec,
1958	},
1959	{
1960		.procname	= "perf_event_mlock_kb",
1961		.data		= &sysctl_perf_event_mlock,
1962		.maxlen		= sizeof(sysctl_perf_event_mlock),
1963		.mode		= 0644,
1964		.proc_handler	= proc_dointvec,
1965	},
1966	{
1967		.procname	= "perf_event_max_sample_rate",
1968		.data		= &sysctl_perf_event_sample_rate,
1969		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
1970		.mode		= 0644,
1971		.proc_handler	= perf_event_max_sample_rate_handler,
1972		.extra1		= SYSCTL_ONE,
1973	},
1974	{
1975		.procname	= "perf_cpu_time_max_percent",
1976		.data		= &sysctl_perf_cpu_time_max_percent,
1977		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
1978		.mode		= 0644,
1979		.proc_handler	= perf_cpu_time_max_percent_handler,
1980		.extra1		= SYSCTL_ZERO,
1981		.extra2		= SYSCTL_ONE_HUNDRED,
1982	},
1983	{
1984		.procname	= "perf_event_max_stack",
1985		.data		= &sysctl_perf_event_max_stack,
1986		.maxlen		= sizeof(sysctl_perf_event_max_stack),
1987		.mode		= 0644,
1988		.proc_handler	= perf_event_max_stack_handler,
1989		.extra1		= SYSCTL_ZERO,
1990		.extra2		= (void *)&six_hundred_forty_kb,
1991	},
1992	{
1993		.procname	= "perf_event_max_contexts_per_stack",
1994		.data		= &sysctl_perf_event_max_contexts_per_stack,
1995		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
1996		.mode		= 0644,
1997		.proc_handler	= perf_event_max_stack_handler,
1998		.extra1		= SYSCTL_ZERO,
1999		.extra2		= SYSCTL_ONE_THOUSAND,
2000	},
2001#endif
2002	{
2003		.procname	= "panic_on_warn",
2004		.data		= &panic_on_warn,
2005		.maxlen		= sizeof(int),
2006		.mode		= 0644,
2007		.proc_handler	= proc_dointvec_minmax,
2008		.extra1		= SYSCTL_ZERO,
2009		.extra2		= SYSCTL_ONE,
2010	},
2011#ifdef CONFIG_TREE_RCU
2012	{
2013		.procname	= "panic_on_rcu_stall",
2014		.data		= &sysctl_panic_on_rcu_stall,
2015		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
2016		.mode		= 0644,
2017		.proc_handler	= proc_dointvec_minmax,
2018		.extra1		= SYSCTL_ZERO,
2019		.extra2		= SYSCTL_ONE,
2020	},
2021	{
2022		.procname	= "max_rcu_stall_to_panic",
2023		.data		= &sysctl_max_rcu_stall_to_panic,
2024		.maxlen		= sizeof(sysctl_max_rcu_stall_to_panic),
2025		.mode		= 0644,
2026		.proc_handler	= proc_dointvec_minmax,
2027		.extra1		= SYSCTL_ONE,
2028		.extra2		= SYSCTL_INT_MAX,
2029	},
2030#endif
 
2031};
2032
2033static struct ctl_table vm_table[] = {
2034	{
2035		.procname	= "overcommit_memory",
2036		.data		= &sysctl_overcommit_memory,
2037		.maxlen		= sizeof(sysctl_overcommit_memory),
2038		.mode		= 0644,
2039		.proc_handler	= overcommit_policy_handler,
2040		.extra1		= SYSCTL_ZERO,
2041		.extra2		= SYSCTL_TWO,
2042	},
2043	{
2044		.procname	= "overcommit_ratio",
2045		.data		= &sysctl_overcommit_ratio,
2046		.maxlen		= sizeof(sysctl_overcommit_ratio),
2047		.mode		= 0644,
2048		.proc_handler	= overcommit_ratio_handler,
2049	},
2050	{
2051		.procname	= "overcommit_kbytes",
2052		.data		= &sysctl_overcommit_kbytes,
2053		.maxlen		= sizeof(sysctl_overcommit_kbytes),
2054		.mode		= 0644,
2055		.proc_handler	= overcommit_kbytes_handler,
2056	},
2057	{
2058		.procname	= "page-cluster",
2059		.data		= &page_cluster,
2060		.maxlen		= sizeof(int),
2061		.mode		= 0644,
2062		.proc_handler	= proc_dointvec_minmax,
2063		.extra1		= SYSCTL_ZERO,
2064		.extra2		= (void *)&page_cluster_max,
2065	},
2066	{
2067		.procname	= "dirtytime_expire_seconds",
2068		.data		= &dirtytime_expire_interval,
2069		.maxlen		= sizeof(dirtytime_expire_interval),
2070		.mode		= 0644,
2071		.proc_handler	= dirtytime_interval_handler,
2072		.extra1		= SYSCTL_ZERO,
2073	},
2074	{
2075		.procname	= "swappiness",
2076		.data		= &vm_swappiness,
2077		.maxlen		= sizeof(vm_swappiness),
2078		.mode		= 0644,
2079		.proc_handler	= proc_dointvec_minmax,
2080		.extra1		= SYSCTL_ZERO,
2081		.extra2		= SYSCTL_TWO_HUNDRED,
2082	},
2083#ifdef CONFIG_NUMA
2084	{
2085		.procname	= "numa_stat",
2086		.data		= &sysctl_vm_numa_stat,
2087		.maxlen		= sizeof(int),
2088		.mode		= 0644,
2089		.proc_handler	= sysctl_vm_numa_stat_handler,
2090		.extra1		= SYSCTL_ZERO,
2091		.extra2		= SYSCTL_ONE,
2092	},
2093#endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2094	{
2095		.procname	= "drop_caches",
2096		.data		= &sysctl_drop_caches,
2097		.maxlen		= sizeof(int),
2098		.mode		= 0200,
2099		.proc_handler	= drop_caches_sysctl_handler,
2100		.extra1		= SYSCTL_ONE,
2101		.extra2		= SYSCTL_FOUR,
2102	},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2103	{
2104		.procname	= "page_lock_unfairness",
2105		.data		= &sysctl_page_lock_unfairness,
2106		.maxlen		= sizeof(sysctl_page_lock_unfairness),
2107		.mode		= 0644,
2108		.proc_handler	= proc_dointvec_minmax,
2109		.extra1		= SYSCTL_ZERO,
2110	},
2111#ifdef CONFIG_MMU
2112	{
2113		.procname	= "max_map_count",
2114		.data		= &sysctl_max_map_count,
2115		.maxlen		= sizeof(sysctl_max_map_count),
2116		.mode		= 0644,
2117		.proc_handler	= proc_dointvec_minmax,
2118		.extra1		= SYSCTL_ZERO,
2119	},
2120#else
2121	{
2122		.procname	= "nr_trim_pages",
2123		.data		= &sysctl_nr_trim_pages,
2124		.maxlen		= sizeof(sysctl_nr_trim_pages),
2125		.mode		= 0644,
2126		.proc_handler	= proc_dointvec_minmax,
2127		.extra1		= SYSCTL_ZERO,
2128	},
2129#endif
2130	{
2131		.procname	= "vfs_cache_pressure",
2132		.data		= &sysctl_vfs_cache_pressure,
2133		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
2134		.mode		= 0644,
2135		.proc_handler	= proc_dointvec_minmax,
2136		.extra1		= SYSCTL_ZERO,
2137	},
2138#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
2139    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
2140	{
2141		.procname	= "legacy_va_layout",
2142		.data		= &sysctl_legacy_va_layout,
2143		.maxlen		= sizeof(sysctl_legacy_va_layout),
2144		.mode		= 0644,
2145		.proc_handler	= proc_dointvec_minmax,
2146		.extra1		= SYSCTL_ZERO,
2147	},
2148#endif
2149#ifdef CONFIG_NUMA
2150	{
2151		.procname	= "zone_reclaim_mode",
2152		.data		= &node_reclaim_mode,
2153		.maxlen		= sizeof(node_reclaim_mode),
2154		.mode		= 0644,
2155		.proc_handler	= proc_dointvec_minmax,
2156		.extra1		= SYSCTL_ZERO,
2157	},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2158#endif
2159#ifdef CONFIG_SMP
2160	{
2161		.procname	= "stat_interval",
2162		.data		= &sysctl_stat_interval,
2163		.maxlen		= sizeof(sysctl_stat_interval),
2164		.mode		= 0644,
2165		.proc_handler	= proc_dointvec_jiffies,
2166	},
2167	{
2168		.procname	= "stat_refresh",
2169		.data		= NULL,
2170		.maxlen		= 0,
2171		.mode		= 0600,
2172		.proc_handler	= vmstat_refresh,
2173	},
2174#endif
2175#ifdef CONFIG_MMU
2176	{
2177		.procname	= "mmap_min_addr",
2178		.data		= &dac_mmap_min_addr,
2179		.maxlen		= sizeof(unsigned long),
2180		.mode		= 0644,
2181		.proc_handler	= mmap_min_addr_handler,
2182	},
2183#endif
 
 
 
 
 
 
 
 
 
2184#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
2185   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
2186	{
2187		.procname	= "vdso_enabled",
2188#ifdef CONFIG_X86_32
2189		.data		= &vdso32_enabled,
2190		.maxlen		= sizeof(vdso32_enabled),
2191#else
2192		.data		= &vdso_enabled,
2193		.maxlen		= sizeof(vdso_enabled),
2194#endif
2195		.mode		= 0644,
2196		.proc_handler	= proc_dointvec,
2197		.extra1		= SYSCTL_ZERO,
2198	},
2199#endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2200	{
2201		.procname	= "user_reserve_kbytes",
2202		.data		= &sysctl_user_reserve_kbytes,
2203		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
2204		.mode		= 0644,
2205		.proc_handler	= proc_doulongvec_minmax,
2206	},
2207	{
2208		.procname	= "admin_reserve_kbytes",
2209		.data		= &sysctl_admin_reserve_kbytes,
2210		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
2211		.mode		= 0644,
2212		.proc_handler	= proc_doulongvec_minmax,
2213	},
2214#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
2215	{
2216		.procname	= "mmap_rnd_bits",
2217		.data		= &mmap_rnd_bits,
2218		.maxlen		= sizeof(mmap_rnd_bits),
2219		.mode		= 0600,
2220		.proc_handler	= proc_dointvec_minmax,
2221		.extra1		= (void *)&mmap_rnd_bits_min,
2222		.extra2		= (void *)&mmap_rnd_bits_max,
2223	},
2224#endif
2225#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
2226	{
2227		.procname	= "mmap_rnd_compat_bits",
2228		.data		= &mmap_rnd_compat_bits,
2229		.maxlen		= sizeof(mmap_rnd_compat_bits),
2230		.mode		= 0600,
2231		.proc_handler	= proc_dointvec_minmax,
2232		.extra1		= (void *)&mmap_rnd_compat_bits_min,
2233		.extra2		= (void *)&mmap_rnd_compat_bits_max,
2234	},
2235#endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2236};
2237
 
 
 
 
 
 
 
 
 
2238int __init sysctl_init_bases(void)
2239{
2240	register_sysctl_init("kernel", kern_table);
2241	register_sysctl_init("vm", vm_table);
 
 
2242
2243	return 0;
2244}
2245#endif /* CONFIG_SYSCTL */
/*
 * No sense putting this after each symbol definition, twice,
 * exception granted :-)
 *
 * All handler exports are therefore collected here in one place.
 */
EXPORT_SYMBOL(proc_dobool);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(proc_do_large_bitmap);