Linux Audio

Check our new training course

Buildroot integration, development and maintenance

Need a Buildroot system for your embedded project?
Loading...
v5.4
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * xsave/xrstor support.
   4 *
   5 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
   6 */
 
   7#include <linux/compat.h>
   8#include <linux/cpu.h>
   9#include <linux/mman.h>
 
  10#include <linux/pkeys.h>
  11#include <linux/seq_file.h>
  12#include <linux/proc_fs.h>
 
  13
  14#include <asm/fpu/api.h>
  15#include <asm/fpu/internal.h>
  16#include <asm/fpu/signal.h>
  17#include <asm/fpu/regset.h>
  18#include <asm/fpu/xstate.h>
 
  19
  20#include <asm/tlbflush.h>
  21#include <asm/cpufeature.h>
 
 
 
 
 
 
 
 
 
 
  22
  23/*
  24 * Although we spell it out in here, the Processor Trace
  25 * xfeature is completely unused.  We use other mechanisms
  26 * to save/restore PT state in Linux.
  27 */
  28static const char *xfeature_names[] =
  29{
  30	"x87 floating point registers"	,
  31	"SSE registers"			,
  32	"AVX registers"			,
  33	"MPX bounds registers"		,
  34	"MPX CSR"			,
  35	"AVX-512 opmask"		,
  36	"AVX-512 Hi256"			,
  37	"AVX-512 ZMM_Hi256"		,
  38	"Processor Trace (unused)"	,
  39	"Protection Keys User registers",
  40	"unknown xstate feature"	,
 
 
 
 
 
 
 
 
 
  41};
  42
  43static short xsave_cpuid_features[] __initdata = {
  44	X86_FEATURE_FPU,
  45	X86_FEATURE_XMM,
  46	X86_FEATURE_AVX,
  47	X86_FEATURE_MPX,
  48	X86_FEATURE_MPX,
  49	X86_FEATURE_AVX512F,
  50	X86_FEATURE_AVX512F,
  51	X86_FEATURE_AVX512F,
  52	X86_FEATURE_INTEL_PT,
  53	X86_FEATURE_PKU,
 
 
 
 
  54};
  55
  56/*
  57 * Mask of xstate features supported by the CPU and the kernel:
  58 */
  59u64 xfeatures_mask __read_mostly;
 
  60
  61static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
  62static unsigned int xstate_sizes[XFEATURE_MAX]   = { [ 0 ... XFEATURE_MAX - 1] = -1};
  63static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
  64
  65/*
  66 * The XSAVE area of kernel can be in standard or compacted format;
  67 * it is always in standard format for user mode. This is the user
  68 * mode standard format size used for signal and ptrace frames.
  69 */
  70unsigned int fpu_user_xstate_size;
  71
  72/*
  73 * Return whether the system supports a given xfeature.
  74 *
  75 * Also return the name of the (most advanced) feature that the caller requested:
  76 */
  77int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
  78{
  79	u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask;
  80
  81	if (unlikely(feature_name)) {
  82		long xfeature_idx, max_idx;
  83		u64 xfeatures_print;
  84		/*
  85		 * So we use FLS here to be able to print the most advanced
  86		 * feature that was requested but is missing. So if a driver
  87		 * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
  88		 * missing AVX feature - this is the most informative message
  89		 * to users:
  90		 */
  91		if (xfeatures_missing)
  92			xfeatures_print = xfeatures_missing;
  93		else
  94			xfeatures_print = xfeatures_needed;
  95
  96		xfeature_idx = fls64(xfeatures_print)-1;
  97		max_idx = ARRAY_SIZE(xfeature_names)-1;
  98		xfeature_idx = min(xfeature_idx, max_idx);
  99
 100		*feature_name = xfeature_names[xfeature_idx];
 101	}
 102
 103	if (xfeatures_missing)
 104		return 0;
 105
 106	return 1;
 107}
 108EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
 109
 110static int xfeature_is_supervisor(int xfeature_nr)
 111{
 112	/*
 113	 * We currently do not support supervisor states, but if
 114	 * we did, we could find out like this.
 115	 *
 116	 * SDM says: If state component 'i' is a user state component,
 117	 * ECX[0] return 0; if state component i is a supervisor
 118	 * state component, ECX[0] returns 1.
 119	 */
 120	u32 eax, ebx, ecx, edx;
 121
 122	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
 123	return !!(ecx & 1);
 124}
 125
 126static int xfeature_is_user(int xfeature_nr)
 127{
 128	return !xfeature_is_supervisor(xfeature_nr);
 129}
 130
 131/*
 132 * When executing XSAVEOPT (or other optimized XSAVE instructions), if
 133 * a processor implementation detects that an FPU state component is still
 134 * (or is again) in its initialized state, it may clear the corresponding
 135 * bit in the header.xfeatures field, and can skip the writeout of registers
 136 * to the corresponding memory layout.
 137 *
 138 * This means that when the bit is zero, the state component might still contain
 139 * some previous - non-initialized register state.
 140 *
 141 * Before writing xstate information to user-space we sanitize those components,
 142 * to always ensure that the memory layout of a feature will be in the init state
 143 * if the corresponding header bit is zero. This is to ensure that user-space doesn't
 144 * see some stale state in the memory layout during signal handling, debugging etc.
 145 */
 146void fpstate_sanitize_xstate(struct fpu *fpu)
 147{
 148	struct fxregs_state *fx = &fpu->state.fxsave;
 149	int feature_bit;
 150	u64 xfeatures;
 151
 152	if (!use_xsaveopt())
 153		return;
 154
 155	xfeatures = fpu->state.xsave.header.xfeatures;
 156
 157	/*
 158	 * None of the feature bits are in init state. So nothing else
 159	 * to do for us, as the memory layout is up to date.
 160	 */
 161	if ((xfeatures & xfeatures_mask) == xfeatures_mask)
 162		return;
 
 163
 164	/*
 165	 * FP is in init state
 
 
 166	 */
 167	if (!(xfeatures & XFEATURE_MASK_FP)) {
 168		fx->cwd = 0x37f;
 169		fx->swd = 0;
 170		fx->twd = 0;
 171		fx->fop = 0;
 172		fx->rip = 0;
 173		fx->rdp = 0;
 174		memset(&fx->st_space[0], 0, 128);
 175	}
 176
 177	/*
 178	 * SSE is in init state
 179	 */
 180	if (!(xfeatures & XFEATURE_MASK_SSE))
 181		memset(&fx->xmm_space[0], 0, 256);
 182
 183	/*
 184	 * First two features are FPU and SSE, which above we handled
 185	 * in a special way already:
 186	 */
 187	feature_bit = 0x2;
 188	xfeatures = (xfeatures_mask & ~xfeatures) >> 2;
 189
 190	/*
 191	 * Update all the remaining memory layouts according to their
 192	 * standard xstate layout, if their header bit is in the init
 193	 * state:
 194	 */
 195	while (xfeatures) {
 196		if (xfeatures & 0x1) {
 197			int offset = xstate_comp_offsets[feature_bit];
 198			int size = xstate_sizes[feature_bit];
 199
 200			memcpy((void *)fx + offset,
 201			       (void *)&init_fpstate.xsave + offset,
 202			       size);
 203		}
 204
 205		xfeatures >>= 1;
 206		feature_bit++;
 207	}
 
 208}
 209
 210/*
 211 * Enable the extended processor state save/restore feature.
 212 * Called once per CPU onlining.
 213 */
 214void fpu__init_cpu_xstate(void)
 215{
 216	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
 217		return;
 
 
 
 218	/*
 219	 * Make it clear that XSAVES supervisor states are not yet
 220	 * implemented should anyone expect it to work by changing
 221	 * bits in XFEATURE_MASK_* macros and XCR0.
 
 222	 */
 223	WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR),
 224		"x86/fpu: XSAVES supervisor states are not yet implemented.\n");
 225
 226	xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR;
 
 
 
 
 
 227
 228	cr4_set_bits(X86_CR4_OSXSAVE);
 229	xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
 
 
 
 
 
 230}
 231
 232/*
 233 * Note that in the future we will likely need a pair of
 234 * functions here: one for user xstates and the other for
 235 * system xstates.  For now, they are the same.
 236 */
 237static int xfeature_enabled(enum xfeature xfeature)
 238{
 239	return !!(xfeatures_mask & (1UL << xfeature));
 240}
 241
 242/*
 243 * Record the offsets and sizes of various xstates contained
 244 * in the XSAVE state memory layout.
 245 */
 246static void __init setup_xstate_features(void)
 247{
 248	u32 eax, ebx, ecx, edx, i;
 249	/* start at the beginnning of the "extended state" */
 250	unsigned int last_good_offset = offsetof(struct xregs_state,
 251						 extended_state_area);
 252	/*
 253	 * The FP xstates and SSE xstates are legacy states. They are always
 254	 * in the fixed offsets in the xsave area in either compacted form
 255	 * or standard form.
 256	 */
 257	xstate_offsets[0] = 0;
 258	xstate_sizes[0] = offsetof(struct fxregs_state, xmm_space);
 259	xstate_offsets[1] = xstate_sizes[0];
 260	xstate_sizes[1] = FIELD_SIZEOF(struct fxregs_state, xmm_space);
 261
 262	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
 263		if (!xfeature_enabled(i))
 264			continue;
 265
 
 266		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
 267
 
 
 
 268		/*
 269		 * If an xfeature is supervisor state, the offset
 270		 * in EBX is invalid. We leave it to -1.
 271		 */
 272		if (xfeature_is_user(i))
 273			xstate_offsets[i] = ebx;
 
 
 274
 275		xstate_sizes[i] = eax;
 276		/*
 277		 * In our xstate size checks, we assume that the
 278		 * highest-numbered xstate feature has the
 279		 * highest offset in the buffer.  Ensure it does.
 280		 */
 281		WARN_ONCE(last_good_offset > xstate_offsets[i],
 282			"x86/fpu: misordered xstate at %d\n", last_good_offset);
 
 283		last_good_offset = xstate_offsets[i];
 284	}
 285}
 286
 287static void __init print_xstate_feature(u64 xstate_mask)
 288{
 289	const char *feature_name;
 290
 291	if (cpu_has_xfeatures(xstate_mask, &feature_name))
 292		pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
 293}
 294
 295/*
 296 * Print out all the supported xstate features:
 297 */
 298static void __init print_xstate_features(void)
 299{
 300	print_xstate_feature(XFEATURE_MASK_FP);
 301	print_xstate_feature(XFEATURE_MASK_SSE);
 302	print_xstate_feature(XFEATURE_MASK_YMM);
 303	print_xstate_feature(XFEATURE_MASK_BNDREGS);
 304	print_xstate_feature(XFEATURE_MASK_BNDCSR);
 305	print_xstate_feature(XFEATURE_MASK_OPMASK);
 306	print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
 307	print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
 308	print_xstate_feature(XFEATURE_MASK_PKRU);
 
 
 
 
 309}
 310
 311/*
 312 * This check is important because it is easy to get XSTATE_*
 313 * confused with XSTATE_BIT_*.
 314 */
 315#define CHECK_XFEATURE(nr) do {		\
 316	WARN_ON(nr < FIRST_EXTENDED_XFEATURE);	\
 317	WARN_ON(nr >= XFEATURE_MAX);	\
 318} while (0)
 319
 320/*
 321 * We could cache this like xstate_size[], but we only use
 322 * it here, so it would be a waste of space.
 323 */
 324static int xfeature_is_aligned(int xfeature_nr)
 325{
 326	u32 eax, ebx, ecx, edx;
 327
 328	CHECK_XFEATURE(xfeature_nr);
 329	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
 330	/*
 331	 * The value returned by ECX[1] indicates the alignment
 332	 * of state component 'i' when the compacted format
 333	 * of the extended region of an XSAVE area is used:
 334	 */
 335	return !!(ecx & 2);
 336}
 337
 338/*
 339 * This function sets up offsets and sizes of all extended states in
 340 * xsave area. This supports both standard format and compacted format
 341 * of the xsave aread.
 342 */
 343static void __init setup_xstate_comp(void)
 344{
 345	unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8];
 346	int i;
 
 
 
 
 
 
 
 347
 348	/*
 349	 * The FP xstates and SSE xstates are legacy states. They are always
 350	 * in the fixed offsets in the xsave area in either compacted form
 351	 * or standard form.
 352	 */
 353	xstate_comp_offsets[0] = 0;
 354	xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
 355
 356	if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
 357		for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
 358			if (xfeature_enabled(i)) {
 359				xstate_comp_offsets[i] = xstate_offsets[i];
 360				xstate_comp_sizes[i] = xstate_sizes[i];
 361			}
 362		}
 363		return;
 364	}
 365
 366	xstate_comp_offsets[FIRST_EXTENDED_XFEATURE] =
 367		FXSAVE_SIZE + XSAVE_HDR_SIZE;
 368
 369	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
 370		if (xfeature_enabled(i))
 371			xstate_comp_sizes[i] = xstate_sizes[i];
 372		else
 373			xstate_comp_sizes[i] = 0;
 374
 375		if (i > FIRST_EXTENDED_XFEATURE) {
 376			xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
 377					+ xstate_comp_sizes[i-1];
 378
 379			if (xfeature_is_aligned(i))
 380				xstate_comp_offsets[i] =
 381					ALIGN(xstate_comp_offsets[i], 64);
 382		}
 383	}
 384}
 385
 386/*
 387 * Print out xstate component offsets and sizes
 388 */
 389static void __init print_xstate_offset_size(void)
 390{
 391	int i;
 392
 393	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
 394		if (!xfeature_enabled(i))
 395			continue;
 396		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
 397			 i, xstate_comp_offsets[i], i, xstate_sizes[i]);
 398	}
 399}
 
 
 
 
 
 
 400
 401/*
 402 * setup the xstate image representing the init state
 403 */
 404static void __init setup_init_fpu_buf(void)
 405{
 406	static int on_boot_cpu __initdata = 1;
 407
 408	WARN_ON_FPU(!on_boot_cpu);
 409	on_boot_cpu = 0;
 410
 411	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 412		return;
 413
 414	setup_xstate_features();
 415	print_xstate_features();
 416
 417	if (boot_cpu_has(X86_FEATURE_XSAVES))
 418		init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
 419
 420	/*
 421	 * Init all the features state with header.xfeatures being 0x0
 422	 */
 423	copy_kernel_to_xregs_booting(&init_fpstate.xsave);
 424
 425	/*
 426	 * Dump the init state again. This is to identify the init state
 427	 * of any feature which is not represented by all zero's.
 
 
 
 
 
 
 
 
 
 
 
 
 428	 */
 429	copy_xregs_to_kernel_booting(&init_fpstate.xsave);
 430}
 431
 432static int xfeature_uncompacted_offset(int xfeature_nr)
 433{
 434	u32 eax, ebx, ecx, edx;
 435
 436	/*
 437	 * Only XSAVES supports supervisor states and it uses compacted
 438	 * format. Checking a supervisor state's uncompacted offset is
 439	 * an error.
 440	 */
 441	if (XFEATURE_MASK_SUPERVISOR & BIT_ULL(xfeature_nr)) {
 442		WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr);
 443		return -1;
 444	}
 445
 446	CHECK_XFEATURE(xfeature_nr);
 447	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
 448	return ebx;
 449}
 450
 451static int xfeature_size(int xfeature_nr)
 452{
 453	u32 eax, ebx, ecx, edx;
 454
 455	CHECK_XFEATURE(xfeature_nr);
 456	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
 457	return eax;
 458}
 459
 460/*
 461 * 'XSAVES' implies two different things:
 462 * 1. saving of supervisor/system state
 463 * 2. using the compacted format
 464 *
 465 * Use this function when dealing with the compacted format so
 466 * that it is obvious which aspect of 'XSAVES' is being handled
 467 * by the calling code.
 468 */
 469int using_compacted_format(void)
 470{
 471	return boot_cpu_has(X86_FEATURE_XSAVES);
 472}
 473
 474/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
 475int validate_xstate_header(const struct xstate_header *hdr)
 
 476{
 477	/* No unknown or supervisor features may be set */
 478	if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
 479		return -EINVAL;
 480
 481	/* Userspace must use the uncompacted format */
 482	if (hdr->xcomp_bv)
 483		return -EINVAL;
 484
 485	/*
 486	 * If 'reserved' is shrunken to add a new field, make sure to validate
 487	 * that new field here!
 488	 */
 489	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
 490
 491	/* No reserved bits may be set */
 492	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
 493		return -EINVAL;
 494
 495	return 0;
 496}
 497
 498static void __xstate_dump_leaves(void)
 499{
 500	int i;
 501	u32 eax, ebx, ecx, edx;
 502	static int should_dump = 1;
 503
 504	if (!should_dump)
 505		return;
 506	should_dump = 0;
 507	/*
 508	 * Dump out a few leaves past the ones that we support
 509	 * just in case there are some goodies up there
 510	 */
 511	for (i = 0; i < XFEATURE_MAX + 10; i++) {
 512		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
 513		pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
 514			XSTATE_CPUID, i, eax, ebx, ecx, edx);
 515	}
 516}
 517
 518#define XSTATE_WARN_ON(x) do {							\
 519	if (WARN_ONCE(x, "XSAVE consistency problem, dumping leaves")) {	\
 520		__xstate_dump_leaves();						\
 521	}									\
 522} while (0)
 523
 524#define XCHECK_SZ(sz, nr, nr_macro, __struct) do {			\
 525	if ((nr == nr_macro) &&						\
 526	    WARN_ONCE(sz != sizeof(__struct),				\
 527		"%s: struct is %zu bytes, cpu state %d bytes\n",	\
 528		__stringify(nr_macro), sizeof(__struct), sz)) {		\
 529		__xstate_dump_leaves();					\
 530	}								\
 531} while (0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 532
 533/*
 534 * We have a C struct for each 'xstate'.  We need to ensure
 535 * that our software representation matches what the CPU
 536 * tells us about the state's size.
 537 */
 538static void check_xstate_against_struct(int nr)
 539{
 540	/*
 541	 * Ask the CPU for the size of the state.
 542	 */
 543	int sz = xfeature_size(nr);
 
 544	/*
 545	 * Match each CPU state with the corresponding software
 546	 * structure.
 547	 */
 548	XCHECK_SZ(sz, nr, XFEATURE_YMM,       struct ymmh_struct);
 549	XCHECK_SZ(sz, nr, XFEATURE_BNDREGS,   struct mpx_bndreg_state);
 550	XCHECK_SZ(sz, nr, XFEATURE_BNDCSR,    struct mpx_bndcsr_state);
 551	XCHECK_SZ(sz, nr, XFEATURE_OPMASK,    struct avx_512_opmask_state);
 552	XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
 553	XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM,  struct avx_512_hi16_state);
 554	XCHECK_SZ(sz, nr, XFEATURE_PKRU,      struct pkru_state);
 555
 556	/*
 557	 * Make *SURE* to add any feature numbers in below if
 558	 * there are "holes" in the xsave state component
 559	 * numbers.
 560	 */
 561	if ((nr < XFEATURE_YMM) ||
 562	    (nr >= XFEATURE_MAX) ||
 563	    (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR)) {
 564		WARN_ONCE(1, "no structure for xstate: %d\n", nr);
 565		XSTATE_WARN_ON(1);
 566	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 567}
 568
 569/*
 570 * This essentially double-checks what the cpu told us about
 571 * how large the XSAVE buffer needs to be.  We are recalculating
 572 * it to be safe.
 573 */
 574static void do_extra_xstate_size_checks(void)
 575{
 576	int paranoid_xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
 
 
 
 
 
 
 577	int i;
 578
 579	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
 580		if (!xfeature_enabled(i))
 581			continue;
 582
 583		check_xstate_against_struct(i);
 584		/*
 585		 * Supervisor state components can be managed only by
 586		 * XSAVES, which is compacted-format only.
 587		 */
 588		if (!using_compacted_format())
 589			XSTATE_WARN_ON(xfeature_is_supervisor(i));
 590
 591		/* Align from the end of the previous feature */
 592		if (xfeature_is_aligned(i))
 593			paranoid_xstate_size = ALIGN(paranoid_xstate_size, 64);
 594		/*
 595		 * The offset of a given state in the non-compacted
 596		 * format is given to us in a CPUID leaf.  We check
 597		 * them for being ordered (increasing offsets) in
 598		 * setup_xstate_features().
 599		 */
 600		if (!using_compacted_format())
 601			paranoid_xstate_size = xfeature_uncompacted_offset(i);
 602		/*
 603		 * The compacted-format offset always depends on where
 604		 * the previous state ended.
 605		 */
 606		paranoid_xstate_size += xfeature_size(i);
 607	}
 608	XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size);
 
 
 
 609}
 610
 611
 612/*
 613 * Get total size of enabled xstates in XCR0/xfeatures_mask.
 614 *
 615 * Note the SDM's wording here.  "sub-function 0" only enumerates
 616 * the size of the *user* states.  If we use it to size a buffer
 617 * that we use 'XSAVES' on, we could potentially overflow the
 618 * buffer because 'XSAVES' saves system states too.
 619 *
 620 * Note that we do not currently set any bits on IA32_XSS so
 621 * 'XCR0 | IA32_XSS == XCR0' for now.
 622 */
 623static unsigned int __init get_xsaves_size(void)
 624{
 625	unsigned int eax, ebx, ecx, edx;
 626	/*
 627	 * - CPUID function 0DH, sub-function 1:
 628	 *    EBX enumerates the size (in bytes) required by
 629	 *    the XSAVES instruction for an XSAVE area
 630	 *    containing all the state components
 631	 *    corresponding to bits currently set in
 632	 *    XCR0 | IA32_XSS.
 
 
 
 
 633	 */
 634	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
 635	return ebx;
 636}
 637
 638static unsigned int __init get_xsave_size(void)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 639{
 640	unsigned int eax, ebx, ecx, edx;
 641	/*
 642	 * - CPUID function 0DH, sub-function 0:
 643	 *    EBX enumerates the size (in bytes) required by
 644	 *    the XSAVE instruction for an XSAVE area
 645	 *    containing all the *user* state components
 646	 *    corresponding to bits currently set in XCR0.
 647	 */
 648	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
 649	return ebx;
 650}
 651
 652/*
 653 * Will the runtime-enumerated 'xstate_size' fit in the init
 654 * task's statically-allocated buffer?
 655 */
 656static bool is_supported_xstate_size(unsigned int test_xstate_size)
 657{
 658	if (test_xstate_size <= sizeof(union fpregs_state))
 659		return true;
 660
 661	pr_warn("x86/fpu: xstate buffer too small (%zu < %d), disabling xsave\n",
 662			sizeof(union fpregs_state), test_xstate_size);
 663	return false;
 664}
 665
 666static int __init init_xstate_size(void)
 667{
 668	/* Recompute the context size for enabled features: */
 669	unsigned int possible_xstate_size;
 670	unsigned int xsave_size;
 671
 672	xsave_size = get_xsave_size();
 
 673
 674	if (boot_cpu_has(X86_FEATURE_XSAVES))
 675		possible_xstate_size = get_xsaves_size();
 
 
 
 
 
 
 
 
 676	else
 677		possible_xstate_size = xsave_size;
 678
 679	/* Ensure we have the space to store all enabled: */
 680	if (!is_supported_xstate_size(possible_xstate_size))
 
 
 681		return -EINVAL;
 682
 683	/*
 684	 * The size is OK, we are definitely going to use xsave,
 685	 * make it known to the world that we need more space.
 686	 */
 687	fpu_kernel_xstate_size = possible_xstate_size;
 688	do_extra_xstate_size_checks();
 689
 690	/*
 691	 * User space is always in standard format.
 692	 */
 693	fpu_user_xstate_size = xsave_size;
 694	return 0;
 695}
 696
 697/*
 698 * We enabled the XSAVE hardware, but something went wrong and
 699 * we can not use it.  Disable it.
 700 */
 701static void fpu__init_disable_system_xstate(void)
 702{
 703	xfeatures_mask = 0;
 704	cr4_clear_bits(X86_CR4_OSXSAVE);
 705	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 706}
 707
 708/*
 709 * Enable and initialize the xsave feature.
 710 * Called once per system bootup.
 711 */
 712void __init fpu__init_system_xstate(void)
 713{
 714	unsigned int eax, ebx, ecx, edx;
 715	static int on_boot_cpu __initdata = 1;
 716	int err;
 717	int i;
 718
 719	WARN_ON_FPU(!on_boot_cpu);
 720	on_boot_cpu = 0;
 721
 722	if (!boot_cpu_has(X86_FEATURE_FPU)) {
 723		pr_info("x86/fpu: No FPU detected\n");
 724		return;
 725	}
 726
 727	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
 728		pr_info("x86/fpu: x87 FPU will use %s\n",
 729			boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
 730		return;
 731	}
 732
 733	if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
 734		WARN_ON_FPU(1);
 735		return;
 736	}
 737
 
 
 
 738	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
 739	xfeatures_mask = eax + ((u64)edx << 32);
 
 
 
 
 
 
 740
 741	if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
 742		/*
 743		 * This indicates that something really unexpected happened
 744		 * with the enumeration.  Disable XSAVE and try to continue
 745		 * booting without it.  This is too early to BUG().
 746		 */
 747		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask);
 
 748		goto out_disable;
 749	}
 750
 751	/*
 752	 * Clear XSAVE features that are disabled in the normal CPUID.
 753	 */
 754	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
 755		if (!boot_cpu_has(xsave_cpuid_features[i]))
 756			xfeatures_mask &= ~BIT(i);
 
 
 
 757	}
 758
 759	xfeatures_mask &= fpu__get_supported_xfeatures_mask();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 760
 761	/* Enable xstate instructions to be able to continue with initialization: */
 762	fpu__init_cpu_xstate();
 
 
 
 
 763	err = init_xstate_size();
 764	if (err)
 765		goto out_disable;
 766
 
 
 
 767	/*
 768	 * Update info used for ptrace frames; use standard-format size and no
 769	 * supervisor xstates:
 770	 */
 771	update_regset_xstate_info(fpu_user_xstate_size,	xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 772
 773	fpu__init_prepare_fx_sw_frame();
 774	setup_init_fpu_buf();
 775	setup_xstate_comp();
 776	print_xstate_offset_size();
 777
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 778	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
 779		xfeatures_mask,
 780		fpu_kernel_xstate_size,
 781		boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
 782	return;
 783
 784out_disable:
 785	/* something went wrong, try to boot without any XSAVE support */
 786	fpu__init_disable_system_xstate();
 787}
 788
 789/*
 790 * Restore minimal FPU state after suspend:
 791 */
 792void fpu__resume_cpu(void)
 793{
 794	/*
 795	 * Restore XCR0 on xsave capable CPUs:
 796	 */
 797	if (boot_cpu_has(X86_FEATURE_XSAVE))
 798		xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
 
 
 
 
 
 
 
 
 
 
 
 
 799}
 800
 801/*
 802 * Given an xstate feature nr, calculate where in the xsave
 803 * buffer the state is.  Callers should ensure that the buffer
 804 * is valid.
 805 */
 806static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
 807{
 808	if (!xfeature_enabled(xfeature_nr)) {
 809		WARN_ON_FPU(1);
 
 810		return NULL;
 
 
 
 
 811	}
 812
 813	return (void *)xsave + xstate_comp_offsets[xfeature_nr];
 814}
 
 815/*
 816 * Given the xsave area and a state inside, this function returns the
 817 * address of the state.
 818 *
 819 * This is the API that is called to get xstate address in either
 820 * standard format or compacted format of xsave area.
 821 *
 822 * Note that if there is no data for the field in the xsave buffer
 823 * this will return NULL.
 824 *
 825 * Inputs:
 826 *	xstate: the thread's storage area for all FPU data
 827 *	xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
 828 *	XFEATURE_SSE, etc...)
 829 * Output:
 830 *	address of the state in the xsave area, or NULL if the
 831 *	field is not present in the xsave buffer.
 832 */
 833void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
 834{
 835	/*
 836	 * Do we even *have* xsave state?
 837	 */
 838	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 839		return NULL;
 840
 841	/*
 842	 * We should not ever be requesting features that we
 843	 * have not enabled.  Remember that pcntxt_mask is
 844	 * what we write to the XCR0 register.
 845	 */
 846	WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)),
 847		  "get of unsupported state");
 
 848	/*
 849	 * This assumes the last 'xsave*' instruction to
 850	 * have requested that 'xfeature_nr' be saved.
 851	 * If it did not, we might be seeing and old value
 852	 * of the field in the buffer.
 853	 *
 854	 * This can happen because the last 'xsave' did not
 855	 * request that this feature be saved (unlikely)
 856	 * or because the "init optimization" caused it
 857	 * to not be saved.
 858	 */
 859	if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
 860		return NULL;
 861
 862	return __raw_xsave_addr(xsave, xfeature_nr);
 863}
 864EXPORT_SYMBOL_GPL(get_xsave_addr);
 865
 866/*
 867 * This wraps up the common operations that need to occur when retrieving
 868 * data from xsave state.  It first ensures that the current task was
 869 * using the FPU and retrieves the data in to a buffer.  It then calculates
 870 * the offset of the requested field in the buffer.
 871 *
 872 * This function is safe to call whether the FPU is in use or not.
 873 *
 874 * Note that this only works on the current task.
 875 *
 876 * Inputs:
 877 *	@xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
 878 *	XFEATURE_SSE, etc...)
 879 * Output:
 880 *	address of the state in the xsave area or NULL if the state
 881 *	is not present or is in its 'init state'.
 882 */
 883const void *get_xsave_field_ptr(int xfeature_nr)
 884{
 885	struct fpu *fpu = &current->thread.fpu;
 886
 887	/*
 888	 * fpu__save() takes the CPU's xstate registers
 889	 * and saves them off to the 'fpu memory buffer.
 890	 */
 891	fpu__save(fpu);
 892
 893	return get_xsave_addr(&fpu->state.xsave, xfeature_nr);
 894}
 895
 896#ifdef CONFIG_ARCH_HAS_PKEYS
 897
 898#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
 899#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
 900/*
 901 * This will go out and modify PKRU register to set the access
 902 * rights for @pkey to @init_val.
 903 */
 904int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 905		unsigned long init_val)
 906{
 907	u32 old_pkru;
 908	int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
 909	u32 new_pkru_bits = 0;
 910
 911	/*
 912	 * This check implies XSAVE support.  OSPKE only gets
 913	 * set if we enable XSAVE and we enable PKU in XCR0.
 914	 */
 915	if (!boot_cpu_has(X86_FEATURE_OSPKE))
 
 
 
 
 
 
 
 
 916		return -EINVAL;
 917
 918	/* Set the bits we need in PKRU:  */
 919	if (init_val & PKEY_DISABLE_ACCESS)
 920		new_pkru_bits |= PKRU_AD_BIT;
 921	if (init_val & PKEY_DISABLE_WRITE)
 922		new_pkru_bits |= PKRU_WD_BIT;
 923
 924	/* Shift the bits in to the correct place in PKRU for pkey: */
 
 925	new_pkru_bits <<= pkey_shift;
 926
 927	/* Get old PKRU and mask off any old bits in place: */
 928	old_pkru = read_pkru();
 929	old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
 930
 931	/* Write old part along with new part: */
 932	write_pkru(old_pkru | new_pkru_bits);
 933
 934	return 0;
 935}
 936#endif /* ! CONFIG_ARCH_HAS_PKEYS */
 937
 938/*
 939 * Weird legacy quirk: SSE and YMM states store information in the
 940 * MXCSR and MXCSR_FLAGS fields of the FP area. That means if the FP
 941 * area is marked as unused in the xfeatures header, we need to copy
 942 * MXCSR and MXCSR_FLAGS if either SSE or YMM are in use.
 943 */
 944static inline bool xfeatures_mxcsr_quirk(u64 xfeatures)
 945{
 946	if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM)))
 947		return false;
 948
 949	if (xfeatures & XFEATURE_MASK_FP)
 950		return false;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 951
 952	return true;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 953}
 954
 955/*
 956 * This is similar to user_regset_copyout(), but will not add offset to
 957 * the source data pointer or increment pos, count, kbuf, and ubuf.
 
 
 
 
 
 
 
 
 958 */
 959static inline void
 960__copy_xstate_to_kernel(void *kbuf, const void *data,
 961			unsigned int offset, unsigned int size, unsigned int size_total)
 962{
 963	if (offset < size_total) {
 964		unsigned int copy = min(size, size_total - offset);
 
 
 965
 966		memcpy(kbuf + offset, data, copy);
 
 
 
 
 
 
 
 967	}
 
 968}
 969
 970/*
 971 * Convert from kernel XSAVES compacted format to standard format and copy
 972 * to a kernel-space ptrace buffer.
 
 
 
 
 
 
 
 973 *
 974 * It supports partial copy but pos always starts from zero. This is called
 975 * from xstateregs_get() and there we check the CPU has XSAVES.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 976 */
 977int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
 
 978{
 
 979	unsigned int offset, size;
 980	struct xstate_header header;
 
 981	int i;
 982
 983	/*
 984	 * Currently copy_regset_to_user() starts from pos 0:
 985	 */
 986	if (unlikely(offset_start != 0))
 987		return -EFAULT;
 988
 989	/*
 990	 * The destination is a ptrace buffer; we put in only user xstates:
 991	 */
 992	memset(&header, 0, sizeof(header));
 993	header.xfeatures = xsave->header.xfeatures;
 994	header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
 995
 996	/*
 997	 * Copy xregs_state->header:
 998	 */
 999	offset = offsetof(struct xregs_state, header);
1000	size = sizeof(header);
 
 
 
1001
1002	__copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);
 
 
 
 
 
 
 
 
 
1003
1004	for (i = 0; i < XFEATURE_MAX; i++) {
1005		/*
1006		 * Copy only in-use xstates:
1007		 */
1008		if ((header.xfeatures >> i) & 1) {
1009			void *src = __raw_xsave_addr(xsave, i);
1010
1011			offset = xstate_offsets[i];
1012			size = xstate_sizes[i];
1013
1014			/* The next component has to fit fully into the output buffer: */
1015			if (offset + size > size_total)
1016				break;
1017
1018			__copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
1019		}
1020
1021	}
1022
1023	if (xfeatures_mxcsr_quirk(header.xfeatures)) {
1024		offset = offsetof(struct fxregs_state, mxcsr);
1025		size = MXCSR_AND_FLAGS_SIZE;
1026		__copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total);
 
 
 
 
 
 
 
 
1027	}
1028
1029	/*
1030	 * Fill xsave->i387.sw_reserved value for ptrace frame:
 
1031	 */
1032	offset = offsetof(struct fxregs_state, sw_reserved);
1033	size = sizeof(xstate_fx_sw_bytes);
1034
1035	__copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);
 
 
 
1036
1037	return 0;
1038}
1039
1040static inline int
1041__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total)
 
 
 
1042{
1043	if (!size)
1044		return 0;
1045
1046	if (offset < size_total) {
1047		unsigned int copy = min(size, size_total - offset);
 
 
 
 
 
 
 
 
1048
1049		if (__copy_to_user(ubuf + offset, data, copy))
1050			return -EFAULT;
1051	}
1052	return 0;
 
 
 
 
 
 
 
 
 
1053}
1054
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1055/*
1056 * Convert from kernel XSAVES compacted format to standard format and copy
1057 * to a user-space buffer. It supports partial copy but pos always starts from
1058 * zero. This is called from xstateregs_get() and there we check the CPU
1059 * has XSAVES.
1060 */
1061int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
1062{
1063	unsigned int offset, size;
1064	int ret, i;
1065	struct xstate_header header;
 
 
 
 
 
 
 
 
1066
1067	/*
1068	 * Currently copy_regset_to_user() starts from pos 0:
 
 
1069	 */
1070	if (unlikely(offset_start != 0))
1071		return -EFAULT;
1072
1073	/*
1074	 * The destination is a ptrace buffer; we put in only user xstates:
 
1075	 */
1076	memset(&header, 0, sizeof(header));
1077	header.xfeatures = xsave->header.xfeatures;
1078	header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
1079
1080	/*
1081	 * Copy xregs_state->header:
 
1082	 */
1083	offset = offsetof(struct xregs_state, header);
1084	size = sizeof(header);
1085
1086	ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total);
1087	if (ret)
1088		return ret;
1089
1090	for (i = 0; i < XFEATURE_MAX; i++) {
1091		/*
1092		 * Copy only in-use xstates:
1093		 */
1094		if ((header.xfeatures >> i) & 1) {
1095			void *src = __raw_xsave_addr(xsave, i);
1096
1097			offset = xstate_offsets[i];
1098			size = xstate_sizes[i];
1099
1100			/* The next component has to fit fully into the output buffer: */
1101			if (offset + size > size_total)
1102				break;
1103
1104			ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total);
1105			if (ret)
1106				return ret;
1107		}
1108
1109	}
1110
1111	if (xfeatures_mxcsr_quirk(header.xfeatures)) {
1112		offset = offsetof(struct fxregs_state, mxcsr);
1113		size = MXCSR_AND_FLAGS_SIZE;
1114		__copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total);
1115	}
1116
1117	/*
1118	 * Fill xsave->i387.sw_reserved value for ptrace frame:
 
1119	 */
1120	offset = offsetof(struct fxregs_state, sw_reserved);
1121	size = sizeof(xstate_fx_sw_bytes);
1122
1123	ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total);
1124	if (ret)
1125		return ret;
 
 
1126
 
 
 
 
 
 
 
 
1127	return 0;
1128}
 
1129
1130/*
1131 * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format
1132 * and copy to the target thread. This is called from xstateregs_set().
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1133 */
1134int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 
1135{
1136	unsigned int offset, size;
1137	int i;
1138	struct xstate_header hdr;
 
1139
1140	offset = offsetof(struct xregs_state, header);
1141	size = sizeof(hdr);
1142
1143	memcpy(&hdr, kbuf + offset, size);
 
 
 
 
 
1144
1145	if (validate_xstate_header(&hdr))
1146		return -EINVAL;
 
 
 
1147
1148	for (i = 0; i < XFEATURE_MAX; i++) {
1149		u64 mask = ((u64)1 << i);
1150
1151		if (hdr.xfeatures & mask) {
1152			void *dst = __raw_xsave_addr(xsave, i);
 
 
 
 
 
1153
1154			offset = xstate_offsets[i];
1155			size = xstate_sizes[i];
 
 
 
 
 
 
1156
1157			memcpy(dst, kbuf + offset, size);
1158		}
 
 
 
 
 
 
 
 
 
 
 
 
1159	}
1160
1161	if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
1162		offset = offsetof(struct fxregs_state, mxcsr);
1163		size = MXCSR_AND_FLAGS_SIZE;
1164		memcpy(&xsave->i387.mxcsr, kbuf + offset, size);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1165	}
 
 
1166
 
 
1167	/*
1168	 * The state that came in from userspace was user-state only.
1169	 * Mask all the user states out of 'xfeatures':
1170	 */
1171	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
 
 
 
 
 
 
 
1172
1173	/*
1174	 * Add back in the features that came in from userspace:
1175	 */
1176	xsave->header.xfeatures |= hdr.xfeatures;
1177
1178	return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1179}
1180
1181/*
1182 * Convert from a ptrace or sigreturn standard-format user-space buffer to
1183 * kernel XSAVES format and copy to the target thread. This is called from
1184 * xstateregs_set(), as well as potentially from the sigreturn() and
1185 * rt_sigreturn() system calls.
1186 */
1187int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
1188{
1189	unsigned int offset, size;
1190	int i;
1191	struct xstate_header hdr;
1192
1193	offset = offsetof(struct xregs_state, header);
1194	size = sizeof(hdr);
1195
1196	if (__copy_from_user(&hdr, ubuf + offset, size))
1197		return -EFAULT;
 
 
1198
1199	if (validate_xstate_header(&hdr))
1200		return -EINVAL;
1201
1202	for (i = 0; i < XFEATURE_MAX; i++) {
1203		u64 mask = ((u64)1 << i);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1204
1205		if (hdr.xfeatures & mask) {
1206			void *dst = __raw_xsave_addr(xsave, i);
 
 
 
 
 
 
 
 
 
 
1207
1208			offset = xstate_offsets[i];
1209			size = xstate_sizes[i];
 
 
 
 
1210
1211			if (__copy_from_user(dst, ubuf + offset, size))
1212				return -EFAULT;
1213		}
 
1214	}
1215
1216	if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
1217		offset = offsetof(struct fxregs_state, mxcsr);
1218		size = MXCSR_AND_FLAGS_SIZE;
1219		if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
1220			return -EFAULT;
 
 
1221	}
1222
 
 
 
 
 
1223	/*
1224	 * The state that came in from userspace was user-state only.
1225	 * Mask all the user states out of 'xfeatures':
 
 
1226	 */
1227	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
1228
1229	/*
1230	 * Add back in the features that came in from userspace:
 
1231	 */
1232	xsave->header.xfeatures |= hdr.xfeatures;
1233
1234	return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1235}
1236
1237#ifdef CONFIG_PROC_PID_ARCH_STATUS
1238/*
1239 * Report the amount of time elapsed in millisecond since last AVX512
1240 * use in the task.
1241 */
1242static void avx512_status(struct seq_file *m, struct task_struct *task)
1243{
1244	unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
1245	long delta;
1246
1247	if (!timestamp) {
1248		/*
1249		 * Report -1 if no AVX512 usage
1250		 */
1251		delta = -1;
1252	} else {
1253		delta = (long)(jiffies - timestamp);
1254		/*
1255		 * Cap to LONG_MAX if time difference > LONG_MAX
1256		 */
1257		if (delta < 0)
1258			delta = LONG_MAX;
1259		delta = jiffies_to_msecs(delta);
1260	}
1261
1262	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
1263	seq_putc(m, '\n');
1264}
1265
1266/*
1267 * Report architecture specific information
1268 */
1269int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
1270			struct pid *pid, struct task_struct *task)
1271{
1272	/*
1273	 * Report AVX512 state if the processor and build option supported.
1274	 */
1275	if (cpu_feature_enabled(X86_FEATURE_AVX512F))
1276		avx512_status(m, task);
1277
1278	return 0;
1279}
1280#endif /* CONFIG_PROC_PID_ARCH_STATUS */
v6.8
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * xsave/xrstor support.
   4 *
   5 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
   6 */
   7#include <linux/bitops.h>
   8#include <linux/compat.h>
   9#include <linux/cpu.h>
  10#include <linux/mman.h>
  11#include <linux/nospec.h>
  12#include <linux/pkeys.h>
  13#include <linux/seq_file.h>
  14#include <linux/proc_fs.h>
  15#include <linux/vmalloc.h>
  16
  17#include <asm/fpu/api.h>
 
 
  18#include <asm/fpu/regset.h>
  19#include <asm/fpu/signal.h>
  20#include <asm/fpu/xcr.h>
  21
  22#include <asm/tlbflush.h>
  23#include <asm/prctl.h>
  24#include <asm/elf.h>
  25
  26#include "context.h"
  27#include "internal.h"
  28#include "legacy.h"
  29#include "xstate.h"
  30
/*
 * Iterate over @mask with for_each_set_bit_from(), starting at
 * FIRST_EXTENDED_XFEATURE; @bit holds the current position.
 *
 * NOTE: this expands to an assignment followed by a loop header, i.e. two
 * statements, so it must not be used as the unbraced body of an if/else or
 * another loop. @mask has to be an lvalue because its address is taken.
 */
  31#define for_each_extended_xfeature(bit, mask)				\
  32	(bit) = FIRST_EXTENDED_XFEATURE;				\
  33	for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))
  34
  35/*
  36 * Although we spell it out in here, the Processor Trace
  37 * xfeature is completely unused.  We use other mechanisms
  38 * to save/restore PT state in Linux.
  39 */
/*
 * Human-readable xfeature names, looked up by xfeature number.
 * cpu_has_xfeatures() clamps its index to the last entry, so the trailing
 * "unknown xstate feature" string doubles as the catch-all for any feature
 * number beyond the table.
 */
  40static const char *xfeature_names[] =
  41{
  42	"x87 floating point registers",
  43	"SSE registers",
  44	"AVX registers",
  45	"MPX bounds registers",
  46	"MPX CSR",
  47	"AVX-512 opmask",
  48	"AVX-512 Hi256",
  49	"AVX-512 ZMM_Hi256",
  50	"Processor Trace (unused)",
  51	"Protection Keys User registers",
  52	"PASID state",
  53	"Control-flow User registers",
  54	"Control-flow Kernel registers (unused)",
  55	"unknown xstate feature",
  56	"unknown xstate feature",
  57	"unknown xstate feature",
  58	"unknown xstate feature",
  59	"AMX Tile config",
  60	"AMX Tile data",
  61	"unknown xstate feature",
  62};
  63
/*
 * X86_FEATURE_* flag associated with each xfeature, indexed by xfeature
 * number. Slots without a designated initializer are zero.
 * NOTE(review): the consumer is outside this view; presumably used at boot
 * to cross-check xfeatures against their CPUID bits - confirm.
 */
  64static unsigned short xsave_cpuid_features[] __initdata = {
  65	[XFEATURE_FP]				= X86_FEATURE_FPU,
  66	[XFEATURE_SSE]				= X86_FEATURE_XMM,
  67	[XFEATURE_YMM]				= X86_FEATURE_AVX,
  68	[XFEATURE_BNDREGS]			= X86_FEATURE_MPX,
  69	[XFEATURE_BNDCSR]			= X86_FEATURE_MPX,
  70	[XFEATURE_OPMASK]			= X86_FEATURE_AVX512F,
  71	[XFEATURE_ZMM_Hi256]			= X86_FEATURE_AVX512F,
  72	[XFEATURE_Hi16_ZMM]			= X86_FEATURE_AVX512F,
  73	[XFEATURE_PT_UNIMPLEMENTED_SO_FAR]	= X86_FEATURE_INTEL_PT,
  74	[XFEATURE_PKRU]				= X86_FEATURE_OSPKE,
  75	[XFEATURE_PASID]			= X86_FEATURE_ENQCMD,
  76	[XFEATURE_CET_USER]			= X86_FEATURE_SHSTK,
  77	[XFEATURE_XTILE_CFG]			= X86_FEATURE_AMX_TILE,
  78	[XFEATURE_XTILE_DATA]			= X86_FEATURE_AMX_TILE,
  79};
  80
/*
 * Per-xfeature layout cache, indexed by xfeature number and filled in by
 * setup_xstate_cache():
 *  - xstate_offsets[]: offset of the component (EBX of its CPUID leaf);
 *    stays at the initial -1 for supervisor features and for anything
 *    setup_xstate_cache() never visits.
 *  - xstate_sizes[]:   size of the component (EAX of its CPUID leaf),
 *    initially -1.
 *  - xstate_flags[]:   ECX of the CPUID leaf, see XSTATE_FLAG_* below.
 */
  81static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
  82	{ [ 0 ... XFEATURE_MAX - 1] = -1};
  83static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
  84	{ [ 0 ... XFEATURE_MAX - 1] = -1};
  85static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
  86
/* Bits tested in xstate_flags[] by xfeature_is_supervisor()/_aligned64() */
  87#define XSTATE_FLAG_SUPERVISOR	BIT(0)
  88#define XSTATE_FLAG_ALIGNED64	BIT(1)
 
 
 
 
 
 
 
 
  89
  90/*
  91 * Return whether the system supports a given xfeature.
  92 *
  93 * Also return the name of the (most advanced) feature that the caller requested:
  94 */
  95int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
  96{
  97	u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;
  98
  99	if (unlikely(feature_name)) {
 100		long xfeature_idx, max_idx;
 101		u64 xfeatures_print;
 102		/*
 103		 * So we use FLS here to be able to print the most advanced
 104		 * feature that was requested but is missing. So if a driver
 105		 * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
 106		 * missing AVX feature - this is the most informative message
 107		 * to users:
 108		 */
 109		if (xfeatures_missing)
 110			xfeatures_print = xfeatures_missing;
 111		else
 112			xfeatures_print = xfeatures_needed;
 113
 114		xfeature_idx = fls64(xfeatures_print)-1;
 115		max_idx = ARRAY_SIZE(xfeature_names)-1;
 116		xfeature_idx = min(xfeature_idx, max_idx);
 117
 118		*feature_name = xfeature_names[xfeature_idx];
 119	}
 120
 121	if (xfeatures_missing)
 122		return 0;
 123
 124	return 1;
 125}
 126EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
 127
 128static bool xfeature_is_aligned64(int xfeature_nr)
 129{
 130	return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
 
 
 
 
 
 
 
 
 
 
 
 131}
 132
 133static bool xfeature_is_supervisor(int xfeature_nr)
 134{
 135	return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
 136}
 137
 138static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 139{
 140	unsigned int offs, i;
 
 
 
 
 
 
 
 141
 142	/*
 143	 * Non-compacted format and legacy features use the cached fixed
 144	 * offsets.
 145	 */
 146	if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
 147	    xfeature <= XFEATURE_SSE)
 148		return xstate_offsets[xfeature];
 149
 150	/*
 151	 * Compacted format offsets depend on the actual content of the
 152	 * compacted xsave area which is determined by the xcomp_bv header
 153	 * field.
 154	 */
 155	offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;
 156	for_each_extended_xfeature(i, xcomp_bv) {
 157		if (xfeature_is_aligned64(i))
 158			offs = ALIGN(offs, 64);
 159		if (i == xfeature)
 160			break;
 161		offs += xstate_sizes[i];
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 162	}
 163	return offs;
 164}
 165
 166/*
 167 * Enable the extended processor state save/restore feature.
 168 * Called once per CPU onlining.
 169 */
 170void fpu__init_cpu_xstate(void)
 171{
 172	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
 173		return;
 174
 175	cr4_set_bits(X86_CR4_OSXSAVE);
 176
 177	/*
 178	 * Must happen after CR4 setup and before xsetbv() to allow KVM
 179	 * lazy passthrough.  Write independent of the dynamic state static
 180	 * key as that does not work on the boot CPU. This also ensures
 181	 * that any stale state is wiped out from XFD.
 182	 */
 183	if (cpu_feature_enabled(X86_FEATURE_XFD))
 184		wrmsrl(MSR_IA32_XFD, init_fpstate.xfd);
 185
 186	/*
 187	 * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
 188	 * managed by XSAVE{C, OPT, S} and XRSTOR{S}.  Only XSAVE user
 189	 * states can be set here.
 190	 */
 191	xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
 192
 193	/*
 194	 * MSR_IA32_XSS sets supervisor states managed by XSAVES.
 195	 */
 196	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
 197		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
 198				     xfeatures_mask_independent());
 199	}
 200}
 201
 202static bool xfeature_enabled(enum xfeature xfeature)
 
 
 
 
 
 203{
 204	return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
 205}
 206
 207/*
 208 * Record the offsets and sizes of various xstates contained
 209 * in the XSAVE state memory layout.
 210 */
 211static void __init setup_xstate_cache(void)
 212{
 213	u32 eax, ebx, ecx, edx, i;
 214	/* start at the beginning of the "extended state" */
 215	unsigned int last_good_offset = offsetof(struct xregs_state,
 216						 extended_state_area);
 217	/*
 218	 * The FP xstates and SSE xstates are legacy states. They are always
 219	 * in the fixed offsets in the xsave area in either compacted form
 220	 * or standard form.
 221	 */
 222	xstate_offsets[XFEATURE_FP]	= 0;
 223	xstate_sizes[XFEATURE_FP]	= offsetof(struct fxregs_state,
 224						   xmm_space);
 225
 226	xstate_offsets[XFEATURE_SSE]	= xstate_sizes[XFEATURE_FP];
 227	xstate_sizes[XFEATURE_SSE]	= sizeof_field(struct fxregs_state,
 228						       xmm_space);
 
 229
 230	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
 231		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
 232
 233		xstate_sizes[i] = eax;
 234		xstate_flags[i] = ecx;
 235
 236		/*
 237		 * If an xfeature is supervisor state, the offset in EBX is
 238		 * invalid, leave it to -1.
 239		 */
 240		if (xfeature_is_supervisor(i))
 241			continue;
 242
 243		xstate_offsets[i] = ebx;
 244
 
 245		/*
 246		 * In our xstate size checks, we assume that the highest-numbered
 247		 * xstate feature has the highest offset in the buffer.  Ensure
 248		 * it does.
 249		 */
 250		WARN_ONCE(last_good_offset > xstate_offsets[i],
 251			  "x86/fpu: misordered xstate at %d\n", last_good_offset);
 252
 253		last_good_offset = xstate_offsets[i];
 254	}
 255}
 256
 257static void __init print_xstate_feature(u64 xstate_mask)
 258{
 259	const char *feature_name;
 260
 261	if (cpu_has_xfeatures(xstate_mask, &feature_name))
 262		pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
 263}
 264
 265/*
 266 * Print out all the supported xstate features:
 267 */
 268static void __init print_xstate_features(void)
 269{
 270	print_xstate_feature(XFEATURE_MASK_FP);
 271	print_xstate_feature(XFEATURE_MASK_SSE);
 272	print_xstate_feature(XFEATURE_MASK_YMM);
 273	print_xstate_feature(XFEATURE_MASK_BNDREGS);
 274	print_xstate_feature(XFEATURE_MASK_BNDCSR);
 275	print_xstate_feature(XFEATURE_MASK_OPMASK);
 276	print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
 277	print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
 278	print_xstate_feature(XFEATURE_MASK_PKRU);
 279	print_xstate_feature(XFEATURE_MASK_PASID);
 280	print_xstate_feature(XFEATURE_MASK_CET_USER);
 281	print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
 282	print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
 283}
 284
 285/*
 286 * This check is important because it is easy to get XSTATE_*
 287 * confused with XSTATE_BIT_*.
 288 */
 289#define CHECK_XFEATURE(nr) do {		\
 290	WARN_ON(nr < FIRST_EXTENDED_XFEATURE);	\
 291	WARN_ON(nr >= XFEATURE_MAX);	\
 292} while (0)
 293
 294/*
 295 * Print out xstate component offsets and sizes
 
 296 */
 297static void __init print_xstate_offset_size(void)
 298{
 299	int i;
 300
 301	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
 302		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
 303			i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
 304			i, xstate_sizes[i]);
 305	}
 
 
 
 306}
 307
 308/*
 309 * This function is called only during boot time when x86 caps are not set
 310 * up and alternative can not be used yet.
 
 311 */
 312static __init void os_xrstor_booting(struct xregs_state *xstate)
 313{
 314	u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
 315	u32 lmask = mask;
 316	u32 hmask = mask >> 32;
 317	int err;
 318
 319	if (cpu_feature_enabled(X86_FEATURE_XSAVES))
 320		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
 321	else
 322		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
 323
 324	/*
 325	 * We should never fault when copying from a kernel buffer, and the FPU
 326	 * state we set at boot time should be valid.
 
 327	 */
 328	WARN_ON_FPU(err);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 329}
 330
 331/*
 332 * All supported features have either init state all zeros or are
 333 * handled in setup_init_fpu() individually. This is an explicit
 334 * feature list and does not use XFEATURE_MASK*SUPPORTED to catch
 335 * newly added supported features at build time and make people
 336 * actually look at the init state for the new feature.
 337 */
 338#define XFEATURES_INIT_FPSTATE_HANDLED		\
 339	(XFEATURE_MASK_FP |			\
 340	 XFEATURE_MASK_SSE |			\
 341	 XFEATURE_MASK_YMM |			\
 342	 XFEATURE_MASK_OPMASK |			\
 343	 XFEATURE_MASK_ZMM_Hi256 |		\
 344	 XFEATURE_MASK_Hi16_ZMM	 |		\
 345	 XFEATURE_MASK_PKRU |			\
 346	 XFEATURE_MASK_BNDREGS |		\
 347	 XFEATURE_MASK_BNDCSR |			\
 348	 XFEATURE_MASK_PASID |			\
 349	 XFEATURE_MASK_CET_USER |		\
 350	 XFEATURE_MASK_XTILE)
 351
 352/*
 353 * setup the xstate image representing the init state
 354 */
 355static void __init setup_init_fpu_buf(void)
 356{
 357	BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
 358		      XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
 359		     XFEATURES_INIT_FPSTATE_HANDLED);
 
 360
 361	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 362		return;
 363
 
 364	print_xstate_features();
 365
 366	xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures);
 
 367
 368	/*
 369	 * Init all the features state with header.xfeatures being 0x0
 370	 */
 371	os_xrstor_booting(&init_fpstate.regs.xsave);
 372
 373	/*
 374	 * All components are now in init state. Read the state back so
 375	 * that init_fpstate contains all non-zero init state. This only
 376	 * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
 377	 * those use the init optimization which skips writing data for
 378	 * components in init state.
 379	 *
 380	 * XSAVE could be used, but that would require to reshuffle the
 381	 * data when XSAVEC/S is available because XSAVEC/S uses xstate
 382	 * compaction. But doing so is a pointless exercise because most
 383	 * components have an all zeros init state except for the legacy
 384	 * ones (FP and SSE). Those can be saved with FXSAVE into the
 385	 * legacy area. Adding new features requires to ensure that init
 386	 * state is all zeroes or if not to add the necessary handling
 387	 * here.
 388	 */
 389	fxsave(&init_fpstate.regs.fxsave);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 390}
 391
 392int xfeature_size(int xfeature_nr)
 393{
 394	u32 eax, ebx, ecx, edx;
 395
 396	CHECK_XFEATURE(xfeature_nr);
 397	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
 398	return eax;
 399}
 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 401/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
 402static int validate_user_xstate_header(const struct xstate_header *hdr,
 403				       struct fpstate *fpstate)
 404{
 405	/* No unknown or supervisor features may be set */
 406	if (hdr->xfeatures & ~fpstate->user_xfeatures)
 407		return -EINVAL;
 408
 409	/* Userspace must use the uncompacted format */
 410	if (hdr->xcomp_bv)
 411		return -EINVAL;
 412
 413	/*
 414	 * If 'reserved' is shrunken to add a new field, make sure to validate
 415	 * that new field here!
 416	 */
 417	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
 418
 419	/* No reserved bits may be set */
 420	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
 421		return -EINVAL;
 422
 423	return 0;
 424}
 425
 426static void __init __xstate_dump_leaves(void)
 427{
 428	int i;
 429	u32 eax, ebx, ecx, edx;
 430	static int should_dump = 1;
 431
 432	if (!should_dump)
 433		return;
 434	should_dump = 0;
 435	/*
 436	 * Dump out a few leaves past the ones that we support
 437	 * just in case there are some goodies up there
 438	 */
 439	for (i = 0; i < XFEATURE_MAX + 10; i++) {
 440		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
 441		pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
 442			XSTATE_CPUID, i, eax, ebx, ecx, edx);
 443	}
 444}
 445
/*
 * XSTATE_WARN_ON(): WARN_ONCE() on @x with an "XSAVE consistency problem: "
 * prefix and, when the warning condition holds, dump the CPUID leaves via
 * __xstate_dump_leaves().
 */
 446#define XSTATE_WARN_ON(x, fmt, ...) do {					\
 447	if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) {	\
 448		__xstate_dump_leaves();						\
 449	}									\
 450} while (0)
 451
/*
 * XCHECK_SZ(): warn (and dump the CPUID leaves) when the CPU-reported size
 * @sz of xfeature @nr differs from sizeof(@__struct).
 * NOTE: the expression always evaluates to true, so a size mismatch is
 * reported but does not make the caller's check fail.
 */
 452#define XCHECK_SZ(sz, nr, __struct) ({					\
 453	if (WARN_ONCE(sz != sizeof(__struct),				\
 454	    "[%s]: struct is %zu bytes, cpu state %d bytes\n",		\
 455	    xfeature_names[nr], sizeof(__struct), sz)) {		\
 
 456		__xstate_dump_leaves();					\
 457	}								\
 458	true;								\
 459})
 460
 461
 462/**
 463 * check_xtile_data_against_struct - Check tile data state size.
 464 *
 465 * Calculate the state size by multiplying the single tile size which is
 466 * recorded in a C struct, and the number of tiles that the CPU informs.
 467 * Compare the provided size with the calculation.
 468 *
 469 * @size:	The tile data state size
 470 *
 471 * Returns:	0 on success, -EINVAL on mismatch.
 472 */
 473static int __init check_xtile_data_against_struct(int size)
 474{
 475	u32 max_palid, palid, state_size;
 476	u32 eax, ebx, ecx, edx;
 477	u16 max_tile;
 478
 479	/*
 480	 * Check the maximum palette id:
 481	 *   eax: the highest numbered palette subleaf.
 482	 */
 483	cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx);
 484
 485	/*
 486	 * Cross-check each tile size and find the maximum number of
 487	 * supported tiles.
 488	 */
 489	for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
 490		u16 tile_size, max;
 491
 492		/*
 493		 * Check the tile size info:
 494		 *   eax[31:16]:  bytes per title
 495		 *   ebx[31:16]:  the max names (or max number of tiles)
 496		 */
 497		cpuid_count(TILE_CPUID, palid, &eax, &ebx, &edx, &edx);
 498		tile_size = eax >> 16;
 499		max = ebx >> 16;
 500
 501		if (tile_size != sizeof(struct xtile_data)) {
 502			pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
 503			       __stringify(XFEATURE_XTILE_DATA),
 504			       sizeof(struct xtile_data), tile_size);
 505			__xstate_dump_leaves();
 506			return -EINVAL;
 507		}
 508
 509		if (max > max_tile)
 510			max_tile = max;
 511	}
 512
 513	state_size = sizeof(struct xtile_data) * max_tile;
 514	if (size != state_size) {
 515		pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
 516		       __stringify(XFEATURE_XTILE_DATA), state_size, size);
 517		__xstate_dump_leaves();
 518		return -EINVAL;
 519	}
 520	return 0;
 521}
 522
 523/*
 524 * We have a C struct for each 'xstate'.  We need to ensure
 525 * that our software representation matches what the CPU
 526 * tells us about the state's size.
 527 */
 528static bool __init check_xstate_against_struct(int nr)
 529{
 530	/*
 531	 * Ask the CPU for the size of the state.
 532	 */
 533	int sz = xfeature_size(nr);
 534
 535	/*
 536	 * Match each CPU state with the corresponding software
 537	 * structure.
 538	 */
 539	switch (nr) {
 540	case XFEATURE_YMM:	  return XCHECK_SZ(sz, nr, struct ymmh_struct);
 541	case XFEATURE_BNDREGS:	  return XCHECK_SZ(sz, nr, struct mpx_bndreg_state);
 542	case XFEATURE_BNDCSR:	  return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state);
 543	case XFEATURE_OPMASK:	  return XCHECK_SZ(sz, nr, struct avx_512_opmask_state);
 544	case XFEATURE_ZMM_Hi256:  return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state);
 545	case XFEATURE_Hi16_ZMM:	  return XCHECK_SZ(sz, nr, struct avx_512_hi16_state);
 546	case XFEATURE_PKRU:	  return XCHECK_SZ(sz, nr, struct pkru_state);
 547	case XFEATURE_PASID:	  return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
 548	case XFEATURE_XTILE_CFG:  return XCHECK_SZ(sz, nr, struct xtile_cfg);
 549	case XFEATURE_CET_USER:	  return XCHECK_SZ(sz, nr, struct cet_user_state);
 550	case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
 551	default:
 552		XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
 553		return false;
 
 
 
 554	}
 555
 556	return true;
 557}
 558
 559static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
 560{
 561	unsigned int topmost = fls64(xfeatures) -  1;
 562	unsigned int offset = xstate_offsets[topmost];
 563
 564	if (topmost <= XFEATURE_SSE)
 565		return sizeof(struct xregs_state);
 566
 567	if (compacted)
 568		offset = xfeature_get_offset(xfeatures, topmost);
 569	return offset + xstate_sizes[topmost];
 570}
 571
 572/*
 573 * This essentially double-checks what the cpu told us about
 574 * how large the XSAVE buffer needs to be.  We are recalculating
 575 * it to be safe.
 576 *
 577 * Independent XSAVE features allocate their own buffers and are not
 578 * covered by these checks. Only the size of the buffer for task->fpu
 579 * is checked here.
 580 */
 581static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
 582{
 583	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
 584	bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
 585	unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
 586	int i;
 587
 588	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
 589		if (!check_xstate_against_struct(i))
 590			return false;
 
 
 591		/*
 592		 * Supervisor state components can be managed only by
 593		 * XSAVES.
 
 
 
 
 
 
 
 
 
 
 
 
 594		 */
 595		if (!xsaves && xfeature_is_supervisor(i)) {
 596			XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
 597			return false;
 598		}
 
 
 
 599	}
 600	size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
 601	XSTATE_WARN_ON(size != kernel_size,
 602		       "size %u != kernel_size %u\n", size, kernel_size);
 603	return size == kernel_size;
 604}
 605
 
 606/*
 607 * Get total size of enabled xstates in XCR0 | IA32_XSS.
 608 *
 609 * Note the SDM's wording here.  "sub-function 0" only enumerates
 610 * the size of the *user* states.  If we use it to size a buffer
 611 * that we use 'XSAVES' on, we could potentially overflow the
 612 * buffer because 'XSAVES' saves system states too.
 613 *
 614 * This also takes compaction into account. So this works for
 615 * XSAVEC as well.
 616 */
 617static unsigned int __init get_compacted_size(void)
 618{
 619	unsigned int eax, ebx, ecx, edx;
 620	/*
 621	 * - CPUID function 0DH, sub-function 1:
 622	 *    EBX enumerates the size (in bytes) required by
 623	 *    the XSAVES instruction for an XSAVE area
 624	 *    containing all the state components
 625	 *    corresponding to bits currently set in
 626	 *    XCR0 | IA32_XSS.
 627	 *
 628	 * When XSAVES is not available but XSAVEC is (virt), then there
 629	 * are no supervisor states, but XSAVEC still uses compacted
 630	 * format.
 631	 */
 632	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
 633	return ebx;
 634}
 635
 636/*
 637 * Get the total size of the enabled xstates without the independent supervisor
 638 * features.
 639 */
 640static unsigned int __init get_xsave_compacted_size(void)
 641{
 642	u64 mask = xfeatures_mask_independent();
 643	unsigned int size;
 644
 645	if (!mask)
 646		return get_compacted_size();
 647
 648	/* Disable independent features. */
 649	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
 650
 651	/*
 652	 * Ask the hardware what size is required of the buffer.
 653	 * This is the size required for the task->fpu buffer.
 654	 */
 655	size = get_compacted_size();
 656
 657	/* Re-enable independent features so XSAVES will work on them again. */
 658	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
 659
 660	return size;
 661}
 662
 663static unsigned int __init get_xsave_size_user(void)
 664{
 665	unsigned int eax, ebx, ecx, edx;
 666	/*
 667	 * - CPUID function 0DH, sub-function 0:
 668	 *    EBX enumerates the size (in bytes) required by
 669	 *    the XSAVE instruction for an XSAVE area
 670	 *    containing all the *user* state components
 671	 *    corresponding to bits currently set in XCR0.
 672	 */
 673	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
 674	return ebx;
 675}
 676
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 677static int __init init_xstate_size(void)
 678{
 679	/* Recompute the context size for enabled features: */
 680	unsigned int user_size, kernel_size, kernel_default_size;
 681	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
 682
 683	/* Uncompacted user space size */
 684	user_size = get_xsave_size_user();
 685
 686	/*
 687	 * XSAVES kernel size includes supervisor states and uses compacted
 688	 * format. XSAVEC uses compacted format, but does not save
 689	 * supervisor states.
 690	 *
 691	 * XSAVE[OPT] do not support supervisor states so kernel and user
 692	 * size is identical.
 693	 */
 694	if (compacted)
 695		kernel_size = get_xsave_compacted_size();
 696	else
 697		kernel_size = user_size;
 698
 699	kernel_default_size =
 700		xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);
 701
 702	if (!paranoid_xstate_size_valid(kernel_size))
 703		return -EINVAL;
 704
 705	fpu_kernel_cfg.max_size = kernel_size;
 706	fpu_user_cfg.max_size = user_size;
 707
 708	fpu_kernel_cfg.default_size = kernel_default_size;
 709	fpu_user_cfg.default_size =
 710		xstate_calculate_size(fpu_user_cfg.default_features, false);
 711
 
 
 
 
 712	return 0;
 713}
 714
 715/*
 716 * We enabled the XSAVE hardware, but something went wrong and
 717 * we can not use it.  Disable it.
 718 */
 719static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
 720{
 721	fpu_kernel_cfg.max_features = 0;
 722	cr4_clear_bits(X86_CR4_OSXSAVE);
 723	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
 724
 725	/* Restore the legacy size.*/
 726	fpu_kernel_cfg.max_size = legacy_size;
 727	fpu_kernel_cfg.default_size = legacy_size;
 728	fpu_user_cfg.max_size = legacy_size;
 729	fpu_user_cfg.default_size = legacy_size;
 730
 731	/*
 732	 * Prevent enabling the static branch which enables writes to the
 733	 * XFD MSR.
 734	 */
 735	init_fpstate.xfd = 0;
 736
 737	fpstate_reset(&current->thread.fpu);
 738}
 739
/*
 * Enable and initialize the xsave feature.
 * Called once per system bootup.
 *
 * @legacy_size is handed to fpu__init_disable_system_xstate() when the
 * setup fails and XSAVE has to be disabled again.
 */
void __init fpu__init_system_xstate(unsigned int legacy_size)
{
	unsigned int eax, ebx, ecx, edx;
	u64 xfeatures;
	int err;
	int i;

	/* Nothing to set up without an FPU. */
	if (!boot_cpu_has(X86_FEATURE_FPU)) {
		pr_info("x86/fpu: No FPU detected\n");
		return;
	}

	/* Without XSAVE only report which legacy save instruction is used. */
	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
		pr_info("x86/fpu: x87 FPU will use %s\n",
			boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
		return;
	}

	/* XSAVE is advertised but the xstate CPUID leaf is out of range. */
	if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
		WARN_ON_FPU(1);
		return;
	}

	/*
	 * Find user xstates supported by the processor.
	 */
	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);

	/*
	 * Find supervisor xstates supported by the processor.
	 */
	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);

	if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
		/*
		 * This indicates that something really unexpected happened
		 * with the enumeration.  Disable XSAVE and try to continue
		 * booting without it.  This is too early to BUG().
		 */
		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
		       fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	/*
	 * Clear XSAVE features that are disabled in the normal CPUID.
	 */
	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
		unsigned short cid = xsave_cpuid_features[i];

		/* Careful: X86_FEATURE_FPU is 0! */
		if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
			fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
	}

	/* Dynamic user features are dropped when XFD is not enabled. */
	if (!cpu_feature_enabled(X86_FEATURE_XFD))
		fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	/* Supervisor features are only kept when XSAVES is enabled. */
	if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
	else
		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
					XFEATURE_MASK_SUPERVISOR_SUPPORTED;

	/* The user configuration is the kernel one restricted to user features. */
	fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
	fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;

	/* Clean out dynamic features from default */
	fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features;
	fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	fpu_user_cfg.default_features = fpu_user_cfg.max_features;
	fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	/* Store it for paranoia check at the end */
	xfeatures = fpu_kernel_cfg.max_features;

	/*
	 * Initialize the default XFD state in init_fpstate and enable the
	 * dynamic sizing mechanism if dynamic states are available.  The
	 * static key cannot be enabled here because this runs before
	 * jump_label_init(). This is delayed to an initcall.
	 */
	init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;

	/* Set up compaction feature bit */
	if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
	    cpu_feature_enabled(X86_FEATURE_XSAVES))
		setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);

	/* Enable xstate instructions to be able to continue with initialization: */
	fpu__init_cpu_xstate();

	/* Cache size, offset and flags for initialization */
	setup_xstate_cache();

	err = init_xstate_size();
	if (err)
		goto out_disable;

	/* Reset the state for the current task */
	fpstate_reset(&current->thread.fpu);

	/*
	 * Update info used for ptrace frames; use standard-format size and no
	 * supervisor xstates:
	 */
	update_regset_xstate_info(fpu_user_cfg.max_size,
				  fpu_user_cfg.max_features);

	/*
	 * init_fpstate excludes dynamic states as they are large but init
	 * state is zero.
	 */
	init_fpstate.size		= fpu_kernel_cfg.default_size;
	init_fpstate.xfeatures		= fpu_kernel_cfg.default_features;

	/* The default state has to fit into the init_fpstate register buffer. */
	if (init_fpstate.size > sizeof(init_fpstate.regs)) {
		pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n",
			sizeof(init_fpstate.regs), init_fpstate.size);
		goto out_disable;
	}

	setup_init_fpu_buf();

	/*
	 * Paranoia check whether something in the setup modified the
	 * xfeatures mask.
	 */
	if (xfeatures != fpu_kernel_cfg.max_features) {
		pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
		       xfeatures, fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	/*
	 * CPU capabilities initialization runs before FPU init. So
	 * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
	 * functional, set the feature bit so depending code works.
	 */
	setup_force_cpu_cap(X86_FEATURE_OSXSAVE);

	print_xstate_offset_size();
	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
		fpu_kernel_cfg.max_features,
		fpu_kernel_cfg.max_size,
		boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
	return;

out_disable:
	/* something went wrong, try to boot without any XSAVE support */
	fpu__init_disable_system_xstate(legacy_size);
}
 899
 900/*
 901 * Restore minimal FPU state after suspend:
 902 */
 903void fpu__resume_cpu(void)
 904{
 905	/*
 906	 * Restore XCR0 on xsave capable CPUs:
 907	 */
 908	if (cpu_feature_enabled(X86_FEATURE_XSAVE))
 909		xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
 910
 911	/*
 912	 * Restore IA32_XSS. The same CPUID bit enumerates support
 913	 * of XSAVES and MSR_IA32_XSS.
 914	 */
 915	if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
 916		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()  |
 917				     xfeatures_mask_independent());
 918	}
 919
 920	if (fpu_state_size_dynamic())
 921		wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd);
 922}
 923
 924/*
 925 * Given an xstate feature nr, calculate where in the xsave
 926 * buffer the state is.  Callers should ensure that the buffer
 927 * is valid.
 928 */
 929static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
 930{
 931	u64 xcomp_bv = xsave->header.xcomp_bv;
 932
 933	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
 934		return NULL;
 935
 936	if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
 937		if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
 938			return NULL;
 939	}
 940
 941	return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr);
 942}
 943
 944/*
 945 * Given the xsave area and a state inside, this function returns the
 946 * address of the state.
 947 *
 948 * This is the API that is called to get xstate address in either
 949 * standard format or compacted format of xsave area.
 950 *
 951 * Note that if there is no data for the field in the xsave buffer
 952 * this will return NULL.
 953 *
 954 * Inputs:
 955 *	xstate: the thread's storage area for all FPU data
 956 *	xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
 957 *	XFEATURE_SSE, etc...)
 958 * Output:
 959 *	address of the state in the xsave area, or NULL if the
 960 *	field is not present in the xsave buffer.
 961 */
 962void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
 963{
 964	/*
 965	 * Do we even *have* xsave state?
 966	 */
 967	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 968		return NULL;
 969
 970	/*
 971	 * We should not ever be requesting features that we
 972	 * have not enabled.
 
 973	 */
 974	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
 975		return NULL;
 976
 977	/*
 978	 * This assumes the last 'xsave*' instruction to
 979	 * have requested that 'xfeature_nr' be saved.
 980	 * If it did not, we might be seeing and old value
 981	 * of the field in the buffer.
 982	 *
 983	 * This can happen because the last 'xsave' did not
 984	 * request that this feature be saved (unlikely)
 985	 * or because the "init optimization" caused it
 986	 * to not be saved.
 987	 */
 988	if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
 989		return NULL;
 990
 991	return __raw_xsave_addr(xsave, xfeature_nr);
 992}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 993
 994#ifdef CONFIG_ARCH_HAS_PKEYS
 995
 
 
 996/*
 997 * This will go out and modify PKRU register to set the access
 998 * rights for @pkey to @init_val.
 999 */
1000int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
1001			      unsigned long init_val)
1002{
1003	u32 old_pkru, new_pkru_bits = 0;
1004	int pkey_shift;
 
1005
1006	/*
1007	 * This check implies XSAVE support.  OSPKE only gets
1008	 * set if we enable XSAVE and we enable PKU in XCR0.
1009	 */
1010	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
1011		return -EINVAL;
1012
1013	/*
1014	 * This code should only be called with valid 'pkey'
1015	 * values originating from in-kernel users.  Complain
1016	 * if a bad value is observed.
1017	 */
1018	if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
1019		return -EINVAL;
1020
1021	/* Set the bits we need in PKRU:  */
1022	if (init_val & PKEY_DISABLE_ACCESS)
1023		new_pkru_bits |= PKRU_AD_BIT;
1024	if (init_val & PKEY_DISABLE_WRITE)
1025		new_pkru_bits |= PKRU_WD_BIT;
1026
1027	/* Shift the bits in to the correct place in PKRU for pkey: */
1028	pkey_shift = pkey * PKRU_BITS_PER_PKEY;
1029	new_pkru_bits <<= pkey_shift;
1030
1031	/* Get old PKRU and mask off any old bits in place: */
1032	old_pkru = read_pkru();
1033	old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
1034
1035	/* Write old part along with new part: */
1036	write_pkru(old_pkru | new_pkru_bits);
1037
1038	return 0;
1039}
1040#endif /* ! CONFIG_ARCH_HAS_PKEYS */
1041
1042static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
1043			 void *init_xstate, unsigned int size)
 
 
 
 
 
1044{
1045	membuf_write(to, from_xstate ? xstate : init_xstate, size);
1046}
1047
1048/**
1049 * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
1050 * @to:		membuf descriptor
1051 * @fpstate:	The fpstate buffer from which to copy
1052 * @xfeatures:	The mask of xfeatures to save (XSAVE mode only)
1053 * @pkru_val:	The PKRU value to store in the PKRU component
1054 * @copy_mode:	The requested copy mode
1055 *
1056 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
1057 * format, i.e. from the kernel internal hardware dependent storage format
1058 * to the requested @mode. UABI XSTATE is always uncompacted!
1059 *
1060 * It supports partial copy but @to.pos always starts from zero.
1061 */
1062void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
1063			       u64 xfeatures, u32 pkru_val,
1064			       enum xstate_copy_mode copy_mode)
1065{
1066	const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
1067	struct xregs_state *xinit = &init_fpstate.regs.xsave;
1068	struct xregs_state *xsave = &fpstate->regs.xsave;
1069	struct xstate_header header;
1070	unsigned int zerofrom;
1071	u64 mask;
1072	int i;
1073
1074	memset(&header, 0, sizeof(header));
1075	header.xfeatures = xsave->header.xfeatures;
1076
1077	/* Mask out the feature bits depending on copy mode */
1078	switch (copy_mode) {
1079	case XSTATE_COPY_FP:
1080		header.xfeatures &= XFEATURE_MASK_FP;
1081		break;
1082
1083	case XSTATE_COPY_FX:
1084		header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
1085		break;
1086
1087	case XSTATE_COPY_XSAVE:
1088		header.xfeatures &= fpstate->user_xfeatures & xfeatures;
1089		break;
1090	}
1091
1092	/* Copy FP state up to MXCSR */
1093	copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
1094		     &xinit->i387, off_mxcsr);
1095
1096	/* Copy MXCSR when SSE or YMM are set in the feature mask */
1097	copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
1098		     &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
1099		     MXCSR_AND_FLAGS_SIZE);
1100
1101	/* Copy the remaining FP state */
1102	copy_feature(header.xfeatures & XFEATURE_MASK_FP,
1103		     &to, &xsave->i387.st_space, &xinit->i387.st_space,
1104		     sizeof(xsave->i387.st_space));
1105
1106	/* Copy the SSE state - shared with YMM, but independently managed */
1107	copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
1108		     &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
1109		     sizeof(xsave->i387.xmm_space));
1110
1111	if (copy_mode != XSTATE_COPY_XSAVE)
1112		goto out;
1113
1114	/* Zero the padding area */
1115	membuf_zero(&to, sizeof(xsave->i387.padding));
1116
1117	/* Copy xsave->i387.sw_reserved */
1118	membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));
1119
1120	/* Copy the user space relevant state of @xsave->header */
1121	membuf_write(&to, &header, sizeof(header));
1122
1123	zerofrom = offsetof(struct xregs_state, extended_state_area);
1124
1125	/*
1126	 * This 'mask' indicates which states to copy from fpstate.
1127	 * Those extended states that are not present in fpstate are
1128	 * either disabled or initialized:
1129	 *
1130	 * In non-compacted format, disabled features still occupy
1131	 * state space but there is no state to copy from in the
1132	 * compacted init_fpstate. The gap tracking will zero these
1133	 * states.
1134	 *
1135	 * The extended features have an all zeroes init state. Thus,
1136	 * remove them from 'mask' to zero those features in the user
1137	 * buffer instead of retrieving them from init_fpstate.
1138	 */
1139	mask = header.xfeatures;
1140
1141	for_each_extended_xfeature(i, mask) {
1142		/*
1143		 * If there was a feature or alignment gap, zero the space
1144		 * in the destination buffer.
1145		 */
1146		if (zerofrom < xstate_offsets[i])
1147			membuf_zero(&to, xstate_offsets[i] - zerofrom);
1148
1149		if (i == XFEATURE_PKRU) {
1150			struct pkru_state pkru = {0};
1151			/*
1152			 * PKRU is not necessarily up to date in the
1153			 * XSAVE buffer. Use the provided value.
1154			 */
1155			pkru.pkru = pkru_val;
1156			membuf_write(&to, &pkru, sizeof(pkru));
1157		} else {
1158			membuf_write(&to,
1159				     __raw_xsave_addr(xsave, i),
1160				     xstate_sizes[i]);
1161		}
1162		/*
1163		 * Keep track of the last copied state in the non-compacted
1164		 * target buffer for gap zeroing.
1165		 */
1166		zerofrom = xstate_offsets[i] + xstate_sizes[i];
1167	}
1168
1169out:
1170	if (to.left)
1171		membuf_zero(&to, to.left);
1172}
1173
1174/**
1175 * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
1176 * @to:		membuf descriptor
1177 * @tsk:	The task from which to copy the saved xstate
1178 * @copy_mode:	The requested copy mode
1179 *
1180 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
1181 * format, i.e. from the kernel internal hardware dependent storage format
1182 * to the requested @mode. UABI XSTATE is always uncompacted!
1183 *
1184 * It supports partial copy but @to.pos always starts from zero.
1185 */
1186void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
1187			     enum xstate_copy_mode copy_mode)
 
1188{
1189	__copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
1190				  tsk->thread.fpu.fpstate->user_xfeatures,
1191				  tsk->thread.pkru, copy_mode);
1192}
1193
1194static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
1195			    const void *kbuf, const void __user *ubuf)
1196{
1197	if (kbuf) {
1198		memcpy(dst, kbuf + offset, size);
1199	} else {
1200		if (copy_from_user(dst, ubuf + offset, size))
1201			return -EFAULT;
1202	}
1203	return 0;
1204}
1205
1206
1207/**
1208 * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
1209 * @fpstate:	The fpstate buffer to copy to
1210 * @kbuf:	The UABI format buffer, if it comes from the kernel
1211 * @ubuf:	The UABI format buffer, if it comes from userspace
1212 * @pkru:	The location to write the PKRU value to
1213 *
1214 * Converts from the UABI format into the kernel internal hardware
1215 * dependent format.
1216 *
1217 * This function ultimately has three different callers with distinct PKRU
1218 * behavior.
1219 * 1.	When called from sigreturn the PKRU register will be restored from
1220 *	@fpstate via an XRSTOR. Correctly copying the UABI format buffer to
1221 *	@fpstate is sufficient to cover this case, but the caller will also
1222 *	pass a pointer to the thread_struct's pkru field in @pkru and updating
1223 *	it is harmless.
1224 * 2.	When called from ptrace the PKRU register will be restored from the
1225 *	thread_struct's pkru field. A pointer to that is passed in @pkru.
1226 *	The kernel will restore it manually, so the XRSTOR behavior that resets
1227 *	the PKRU register to the hardware init value (0) if the corresponding
1228 *	xfeatures bit is not set is emulated here.
1229 * 3.	When called from KVM the PKRU register will be restored from the vcpu's
1230 *	pkru field. A pointer to that is passed in @pkru. KVM hasn't used
1231 *	XRSTOR and hasn't had the PKRU resetting behavior described above. To
1232 *	preserve that KVM behavior, it passes NULL for @pkru if the xfeatures
1233 *	bit is not set.
1234 */
1235static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
1236			       const void __user *ubuf, u32 *pkru)
1237{
1238	struct xregs_state *xsave = &fpstate->regs.xsave;
1239	unsigned int offset, size;
1240	struct xstate_header hdr;
1241	u64 mask;
1242	int i;
1243
1244	offset = offsetof(struct xregs_state, header);
1245	if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf))
 
 
1246		return -EFAULT;
1247
1248	if (validate_user_xstate_header(&hdr, fpstate))
1249		return -EINVAL;
 
 
 
 
1250
1251	/* Validate MXCSR when any of the related features is in use */
1252	mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM;
1253	if (hdr.xfeatures & mask) {
1254		u32 mxcsr[2];
1255
1256		offset = offsetof(struct fxregs_state, mxcsr);
1257		if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf))
1258			return -EFAULT;
1259
1260		/* Reserved bits in MXCSR must be zero. */
1261		if (mxcsr[0] & ~mxcsr_feature_mask)
1262			return -EINVAL;
1263
1264		/* SSE and YMM require MXCSR even when FP is not in use. */
1265		if (!(hdr.xfeatures & XFEATURE_MASK_FP)) {
1266			xsave->i387.mxcsr = mxcsr[0];
1267			xsave->i387.mxcsr_mask = mxcsr[1];
1268		}
1269	}
1270
1271	for (i = 0; i < XFEATURE_MAX; i++) {
1272		mask = BIT_ULL(i);
1273
1274		if (hdr.xfeatures & mask) {
1275			void *dst = __raw_xsave_addr(xsave, i);
 
1276
1277			offset = xstate_offsets[i];
1278			size = xstate_sizes[i];
1279
1280			if (copy_from_buffer(dst, offset, size, kbuf, ubuf))
1281				return -EFAULT;
 
 
 
1282		}
 
1283	}
1284
1285	if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
1286		struct pkru_state *xpkru;
1287
1288		xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);
1289		*pkru = xpkru->pkru;
1290	} else {
1291		/*
1292		 * KVM may pass NULL here to indicate that it does not need
1293		 * PKRU updated.
1294		 */
1295		if (pkru)
1296			*pkru = 0;
1297	}
1298
1299	/*
1300	 * The state that came in from userspace was user-state only.
1301	 * Mask all the user states out of 'xfeatures':
1302	 */
1303	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
 
1304
1305	/*
1306	 * Add back in the features that came in from userspace:
1307	 */
1308	xsave->header.xfeatures |= hdr.xfeatures;
1309
1310	return 0;
1311}
1312
1313/*
1314 * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
1315 * format and copy to the target thread. Used by ptrace and KVM.
1316 */
1317int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
1318{
1319	return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
1320}
1321
1322/*
1323 * Convert from a sigreturn standard-format user-space buffer to kernel
1324 * XSAVE[S] format and copy to the target thread. This is called from the
1325 * sigreturn() and rt_sigreturn() system calls.
1326 */
1327int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
1328				      const void __user *ubuf)
1329{
1330	return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru);
1331}
1332
1333static bool validate_independent_components(u64 mask)
1334{
1335	u64 xchk;
1336
1337	if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES)))
1338		return false;
1339
1340	xchk = ~xfeatures_mask_independent();
1341
1342	if (WARN_ON_ONCE(!mask || mask & xchk))
1343		return false;
1344
1345	return true;
1346}
1347
1348/**
1349 * xsaves - Save selected components to a kernel xstate buffer
1350 * @xstate:	Pointer to the buffer
1351 * @mask:	Feature mask to select the components to save
1352 *
1353 * The @xstate buffer must be 64 byte aligned and correctly initialized as
1354 * XSAVES does not write the full xstate header. Before first use the
1355 * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer
1356 * can #GP.
1357 *
1358 * The feature mask must be a subset of the independent features.
1359 */
1360void xsaves(struct xregs_state *xstate, u64 mask)
1361{
1362	int err;
1363
1364	if (!validate_independent_components(mask))
1365		return;
1366
1367	XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err);
1368	WARN_ON_ONCE(err);
1369}
1370
1371/**
1372 * xrstors - Restore selected components from a kernel xstate buffer
1373 * @xstate:	Pointer to the buffer
1374 * @mask:	Feature mask to select the components to restore
1375 *
1376 * The @xstate buffer must be 64 byte aligned and correctly initialized
1377 * otherwise XRSTORS from that buffer can #GP.
1378 *
1379 * Proper usage is to restore the state which was saved with
1380 * xsaves() into @xstate.
1381 *
1382 * The feature mask must be a subset of the independent features.
1383 */
1384void xrstors(struct xregs_state *xstate, u64 mask)
1385{
1386	int err;
1387
1388	if (!validate_independent_components(mask))
1389		return;
1390
1391	XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err);
1392	WARN_ON_ONCE(err);
1393}
1394
1395#if IS_ENABLED(CONFIG_KVM)
1396void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature)
1397{
1398	void *addr = get_xsave_addr(&fps->regs.xsave, xfeature);
1399
1400	if (addr)
1401		memset(addr, 0, xstate_sizes[xfeature]);
1402}
1403EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component);
1404#endif
1405
1406#ifdef CONFIG_X86_64
1407
1408#ifdef CONFIG_X86_DEBUG_FPU
1409/*
1410 * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
1411 * can safely operate on the @fpstate buffer.
 
 
1412 */
1413static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
1414{
1415	u64 xfd = __this_cpu_read(xfd_state);
1416
1417	if (fpstate->xfd == xfd)
1418		return true;
1419
1420	 /*
1421	  * The XFD MSR does not match fpstate->xfd. That's invalid when
1422	  * the passed in fpstate is current's fpstate.
1423	  */
1424	if (fpstate->xfd == current->thread.fpu.fpstate->xfd)
1425		return false;
1426
1427	/*
1428	 * XRSTOR(S) from init_fpstate are always correct as it will just
1429	 * bring all components into init state and not read from the
1430	 * buffer. XSAVE(S) raises #PF after init.
1431	 */
1432	if (fpstate == &init_fpstate)
1433		return rstor;
1434
1435	/*
1436	 * XSAVE(S): clone(), fpu_swap_kvm_fpu()
1437	 * XRSTORS(S): fpu_swap_kvm_fpu()
1438	 */
 
 
 
1439
1440	/*
1441	 * No XSAVE/XRSTOR instructions (except XSAVE itself) touch
1442	 * the buffer area for XFD-disabled state components.
1443	 */
1444	mask &= ~xfd;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1445
1446	/*
1447	 * Remove features which are valid in fpstate. They
1448	 * have space allocated in fpstate.
1449	 */
1450	mask &= ~fpstate->xfeatures;
1451
1452	/*
1453	 * Any remaining state components in 'mask' might be written
1454	 * by XSAVE/XRSTOR. Fail validation it found.
1455	 */
1456	return !mask;
1457}
1458
1459void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
1460{
1461	WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor));
1462}
1463#endif /* CONFIG_X86_DEBUG_FPU */
1464
1465static int __init xfd_update_static_branch(void)
1466{
1467	/*
1468	 * If init_fpstate.xfd has bits set then dynamic features are
1469	 * available and the dynamic sizing must be enabled.
1470	 */
1471	if (init_fpstate.xfd)
1472		static_branch_enable(&__fpu_state_size_dynamic);
1473	return 0;
1474}
1475arch_initcall(xfd_update_static_branch)
1476
1477void fpstate_free(struct fpu *fpu)
1478{
1479	if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
1480		vfree(fpu->fpstate);
1481}
1482
/**
 * fpstate_realloc - Reallocate struct fpstate for the requested new features
 *
 * @xfeatures:	A bitmap of xstate features which extend the enabled features
 *		of that task
 * @ksize:	The required size for the kernel buffer
 * @usize:	The required size for user space buffers
 * @guest_fpu:	Pointer to a guest FPU container. NULL for host allocations
 *
 * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
 * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
 * with large states are likely to live longer.
 *
 * Returns: 0 on success, -ENOMEM on allocation error.
 */
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
			   unsigned int usize, struct fpu_guest *guest_fpu)
{
	struct fpu *fpu = &current->thread.fpu;
	struct fpstate *curfps, *newfps = NULL;
	unsigned int fpsize;
	bool in_use;

	/*
	 * The allocation covers @ksize bytes of register state plus the
	 * part of struct fpstate in front of 'regs', rounded up to a
	 * multiple of 64 bytes.
	 */
	fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);

	newfps = vzalloc(fpsize);
	if (!newfps)
		return -ENOMEM;
	newfps->size = ksize;
	newfps->user_size = usize;
	/* Marks the buffer as one which has to be released with vfree() */
	newfps->is_valloc = true;

	/*
	 * When a guest FPU is supplied, use @guest_fpu->fpstate
	 * as reference independent whether it is in use or not.
	 */
	curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;

	/* Determine whether @curfps is the active fpstate */
	in_use = fpu->fpstate == curfps;

	if (guest_fpu) {
		/* Carry the guest related attributes over to the new buffer */
		newfps->is_guest = true;
		newfps->is_confidential = curfps->is_confidential;
		newfps->in_use = curfps->in_use;
		guest_fpu->xfeatures |= xfeatures;
		guest_fpu->uabi_size = usize;
	}

	fpregs_lock();
	/*
	 * If @curfps is in use, ensure that the current state is in the
	 * registers before swapping fpstate as that might invalidate it
	 * due to layout changes.
	 */
	if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
		fpregs_restore_userregs();

	/*
	 * The new buffer has the old features plus the requested ones;
	 * the requested ones are removed from its XFD mask.
	 */
	newfps->xfeatures = curfps->xfeatures | xfeatures;
	newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
	newfps->xfd = curfps->xfd & ~xfeatures;

	/* Do the final updates within the locked region */
	xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);

	if (guest_fpu) {
		guest_fpu->fpstate = newfps;
		/* If curfps is active, update the FPU fpstate pointer */
		if (in_use)
			fpu->fpstate = newfps;
	} else {
		fpu->fpstate = newfps;
	}

	if (in_use)
		xfd_update_state(fpu->fpstate);
	fpregs_unlock();

	/* Only free valloc'ed state */
	if (curfps && curfps->is_valloc)
		vfree(curfps);

	return 0;
}
1567
1568static int validate_sigaltstack(unsigned int usize)
1569{
1570	struct task_struct *thread, *leader = current->group_leader;
1571	unsigned long framesize = get_sigframe_size();
1572
1573	lockdep_assert_held(&current->sighand->siglock);
1574
1575	/* get_sigframe_size() is based on fpu_user_cfg.max_size */
1576	framesize -= fpu_user_cfg.max_size;
1577	framesize += usize;
1578	for_each_thread(leader, thread) {
1579		if (thread->sas_ss_size && thread->sas_ss_size < framesize)
1580			return -ENOSPC;
1581	}
1582	return 0;
1583}
1584
1585static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
1586{
1587	/*
1588	 * This deliberately does not exclude !XSAVES as we still might
1589	 * decide to optionally context switch XCR0 or talk the silicon
1590	 * vendors into extending XFD for the pre AMX states, especially
1591	 * AVX512.
1592	 */
1593	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
1594	struct fpu *fpu = &current->group_leader->thread.fpu;
1595	struct fpu_state_perm *perm;
1596	unsigned int ksize, usize;
1597	u64 mask;
1598	int ret = 0;
1599
1600	/* Check whether fully enabled */
1601	if ((permitted & requested) == requested)
1602		return 0;
 
1603
1604	/* Calculate the resulting kernel state size */
1605	mask = permitted | requested;
1606	/* Take supervisor states into account on the host */
1607	if (!guest)
1608		mask |= xfeatures_mask_supervisor();
1609	ksize = xstate_calculate_size(mask, compacted);
1610
1611	/* Calculate the resulting user state size */
1612	mask &= XFEATURE_MASK_USER_SUPPORTED;
1613	usize = xstate_calculate_size(mask, false);
1614
1615	if (!guest) {
1616		ret = validate_sigaltstack(usize);
1617		if (ret)
1618			return ret;
1619	}
1620
1621	perm = guest ? &fpu->guest_perm : &fpu->perm;
1622	/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
1623	WRITE_ONCE(perm->__state_perm, mask);
1624	/* Protected by sighand lock */
1625	perm->__state_size = ksize;
1626	perm->__user_state_size = usize;
1627	return ret;
1628}
1629
/*
 * Permissions array to map facilities with more than one component.
 *
 * Indexed by xfeature number; each entry holds the feature mask that is
 * requested for that index. Entries which are not listed are zero, which
 * xstate_request_perm() answers with -EOPNOTSUPP.
 */
static const u64 xstate_prctl_req[XFEATURE_MAX] = {
	[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
};
 
 
 
 
 
1636
1637static int xstate_request_perm(unsigned long idx, bool guest)
1638{
1639	u64 permitted, requested;
1640	int ret;
1641
1642	if (idx >= XFEATURE_MAX)
1643		return -EINVAL;
1644
1645	/*
1646	 * Look up the facility mask which can require more than
1647	 * one xstate component.
1648	 */
1649	idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
1650	requested = xstate_prctl_req[idx];
1651	if (!requested)
1652		return -EOPNOTSUPP;
1653
1654	if ((fpu_user_cfg.max_features & requested) != requested)
1655		return -EOPNOTSUPP;
1656
1657	/* Lockless quick check */
1658	permitted = xstate_get_group_perm(guest);
1659	if ((permitted & requested) == requested)
1660		return 0;
1661
1662	/* Protect against concurrent modifications */
1663	spin_lock_irq(&current->sighand->siglock);
1664	permitted = xstate_get_group_perm(guest);
1665
1666	/* First vCPU allocation locks the permissions. */
1667	if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
1668		ret = -EBUSY;
1669	else
1670		ret = __xstate_request_perm(permitted, requested, guest);
1671	spin_unlock_irq(&current->sighand->siglock);
1672	return ret;
1673}
1674
1675int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
1676{
1677	u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
1678	struct fpu_state_perm *perm;
1679	unsigned int ksize, usize;
1680	struct fpu *fpu;
1681
1682	if (!xfd_event) {
1683		if (!guest_fpu)
1684			pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
1685		return 0;
1686	}
1687
1688	/* Protect against concurrent modifications */
1689	spin_lock_irq(&current->sighand->siglock);
1690
1691	/* If not permitted let it die */
1692	if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
1693		spin_unlock_irq(&current->sighand->siglock);
1694		return -EPERM;
1695	}
1696
1697	fpu = &current->group_leader->thread.fpu;
1698	perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
1699	ksize = perm->__state_size;
1700	usize = perm->__user_state_size;
1701
1702	/*
1703	 * The feature is permitted. State size is sufficient.  Dropping
1704	 * the lock is safe here even if more features are added from
1705	 * another task, the retrieved buffer sizes are valid for the
1706	 * currently requested feature(s).
1707	 */
1708	spin_unlock_irq(&current->sighand->siglock);
1709
1710	/*
1711	 * Try to allocate a new fpstate. If that fails there is no way
1712	 * out.
1713	 */
1714	if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
1715		return -EFAULT;
1716	return 0;
1717}
1718
1719int xfd_enable_feature(u64 xfd_err)
1720{
1721	return __xfd_enable_feature(xfd_err, NULL);
1722}
1723
1724#else /* CONFIG_X86_64 */
1725static inline int xstate_request_perm(unsigned long idx, bool guest)
1726{
1727	return -EPERM;
1728}
1729#endif  /* !CONFIG_X86_64 */
1730
1731u64 xstate_get_guest_group_perm(void)
1732{
1733	return xstate_get_group_perm(true);
1734}
1735EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
1736
1737/**
1738 * fpu_xstate_prctl - xstate permission operations
1739 * @option:	A subfunction of arch_prctl()
1740 * @arg2:	option argument
1741 * Return:	0 if successful; otherwise, an error code
1742 *
1743 * Option arguments:
1744 *
1745 * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
1746 * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
1747 * ARCH_REQ_XCOMP_PERM: Facility number requested
1748 *
1749 * For facilities which require more than one XSTATE component, the request
1750 * must be the highest state component number related to that facility,
1751 * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
1752 * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
1753 */
1754long fpu_xstate_prctl(int option, unsigned long arg2)
1755{
1756	u64 __user *uptr = (u64 __user *)arg2;
1757	u64 permitted, supported;
1758	unsigned long idx = arg2;
1759	bool guest = false;
1760
1761	switch (option) {
1762	case ARCH_GET_XCOMP_SUPP:
1763		supported = fpu_user_cfg.max_features |	fpu_user_cfg.legacy_features;
1764		return put_user(supported, uptr);
1765
1766	case ARCH_GET_XCOMP_PERM:
1767		/*
1768		 * Lockless snapshot as it can also change right after the
1769		 * dropping the lock.
1770		 */
1771		permitted = xstate_get_host_group_perm();
1772		permitted &= XFEATURE_MASK_USER_SUPPORTED;
1773		return put_user(permitted, uptr);
1774
1775	case ARCH_GET_XCOMP_GUEST_PERM:
1776		permitted = xstate_get_guest_group_perm();
1777		permitted &= XFEATURE_MASK_USER_SUPPORTED;
1778		return put_user(permitted, uptr);
1779
1780	case ARCH_REQ_XCOMP_GUEST_PERM:
1781		guest = true;
1782		fallthrough;
1783
1784	case ARCH_REQ_XCOMP_PERM:
1785		if (!IS_ENABLED(CONFIG_X86_64))
1786			return -EOPNOTSUPP;
1787
1788		return xstate_request_perm(idx, guest);
1789
1790	default:
1791		return -EINVAL;
1792	}
1793}
1794
1795#ifdef CONFIG_PROC_PID_ARCH_STATUS
1796/*
1797 * Report the amount of time elapsed in millisecond since last AVX512
1798 * use in the task.
1799 */
1800static void avx512_status(struct seq_file *m, struct task_struct *task)
1801{
1802	unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
1803	long delta;
1804
1805	if (!timestamp) {
1806		/*
1807		 * Report -1 if no AVX512 usage
1808		 */
1809		delta = -1;
1810	} else {
1811		delta = (long)(jiffies - timestamp);
1812		/*
1813		 * Cap to LONG_MAX if time difference > LONG_MAX
1814		 */
1815		if (delta < 0)
1816			delta = LONG_MAX;
1817		delta = jiffies_to_msecs(delta);
1818	}
1819
1820	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
1821	seq_putc(m, '\n');
1822}
1823
1824/*
1825 * Report architecture specific information
1826 */
1827int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
1828			struct pid *pid, struct task_struct *task)
1829{
1830	/*
1831	 * Report AVX512 state if the processor and build option supported.
1832	 */
1833	if (cpu_feature_enabled(X86_FEATURE_AVX512F))
1834		avx512_status(m, task);
1835
1836	return 0;
1837}
1838#endif /* CONFIG_PROC_PID_ARCH_STATUS */