/*
 *  Copyright (C) 1994 Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  General FPU state handling cleanups
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */
#include <linux/module.h>
#include <linux/regset.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include <asm/sigcontext.h>
#include <asm/processor.h>
#include <asm/math_emu.h>
#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/user.h>

/*
 * Were we in an interrupt that interrupted kernel mode?
 *
 * Without eager FPU restore, we can do a kernel_fpu_begin/end() pair
 * *ONLY* if that pair does nothing at all: the thread must not have
 * the fpu (so that we don't try to save the FPU state), and TS must
 * be set (so that the clts/stts pair does nothing that is visible in
 * the interrupted kernel thread).
 *
 * Except for the eagerfpu case, where we return true unless the state
 * has already been saved by an earlier kernel_fpu_begin().
 */
static inline bool interrupted_kernel_fpu_idle(void)
{
	if (use_eager_fpu())
		return __thread_has_fpu(current);

	return !__thread_has_fpu(current) &&
		(read_cr0() & X86_CR0_TS);
}

/*
 * Were we in user mode (or vm86 mode) when we were
 * interrupted?
 *
 * Doing kernel_fpu_begin/end() is ok if we are running
 * in an interrupt context from user mode - we'll just
 * save the FPU state as required.
 */
static inline bool interrupted_user_mode(void)
{
	struct pt_regs *regs = get_irq_regs();
	return regs && user_mode_vm(regs);
}

/*
 * Can we use the FPU in kernel mode with the
 * whole "kernel_fpu_begin/end()" sequence?
 *
 * It's always ok in process context (i.e. "not interrupt"),
 * but it is sometimes ok even from an irq.
 */
bool irq_fpu_usable(void)
{
	return !in_interrupt() ||
		interrupted_user_mode() ||
		interrupted_kernel_fpu_idle();
}
EXPORT_SYMBOL(irq_fpu_usable);
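
/*
 * Illustrative usage sketch: kernel code that wants to execute SSE/AVX
 * instructions brackets them with kernel_fpu_begin()/kernel_fpu_end()
 * and must provide an integer fallback for when irq_fpu_usable()
 * returns false.  example_fill() and the loop bodies are hypothetical,
 * not part of this file.
 */
static __maybe_unused void example_fill(u32 *dst, size_t n)
{
	size_t i;

	if (!irq_fpu_usable()) {
		for (i = 0; i < n; i++)		/* integer fallback */
			dst[i] = 0;
		return;
	}

	kernel_fpu_begin();
	for (i = 0; i < n; i++)			/* stand-in for a SIMD loop */
		dst[i] = 0;
	kernel_fpu_end();
}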

void __kernel_fpu_begin(void)
{
	struct task_struct *me = current;

	if (__thread_has_fpu(me)) {
		__thread_clear_has_fpu(me);
		__save_init_fpu(me);
		/* We do 'stts()' in __kernel_fpu_end() */
	} else if (!use_eager_fpu()) {
		this_cpu_write(fpu_owner_task, NULL);
		clts();
	}
}
EXPORT_SYMBOL(__kernel_fpu_begin);

void __kernel_fpu_end(void)
{
	if (use_eager_fpu()) {
		/*
		 * For eager fpu, tsk_used_math() is true most of the time.
		 * Restore the user math state as we are done with the
		 * kernel usage.  In a few places - thread exit, signal
		 * handling etc - tsk_used_math() is false; those places
		 * take the proper actions themselves, so we don't need to
		 * restore the math state here.
		 */
		if (likely(tsk_used_math(current)))
			math_state_restore();
	} else {
		stts();
	}
}
EXPORT_SYMBOL(__kernel_fpu_end);
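
/*
 * For reference: the preemption-safe wrappers that callers actually use
 * are defined in <asm/i387.h> in kernels of this vintage, roughly as
 * follows (sketch, not a verbatim copy):
 *
 *	static inline void kernel_fpu_begin(void)
 *	{
 *		WARN_ON_ONCE(!irq_fpu_usable());
 *		preempt_disable();
 *		__kernel_fpu_begin();
 *	}
 *
 *	static inline void kernel_fpu_end(void)
 *	{
 *		__kernel_fpu_end();
 *		preempt_enable();
 *	}
 */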

void unlazy_fpu(struct task_struct *tsk)
{
	preempt_disable();
	if (__thread_has_fpu(tsk)) {
		__save_init_fpu(tsk);
		__thread_fpu_end(tsk);
	} else
		tsk->thread.fpu_counter = 0;
	preempt_enable();
}
EXPORT_SYMBOL(unlazy_fpu);

unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
unsigned int xstate_size;
EXPORT_SYMBOL_GPL(xstate_size);
static struct i387_fxsave_struct fx_scratch;

static void mxcsr_feature_mask_init(void)
{
	unsigned long mask = 0;

	if (cpu_has_fxsr) {
		memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
		asm volatile("fxsave %0" : "+m" (fx_scratch));
		mask = fx_scratch.mxcsr_mask;
		if (mask == 0)
			mask = 0x0000ffbf;
	}
	mxcsr_feature_mask &= mask;
}
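
/*
 * Illustrative example: FXSAVE reports the supported MXCSR bits in
 * mxcsr_mask, or 0 on early CPUs, in which case the architecturally
 * defined default 0x0000ffbf is assumed.  With that mask, a
 * user-supplied MXCSR of 0xffff2f80 would later be clamped to
 * 0x00002f80 in xfpregs_set()/xstateregs_set(), clearing reserved bits
 * that would otherwise fault on FXRSTOR.
 */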

static void init_thread_xstate(void)
{
	/*
	 * Note that xstate_size might be overwritten later during
	 * xsave_init().
	 */

	if (!cpu_has_fpu) {
		/*
		 * Disable xsave as we do not support it if i387
		 * emulation is enabled.
		 */
		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
		xstate_size = sizeof(struct i387_soft_struct);
		return;
	}

	if (cpu_has_fxsr)
		xstate_size = sizeof(struct i387_fxsave_struct);
	else
		xstate_size = sizeof(struct i387_fsave_struct);
}

/*
 * Called at bootup to set up the initial FPU state that is later cloned
 * into all processes.
 */

void fpu_init(void)
{
	unsigned long cr0;
	unsigned long cr4_mask = 0;

#ifndef CONFIG_MATH_EMULATION
	if (!cpu_has_fpu) {
		pr_emerg("No FPU found and no math emulation present\n");
		pr_emerg("Giving up\n");
		for (;;)
			asm volatile("hlt");
	}
#endif
	if (cpu_has_fxsr)
		cr4_mask |= X86_CR4_OSFXSR;
	if (cpu_has_xmm)
		cr4_mask |= X86_CR4_OSXMMEXCPT;
	if (cr4_mask)
		set_in_cr4(cr4_mask);

	cr0 = read_cr0();
	cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
	if (!cpu_has_fpu)
		cr0 |= X86_CR0_EM;
	write_cr0(cr0);

	/*
	 * init_thread_xstate is only called once to avoid overriding
	 * xstate_size during boot time or during CPU hotplug.
	 */
	if (xstate_size == 0)
		init_thread_xstate();

	mxcsr_feature_mask_init();
	xsave_init();
	eager_fpu_init();
}

void fpu_finit(struct fpu *fpu)
{
	if (!cpu_has_fpu) {
		finit_soft_fpu(&fpu->state->soft);
		return;
	}

	if (cpu_has_fxsr) {
		fx_finit(&fpu->state->fxsave);
	} else {
		struct i387_fsave_struct *fp = &fpu->state->fsave;
		memset(fp, 0, xstate_size);
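		/*
		 * Power-on defaults as FNINIT would produce them: FCW 0x037f,
		 * FSW 0x0000, all tags empty (0xffff); the unused high 16
		 * bits of each 32-bit slot are conventionally stored as ones.
		 */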
		fp->cwd = 0xffff037fu;
		fp->swd = 0xffff0000u;
		fp->twd = 0xffffffffu;
		fp->fos = 0xffff0000u;
	}
}
EXPORT_SYMBOL_GPL(fpu_finit);

/*
 * The _current_ task is using the FPU for the first time, so
 * initialize it: set the mxcsr to its default reset value if we
 * support XMM instructions, and then remember that the current task
 * has used the FPU.
 */
int init_fpu(struct task_struct *tsk)
{
	int ret;

	if (tsk_used_math(tsk)) {
		if (cpu_has_fpu && tsk == current)
			unlazy_fpu(tsk);
		tsk->thread.fpu.last_cpu = ~0;
		return 0;
	}

	/*
	 * Memory allocation at the first usage of the FPU and other state.
	 */
	ret = fpu_alloc(&tsk->thread.fpu);
	if (ret)
		return ret;

	fpu_finit(&tsk->thread.fpu);

	set_stopped_child_used_math(tsk);
	return 0;
}
EXPORT_SYMBOL_GPL(init_fpu);

/*
 * The xstateregs_active() routine is the same as the fpregs_active()
 * routine, as the "regset->n" for the xstate regset will be updated
 * based on the feature capabilities supported by the xsave.
 */
int fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
	return tsk_used_math(target) ? regset->n : 0;
}

int xfpregs_active(struct task_struct *target, const struct user_regset *regset)
{
	return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0;
}

int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
		unsigned int pos, unsigned int count,
		void *kbuf, void __user *ubuf)
{
	int ret;

	if (!cpu_has_fxsr)
		return -ENODEV;

	ret = init_fpu(target);
	if (ret)
		return ret;

	sanitize_i387_state(target);

	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
				   &target->thread.fpu.state->fxsave, 0, -1);
}

int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
		unsigned int pos, unsigned int count,
		const void *kbuf, const void __user *ubuf)
{
	int ret;

	if (!cpu_has_fxsr)
		return -ENODEV;

	ret = init_fpu(target);
	if (ret)
		return ret;

	sanitize_i387_state(target);

	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
				 &target->thread.fpu.state->fxsave, 0, -1);

	/*
	 * mxcsr reserved bits must be masked to zero for security reasons.
	 */
	target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;

	/*
	 * update the header bits in the xsave header, indicating the
	 * presence of FP and SSE state.
	 */
	if (cpu_has_xsave)
		target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;

	return ret;
}

int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
		unsigned int pos, unsigned int count,
		void *kbuf, void __user *ubuf)
{
	int ret;

	if (!cpu_has_xsave)
		return -ENODEV;

	ret = init_fpu(target);
	if (ret)
		return ret;

	/*
	 * Copy the 48 software-defined bytes into the xstate memory
	 * layout in the thread struct first, so that we can copy the
	 * entire xstateregs to the user using one user_regset_copyout().
	 */
	memcpy(&target->thread.fpu.state->fxsave.sw_reserved,
	       xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));

	/*
	 * Copy the xstate memory layout.
	 */
	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
				  &target->thread.fpu.state->xsave, 0, -1);
	return ret;
}

int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
		  unsigned int pos, unsigned int count,
		  const void *kbuf, const void __user *ubuf)
{
	int ret;
	struct xsave_hdr_struct *xsave_hdr;

	if (!cpu_has_xsave)
		return -ENODEV;

	ret = init_fpu(target);
	if (ret)
		return ret;

	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
				 &target->thread.fpu.state->xsave, 0, -1);

	/*
	 * mxcsr reserved bits must be masked to zero for security reasons.
	 */
	target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;

	xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr;

	xsave_hdr->xstate_bv &= pcntxt_mask;
	/*
	 * These bits must be zero.
	 */
	xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;

	return ret;
}

#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION

/*
 * FPU tag word conversions.
 */

static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
{
	unsigned int tmp; /* to avoid 16 bit prefixes in the code */

	/* Transform each pair of bits into 01 (valid) or 00 (empty) */
	tmp = ~twd;
	tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
	/* and move the valid bits to the lower byte. */
	tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
	tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
	tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */

	return tmp;
}
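
/*
 * Worked example (illustrative): for twd == 0xfffc - register 0 tagged
 * 00 (valid), registers 1-7 tagged 11 (empty) - the low 16 bits of
 * ~twd are 0x0003, and the fold sequence above compresses that to
 * 0x01: only bit 0 of the FXSR tag byte ends up set.
 */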

#define FPREG_ADDR(f, n)	((void *)&(f)->st_space + (n) * 16)
#define FP_EXP_TAG_VALID	0
#define FP_EXP_TAG_ZERO		1
#define FP_EXP_TAG_SPECIAL	2
#define FP_EXP_TAG_EMPTY	3

static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
{
	struct _fpxreg *st;
	u32 tos = (fxsave->swd >> 11) & 7;
	u32 twd = (unsigned long) fxsave->twd;
	u32 tag;
	u32 ret = 0xffff0000u;
	int i;

	for (i = 0; i < 8; i++, twd >>= 1) {
		if (twd & 0x1) {
			st = FPREG_ADDR(fxsave, (i - tos) & 7);

			switch (st->exponent & 0x7fff) {
			case 0x7fff:
				tag = FP_EXP_TAG_SPECIAL;
				break;
			case 0x0000:
				if (!st->significand[0] &&
				    !st->significand[1] &&
				    !st->significand[2] &&
				    !st->significand[3])
					tag = FP_EXP_TAG_ZERO;
				else
					tag = FP_EXP_TAG_SPECIAL;
				break;
			default:
				if (st->significand[3] & 0x8000)
					tag = FP_EXP_TAG_VALID;
				else
					tag = FP_EXP_TAG_SPECIAL;
				break;
			}
		} else {
			tag = FP_EXP_TAG_EMPTY;
		}
		ret |= tag << (2 * i);
	}
	return ret;
}
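
/*
 * Worked example (illustrative), the reverse direction: with only ST(0)
 * in use (fxsave->twd == 0x01, tos == 0) and ST(0) holding 1.0
 * (exponent 0x3fff, top significand bit set), the function returns
 * 0xfffffffc - tag 00 (valid) for register 0 and 11 (empty) for the
 * other seven.
 */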

/*
 * FXSR floating point environment conversions.
 */

void
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
{
	struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
	struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
	struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
	int i;

	env->cwd = fxsave->cwd | 0xffff0000u;
	env->swd = fxsave->swd | 0xffff0000u;
	env->twd = twd_fxsr_to_i387(fxsave);

#ifdef CONFIG_X86_64
	env->fip = fxsave->rip;
	env->foo = fxsave->rdp;
	/*
	 * These should actually be the ds/cs at the time of the FPU
	 * exception, but that information is not available in 64-bit mode.
	 */
	env->fcs = task_pt_regs(tsk)->cs;
	if (tsk == current) {
		savesegment(ds, env->fos);
	} else {
		env->fos = tsk->thread.ds;
	}
	env->fos |= 0xffff0000;
#else
	env->fip = fxsave->fip;
	env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
	env->foo = fxsave->foo;
	env->fos = fxsave->fos;
#endif

	for (i = 0; i < 8; ++i)
		memcpy(&to[i], &from[i], sizeof(to[0]));
}

void convert_to_fxsr(struct task_struct *tsk,
		     const struct user_i387_ia32_struct *env)
{
	struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
	struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
	struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
	int i;

	fxsave->cwd = env->cwd;
	fxsave->swd = env->swd;
	fxsave->twd = twd_i387_to_fxsr(env->twd);
	fxsave->fop = (u16) ((u32) env->fcs >> 16);
#ifdef CONFIG_X86_64
	fxsave->rip = env->fip;
	fxsave->rdp = env->foo;
	/* cs and ds ignored */
#else
	fxsave->fip = env->fip;
	fxsave->fcs = (env->fcs & 0xffff);
	fxsave->foo = env->foo;
	fxsave->fos = env->fos;
#endif

	for (i = 0; i < 8; ++i)
		memcpy(&to[i], &from[i], sizeof(from[0]));
}
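
/*
 * Illustrative round trip, 32-bit case: with fxsave->fcs == 0x0023 and
 * fxsave->fop == 0x059d, convert_from_fxsr() packs
 * env->fcs = 0x059d0023; convert_to_fxsr() then recovers fop == 0x059d
 * and fcs == 0x0023 from that single 32-bit field.
 */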

int fpregs_get(struct task_struct *target, const struct user_regset *regset,
	       unsigned int pos, unsigned int count,
	       void *kbuf, void __user *ubuf)
{
	struct user_i387_ia32_struct env;
	int ret;

	ret = init_fpu(target);
	if (ret)
		return ret;

	if (!static_cpu_has(X86_FEATURE_FPU))
		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);

	if (!cpu_has_fxsr)
		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
					   &target->thread.fpu.state->fsave, 0,
					   -1);

	sanitize_i387_state(target);

	if (kbuf && pos == 0 && count == sizeof(env)) {
		convert_from_fxsr(kbuf, target);
		return 0;
	}

	convert_from_fxsr(&env, target);

	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
}

int fpregs_set(struct task_struct *target, const struct user_regset *regset,
	       unsigned int pos, unsigned int count,
	       const void *kbuf, const void __user *ubuf)
{
	struct user_i387_ia32_struct env;
	int ret;

	ret = init_fpu(target);
	if (ret)
		return ret;

	sanitize_i387_state(target);

	if (!static_cpu_has(X86_FEATURE_FPU))
		return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);

	if (!cpu_has_fxsr)
		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
					  &target->thread.fpu.state->fsave, 0,
					  -1);

	if (pos > 0 || count < sizeof(env))
		convert_from_fxsr(&env, target);

	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
	if (!ret)
		convert_to_fxsr(target, &env);

	/*
	 * update the header bit in the xsave header, indicating the
	 * presence of FP.
	 */
	if (cpu_has_xsave)
		target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
	return ret;
}

/*
 * FPU state for core dumps.
 * This is only used for a.out dumps now.
 * It is declared generically using elf_fpregset_t (which is
 * struct user_i387_struct) but is in fact only used for 32-bit
 * dumps, so on 64-bit it is really struct user_i387_ia32_struct.
 */
int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
{
	struct task_struct *tsk = current;
	int fpvalid;

	fpvalid = !!used_math();
	if (fpvalid)
		fpvalid = !fpregs_get(tsk, NULL,
				      0, sizeof(struct user_i387_ia32_struct),
				      fpu, NULL);

	return fpvalid;
}
EXPORT_SYMBOL(dump_fpu);

#endif	/* CONFIG_X86_32 || CONFIG_IA32_EMULATION */

static int __init no_387(char *s)
{
	setup_clear_cpu_cap(X86_FEATURE_FPU);
	return 1;
}

__setup("no387", no_387);
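
/*
 * Note: booting with "no387" on the kernel command line clears
 * X86_FEATURE_FPU, forcing the i387 software emulation path; such a
 * kernel must be built with CONFIG_MATH_EMULATION, or fpu_init() above
 * will halt the machine.
 */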

void fpu_detect(struct cpuinfo_x86 *c)
{
	unsigned long cr0;
	u16 fsw, fcw;

	fsw = fcw = 0xffff;

	cr0 = read_cr0();
	cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
	write_cr0(cr0);

	asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
		     : "+m" (fsw), "+m" (fcw));

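	/*
	 * After FNINIT a working FPU reads back FSW == 0x0000 and
	 * FCW == 0x037f (note 0x037f & 0x103f == 0x003f); if no FPU
	 * responds, the 0xffff preloads above are left untouched and
	 * the check below fails.
	 */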
	if (fsw == 0 && (fcw & 0x103f) == 0x003f)
		set_cpu_cap(c, X86_FEATURE_FPU);
	else
		clear_cpu_cap(c, X86_FEATURE_FPU);

	/* The final cr0 value is set in fpu_init() */
}