v6.13.7
  1/* SPDX-License-Identifier: GPL-2.0 */
  2#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  3
  4#include <linux/kvm_host.h>
  5#include "x86.h"
  6#include "kvm_cache_regs.h"
  7#include "kvm_emulate.h"
  8#include "smm.h"
  9#include "cpuid.h"
 10#include "trace.h"
 11
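/*
 * Compile-time layout checks: each CHECK_SMRAM*_OFFSET() invocation below
 * asserts that a field of kvm_smram_state_32/64 sits at the expected offset
 * in the SMRAM state-save area.  Offsets are given relative to SMBASE, and
 * 0xFE00 is subtracted because the structs describe only the final 512 bytes
 * of SMRAM (the region written back at SMBASE + 0xFE00 in enter_smm()).
 */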
 12#define CHECK_SMRAM32_OFFSET(field, offset) \
 13	ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)
 14
 15#define CHECK_SMRAM64_OFFSET(field, offset) \
 16	ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)
 17
 18static void check_smram_offsets(void)
 19{
 20	/* 32 bit SMRAM image */
 21	CHECK_SMRAM32_OFFSET(reserved1,			0xFE00);
 22	CHECK_SMRAM32_OFFSET(smbase,			0xFEF8);
 23	CHECK_SMRAM32_OFFSET(smm_revision,		0xFEFC);
 24	CHECK_SMRAM32_OFFSET(io_inst_restart,		0xFF00);
 25	CHECK_SMRAM32_OFFSET(auto_hlt_restart,		0xFF02);
 26	CHECK_SMRAM32_OFFSET(io_restart_rdi,		0xFF04);
 27	CHECK_SMRAM32_OFFSET(io_restart_rcx,		0xFF08);
 28	CHECK_SMRAM32_OFFSET(io_restart_rsi,		0xFF0C);
 29	CHECK_SMRAM32_OFFSET(io_restart_rip,		0xFF10);
 30	CHECK_SMRAM32_OFFSET(cr4,			0xFF14);
 31	CHECK_SMRAM32_OFFSET(reserved2,			0xFF18);
 32	CHECK_SMRAM32_OFFSET(int_shadow,		0xFF1A);
 33	CHECK_SMRAM32_OFFSET(reserved3,			0xFF1B);
 34	CHECK_SMRAM32_OFFSET(ds,			0xFF2C);
 35	CHECK_SMRAM32_OFFSET(fs,			0xFF38);
 36	CHECK_SMRAM32_OFFSET(gs,			0xFF44);
 37	CHECK_SMRAM32_OFFSET(idtr,			0xFF50);
 38	CHECK_SMRAM32_OFFSET(tr,			0xFF5C);
 39	CHECK_SMRAM32_OFFSET(gdtr,			0xFF6C);
 40	CHECK_SMRAM32_OFFSET(ldtr,			0xFF78);
 41	CHECK_SMRAM32_OFFSET(es,			0xFF84);
 42	CHECK_SMRAM32_OFFSET(cs,			0xFF90);
 43	CHECK_SMRAM32_OFFSET(ss,			0xFF9C);
 44	CHECK_SMRAM32_OFFSET(es_sel,			0xFFA8);
 45	CHECK_SMRAM32_OFFSET(cs_sel,			0xFFAC);
 46	CHECK_SMRAM32_OFFSET(ss_sel,			0xFFB0);
 47	CHECK_SMRAM32_OFFSET(ds_sel,			0xFFB4);
 48	CHECK_SMRAM32_OFFSET(fs_sel,			0xFFB8);
 49	CHECK_SMRAM32_OFFSET(gs_sel,			0xFFBC);
 50	CHECK_SMRAM32_OFFSET(ldtr_sel,			0xFFC0);
 51	CHECK_SMRAM32_OFFSET(tr_sel,			0xFFC4);
 52	CHECK_SMRAM32_OFFSET(dr7,			0xFFC8);
 53	CHECK_SMRAM32_OFFSET(dr6,			0xFFCC);
 54	CHECK_SMRAM32_OFFSET(gprs,			0xFFD0);
 55	CHECK_SMRAM32_OFFSET(eip,			0xFFF0);
 56	CHECK_SMRAM32_OFFSET(eflags,			0xFFF4);
 57	CHECK_SMRAM32_OFFSET(cr3,			0xFFF8);
 58	CHECK_SMRAM32_OFFSET(cr0,			0xFFFC);
 59
 60	/* 64 bit SMRAM image */
 61	CHECK_SMRAM64_OFFSET(es,			0xFE00);
 62	CHECK_SMRAM64_OFFSET(cs,			0xFE10);
 63	CHECK_SMRAM64_OFFSET(ss,			0xFE20);
 64	CHECK_SMRAM64_OFFSET(ds,			0xFE30);
 65	CHECK_SMRAM64_OFFSET(fs,			0xFE40);
 66	CHECK_SMRAM64_OFFSET(gs,			0xFE50);
 67	CHECK_SMRAM64_OFFSET(gdtr,			0xFE60);
 68	CHECK_SMRAM64_OFFSET(ldtr,			0xFE70);
 69	CHECK_SMRAM64_OFFSET(idtr,			0xFE80);
 70	CHECK_SMRAM64_OFFSET(tr,			0xFE90);
 71	CHECK_SMRAM64_OFFSET(io_restart_rip,		0xFEA0);
 72	CHECK_SMRAM64_OFFSET(io_restart_rcx,		0xFEA8);
 73	CHECK_SMRAM64_OFFSET(io_restart_rsi,		0xFEB0);
 74	CHECK_SMRAM64_OFFSET(io_restart_rdi,		0xFEB8);
 75	CHECK_SMRAM64_OFFSET(io_restart_dword,		0xFEC0);
 76	CHECK_SMRAM64_OFFSET(reserved1,			0xFEC4);
 77	CHECK_SMRAM64_OFFSET(io_inst_restart,		0xFEC8);
 78	CHECK_SMRAM64_OFFSET(auto_hlt_restart,		0xFEC9);
 79	CHECK_SMRAM64_OFFSET(amd_nmi_mask,		0xFECA);
 80	CHECK_SMRAM64_OFFSET(int_shadow,		0xFECB);
 81	CHECK_SMRAM64_OFFSET(reserved2,			0xFECC);
 82	CHECK_SMRAM64_OFFSET(efer,			0xFED0);
 83	CHECK_SMRAM64_OFFSET(svm_guest_flag,		0xFED8);
 84	CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa,	0xFEE0);
 85	CHECK_SMRAM64_OFFSET(svm_guest_virtual_int,	0xFEE8);
 86	CHECK_SMRAM64_OFFSET(reserved3,			0xFEF0);
 87	CHECK_SMRAM64_OFFSET(smm_revison,		0xFEFC);
 88	CHECK_SMRAM64_OFFSET(smbase,			0xFF00);
 89	CHECK_SMRAM64_OFFSET(reserved4,			0xFF04);
 90	CHECK_SMRAM64_OFFSET(ssp,			0xFF18);
 91	CHECK_SMRAM64_OFFSET(svm_guest_pat,		0xFF20);
 92	CHECK_SMRAM64_OFFSET(svm_host_efer,		0xFF28);
 93	CHECK_SMRAM64_OFFSET(svm_host_cr4,		0xFF30);
 94	CHECK_SMRAM64_OFFSET(svm_host_cr3,		0xFF38);
 95	CHECK_SMRAM64_OFFSET(svm_host_cr0,		0xFF40);
 96	CHECK_SMRAM64_OFFSET(cr4,			0xFF48);
 97	CHECK_SMRAM64_OFFSET(cr3,			0xFF50);
 98	CHECK_SMRAM64_OFFSET(cr0,			0xFF58);
 99	CHECK_SMRAM64_OFFSET(dr7,			0xFF60);
100	CHECK_SMRAM64_OFFSET(dr6,			0xFF68);
101	CHECK_SMRAM64_OFFSET(rflags,			0xFF70);
102	CHECK_SMRAM64_OFFSET(rip,			0xFF78);
103	CHECK_SMRAM64_OFFSET(gprs,			0xFF80);
104
105	BUILD_BUG_ON(sizeof(union kvm_smram) != 512);
106}
107
108#undef CHECK_SMRAM64_OFFSET
109#undef CHECK_SMRAM32_OFFSET
110
111
112void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
113{
114	trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
115
116	if (entering_smm) {
117		vcpu->arch.hflags |= HF_SMM_MASK;
118	} else {
119		vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
120
121		/* Process a latched INIT or SMI, if any.  */
122		kvm_make_request(KVM_REQ_EVENT, vcpu);
123
124		/*
125		 * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
126		 * on SMM exit we still need to reload them from
127		 * guest memory
128		 */
129		vcpu->arch.pdptrs_from_userspace = false;
130	}
131
132	kvm_mmu_reset_context(vcpu);
133}
134
135void process_smi(struct kvm_vcpu *vcpu)
136{
137	vcpu->arch.smi_pending = true;
138	kvm_make_request(KVM_REQ_EVENT, vcpu);
139}
140
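/*
 * Pack a kvm_segment's attribute bits (type, S, DPL, P, AVL, L, D/B, G) into
 * the 32-bit "flags" format stored in the 32-bit SMRAM segment state; the
 * 64-bit image stores the same bits shifted down by 8 as "attributes".
 */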
141static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
142{
143	u32 flags = 0;
144	flags |= seg->g       << 23;
145	flags |= seg->db      << 22;
146	flags |= seg->l       << 21;
147	flags |= seg->avl     << 20;
148	flags |= seg->present << 15;
149	flags |= seg->dpl     << 13;
150	flags |= seg->s       << 12;
151	flags |= seg->type    << 8;
152	return flags;
153}
154
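/*
 * Save one segment register (selector, base, limit, attributes) into the
 * 32-bit SMRAM image; the 64-bit variant below does the same for the
 * 64-bit layout.
 */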
155static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
156				  struct kvm_smm_seg_state_32 *state,
157				  u32 *selector, int n)
158{
159	struct kvm_segment seg;
160
161	kvm_get_segment(vcpu, &seg, n);
162	*selector = seg.selector;
163	state->base = seg.base;
164	state->limit = seg.limit;
165	state->flags = enter_smm_get_segment_flags(&seg);
166}
167
168#ifdef CONFIG_X86_64
169static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
170				  struct kvm_smm_seg_state_64 *state,
171				  int n)
172{
173	struct kvm_segment seg;
174
175	kvm_get_segment(vcpu, &seg, n);
176	state->selector = seg.selector;
177	state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
178	state->limit = seg.limit;
179	state->base = seg.base;
180}
181#endif
182
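/*
 * Fill the 32-bit SMRAM state-save image from the current vCPU state:
 * control registers, EFLAGS/EIP, GPRs, DR6/DR7, all segment registers plus
 * GDTR/IDTR, the SMM revision, SMBASE and the interrupt shadow.
 */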
183static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
184				    struct kvm_smram_state_32 *smram)
185{
186	struct desc_ptr dt;
187	int i;
188
189	smram->cr0     = kvm_read_cr0(vcpu);
190	smram->cr3     = kvm_read_cr3(vcpu);
191	smram->eflags  = kvm_get_rflags(vcpu);
192	smram->eip     = kvm_rip_read(vcpu);
193
194	for (i = 0; i < 8; i++)
195		smram->gprs[i] = kvm_register_read_raw(vcpu, i);
196
197	smram->dr6     = (u32)vcpu->arch.dr6;
198	smram->dr7     = (u32)vcpu->arch.dr7;
199
200	enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
201	enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);
202
203	kvm_x86_call(get_gdt)(vcpu, &dt);
204	smram->gdtr.base = dt.address;
205	smram->gdtr.limit = dt.size;
206
207	kvm_x86_call(get_idt)(vcpu, &dt);
208	smram->idtr.base = dt.address;
209	smram->idtr.limit = dt.size;
210
211	enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
212	enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
213	enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);
214
215	enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
216	enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
217	enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);
218
219	smram->cr4 = kvm_read_cr4(vcpu);
220	smram->smm_revision = 0x00020000;
221	smram->smbase = vcpu->arch.smbase;
222
223	smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
224}
225
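/*
 * 64-bit counterpart of enter_smm_save_state_32(): additionally saves all 16
 * GPRs (stored in reverse order in the gprs[] array) and EFER, using the
 * 64-bit SMRAM layout and revision 0x00020064.
 */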
226#ifdef CONFIG_X86_64
227static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
228				    struct kvm_smram_state_64 *smram)
229{
230	struct desc_ptr dt;
231	int i;
232
233	for (i = 0; i < 16; i++)
234		smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);
235
236	smram->rip    = kvm_rip_read(vcpu);
237	smram->rflags = kvm_get_rflags(vcpu);
238
239	smram->dr6 = vcpu->arch.dr6;
240	smram->dr7 = vcpu->arch.dr7;
241
242	smram->cr0 = kvm_read_cr0(vcpu);
243	smram->cr3 = kvm_read_cr3(vcpu);
244	smram->cr4 = kvm_read_cr4(vcpu);
245
246	smram->smbase = vcpu->arch.smbase;
247	smram->smm_revison = 0x00020064;
248
249	smram->efer = vcpu->arch.efer;
250
251	enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);
252
253	kvm_x86_call(get_idt)(vcpu, &dt);
254	smram->idtr.limit = dt.size;
255	smram->idtr.base = dt.address;
256
257	enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);
258
259	kvm_x86_call(get_gdt)(vcpu, &dt);
260	smram->gdtr.limit = dt.size;
261	smram->gdtr.base = dt.address;
262
263	enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
264	enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
265	enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
266	enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
267	enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
268	enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);
269
270	smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
271}
272#endif
273
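/*
 * Emulate SMM entry: snapshot the vCPU state into a zeroed SMRAM image
 * (32-bit or 64-bit depending on guest long-mode support), let the vendor
 * module adjust state via its enter_smm hook, write the image to guest
 * memory at SMBASE + 0xFE00, and then load the SMM execution environment:
 * RIP = 0x8000, CS base = SMBASE, flat 4GiB segments, paging and protection
 * disabled, NMIs masked.
 */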
274void enter_smm(struct kvm_vcpu *vcpu)
275{
276	struct kvm_segment cs, ds;
277	struct desc_ptr dt;
278	unsigned long cr0;
279	union kvm_smram smram;
280
281	check_smram_offsets();
282
283	memset(smram.bytes, 0, sizeof(smram.bytes));
284
285#ifdef CONFIG_X86_64
286	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
287		enter_smm_save_state_64(vcpu, &smram.smram64);
288	else
289#endif
290		enter_smm_save_state_32(vcpu, &smram.smram32);
291
292	/*
293	 * Give enter_smm() a chance to make ISA-specific changes to the vCPU
294	 * state (e.g. leave guest mode) after we've saved the state into the
295	 * SMM state-save area.
296	 *
297	 * Kill the VM in the unlikely case of failure, because the VM
298	 * can be in undefined state in this case.
299	 */
300	if (kvm_x86_call(enter_smm)(vcpu, &smram))
301		goto error;
302
303	kvm_smm_changed(vcpu, true);
304
305	if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram)))
306		goto error;
307
308	if (kvm_x86_call(get_nmi_mask)(vcpu))
309		vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
310	else
311		kvm_x86_call(set_nmi_mask)(vcpu, true);
312
313	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
314	kvm_rip_write(vcpu, 0x8000);
315
316	kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
317
318	cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
319	kvm_x86_call(set_cr0)(vcpu, cr0);
320
321	kvm_x86_call(set_cr4)(vcpu, 0);
322
323	/* Undocumented: IDT limit is set to zero on entry to SMM.  */
324	dt.address = dt.size = 0;
325	kvm_x86_call(set_idt)(vcpu, &dt);
326
327	if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1)))
328		goto error;
329
330	cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
331	cs.base = vcpu->arch.smbase;
332
333	ds.selector = 0;
334	ds.base = 0;
335
336	cs.limit    = ds.limit = 0xffffffff;
337	cs.type     = ds.type = 0x3;
338	cs.dpl      = ds.dpl = 0;
339	cs.db       = ds.db = 0;
340	cs.s        = ds.s = 1;
341	cs.l        = ds.l = 0;
342	cs.g        = ds.g = 1;
343	cs.avl      = ds.avl = 0;
344	cs.present  = ds.present = 1;
345	cs.unusable = ds.unusable = 0;
346	cs.padding  = ds.padding = 0;
347
348	kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
349	kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
350	kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
351	kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
352	kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
353	kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
354
355#ifdef CONFIG_X86_64
356	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
357		if (kvm_x86_call(set_efer)(vcpu, 0))
358			goto error;
359#endif
360
361	kvm_update_cpuid_runtime(vcpu);
362	kvm_mmu_reset_context(vcpu);
363	return;
364error:
365	kvm_vm_dead(vcpu->kvm);
366}
367
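/*
 * Inverse of enter_smm_get_segment_flags(): unpack the saved attribute bits
 * back into a kvm_segment, marking the segment unusable if it is not present.
 */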
368static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
369{
370	desc->g    = (flags >> 23) & 1;
371	desc->db   = (flags >> 22) & 1;
372	desc->l    = (flags >> 21) & 1;
373	desc->avl  = (flags >> 20) & 1;
374	desc->present = (flags >> 15) & 1;
375	desc->dpl  = (flags >> 13) & 3;
376	desc->s    = (flags >> 12) & 1;
377	desc->type = (flags >>  8) & 15;
378
379	desc->unusable = !desc->present;
380	desc->padding = 0;
381}
382
383static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
384			   const struct kvm_smm_seg_state_32 *state,
385			   u16 selector, int n)
386{
387	struct kvm_segment desc;
388
389	desc.selector =           selector;
390	desc.base =               state->base;
391	desc.limit =              state->limit;
392	rsm_set_desc_flags(&desc, state->flags);
393	kvm_set_segment(vcpu, &desc, n);
394	return X86EMUL_CONTINUE;
395}
396
397#ifdef CONFIG_X86_64
398
399static int rsm_load_seg_64(struct kvm_vcpu *vcpu,
400			   const struct kvm_smm_seg_state_64 *state,
401			   int n)
402{
403	struct kvm_segment desc;
404
405	desc.selector =           state->selector;
406	rsm_set_desc_flags(&desc, state->attributes << 8);
407	desc.limit =              state->limit;
408	desc.base =               state->base;
409	kvm_set_segment(vcpu, &desc, n);
410	return X86EMUL_CONTINUE;
411}
412#endif
413
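/*
 * Restore CR0/CR3/CR4 from the SMRAM image in an order that passes the
 * validity checks in kvm_set_cr*(); CR4.PCIDE and the PCID field of CR3 are
 * applied last, see the comment below.
 */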
414static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
415				    u64 cr0, u64 cr3, u64 cr4)
416{
417	int bad;
418	u64 pcid;
419
420	/* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
421	pcid = 0;
422	if (cr4 & X86_CR4_PCIDE) {
423		pcid = cr3 & 0xfff;
424		cr3 &= ~0xfff;
425	}
426
427	bad = kvm_set_cr3(vcpu, cr3);
428	if (bad)
429		return X86EMUL_UNHANDLEABLE;
430
431	/*
432	 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
433	 * Then enable protected mode.	However, PCID cannot be enabled
434	 * if EFER.LMA=0, so set it separately.
435	 */
436	bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
437	if (bad)
438		return X86EMUL_UNHANDLEABLE;
439
440	bad = kvm_set_cr0(vcpu, cr0);
441	if (bad)
442		return X86EMUL_UNHANDLEABLE;
443
444	if (cr4 & X86_CR4_PCIDE) {
445		bad = kvm_set_cr4(vcpu, cr4);
446		if (bad)
447			return X86EMUL_UNHANDLEABLE;
448		if (pcid) {
449			bad = kvm_set_cr3(vcpu, cr3 | pcid);
450			if (bad)
451				return X86EMUL_UNHANDLEABLE;
452		}
453
454	}
455
456	return X86EMUL_CONTINUE;
457}
458
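/*
 * Load the 32-bit state-save image back into the vCPU on RSM: GPRs,
 * EIP/EFLAGS, DR6/DR7, descriptor tables, segment registers, SMBASE, then
 * the control registers via rsm_enter_protected_mode(), and finally the
 * saved interrupt shadow.
 */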
459static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
460			     const struct kvm_smram_state_32 *smstate)
461{
462	struct kvm_vcpu *vcpu = ctxt->vcpu;
463	struct desc_ptr dt;
464	int i, r;
465
466	ctxt->eflags =  smstate->eflags | X86_EFLAGS_FIXED;
467	ctxt->_eip =  smstate->eip;
468
469	for (i = 0; i < 8; i++)
470		*reg_write(ctxt, i) = smstate->gprs[i];
471
472	if (kvm_set_dr(vcpu, 6, smstate->dr6))
473		return X86EMUL_UNHANDLEABLE;
474	if (kvm_set_dr(vcpu, 7, smstate->dr7))
475		return X86EMUL_UNHANDLEABLE;
476
477	rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
478	rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);
479
480	dt.address =               smstate->gdtr.base;
481	dt.size =                  smstate->gdtr.limit;
482	kvm_x86_call(set_gdt)(vcpu, &dt);
483
484	dt.address =               smstate->idtr.base;
485	dt.size =                  smstate->idtr.limit;
486	kvm_x86_call(set_idt)(vcpu, &dt);
487
488	rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
489	rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
490	rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);
491
492	rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
493	rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
494	rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);
495
496	vcpu->arch.smbase = smstate->smbase;
497
498	r = rsm_enter_protected_mode(vcpu, smstate->cr0,
499					smstate->cr3, smstate->cr4);
500
501	if (r != X86EMUL_CONTINUE)
502		return r;
503
504	kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
505	ctxt->interruptibility = (u8)smstate->int_shadow;
506
507	return r;
508}
509
510#ifdef CONFIG_X86_64
511static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
512			     const struct kvm_smram_state_64 *smstate)
513{
514	struct kvm_vcpu *vcpu = ctxt->vcpu;
515	struct desc_ptr dt;
516	int i, r;
517
518	for (i = 0; i < 16; i++)
519		*reg_write(ctxt, i) = smstate->gprs[15 - i];
520
521	ctxt->_eip   = smstate->rip;
522	ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;
523
524	if (kvm_set_dr(vcpu, 6, smstate->dr6))
525		return X86EMUL_UNHANDLEABLE;
526	if (kvm_set_dr(vcpu, 7, smstate->dr7))
527		return X86EMUL_UNHANDLEABLE;
528
529	vcpu->arch.smbase =         smstate->smbase;
530
531	if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
532		return X86EMUL_UNHANDLEABLE;
533
534	rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);
535
536	dt.size =                   smstate->idtr.limit;
537	dt.address =                smstate->idtr.base;
538	kvm_x86_call(set_idt)(vcpu, &dt);
539
540	rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR);
541
542	dt.size =                   smstate->gdtr.limit;
543	dt.address =                smstate->gdtr.base;
544	kvm_x86_call(set_gdt)(vcpu, &dt);
545
546	r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4);
547	if (r != X86EMUL_CONTINUE)
548		return r;
549
550	rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES);
551	rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS);
552	rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS);
553	rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS);
554	rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
555	rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);
556
557	kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
558	ctxt->interruptibility = (u8)smstate->int_shadow;
559
560	return X86EMUL_CONTINUE;
561}
562#endif
563
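/*
 * Emulate RSM: read the state-save area back from SMRAM at SMBASE + 0xFE00,
 * unmask NMIs unless the SMI was taken while inside an NMI handler, force
 * the vCPU back towards real mode (clearing CR4.PCIDE, CR0.PG/PE and, for
 * long-mode capable guests, CR4.PAE and EFER) so the saved CR0/CR3/CR4/EFER
 * can be reloaded safely, invoke the vendor leave_smm hook, and then reload
 * state from the 32-bit or 64-bit image.
 */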
564int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
565{
566	struct kvm_vcpu *vcpu = ctxt->vcpu;
567	unsigned long cr0;
568	union kvm_smram smram;
569	u64 smbase;
570	int ret;
571
572	smbase = vcpu->arch.smbase;
573
574	ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram));
575	if (ret < 0)
576		return X86EMUL_UNHANDLEABLE;
577
578	if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
579		kvm_x86_call(set_nmi_mask)(vcpu, false);
580
581	kvm_smm_changed(vcpu, false);
582
583	/*
584	 * Get back to real mode, to prepare a safe state in which to load
585	 * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
586	 * supports long mode.
587	 */
588#ifdef CONFIG_X86_64
589	if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
590		struct kvm_segment cs_desc;
591		unsigned long cr4;
592
593		/* Zero CR4.PCIDE before CR0.PG.  */
594		cr4 = kvm_read_cr4(vcpu);
595		if (cr4 & X86_CR4_PCIDE)
596			kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
597
598		/* A 32-bit code segment is required to clear EFER.LMA.  */
599		memset(&cs_desc, 0, sizeof(cs_desc));
600		cs_desc.type = 0xb;
601		cs_desc.s = cs_desc.g = cs_desc.present = 1;
602		kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
603	}
604#endif
605
606	/* For the 64-bit case, this will clear EFER.LMA.  */
607	cr0 = kvm_read_cr0(vcpu);
608	if (cr0 & X86_CR0_PE)
609		kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
610
611#ifdef CONFIG_X86_64
612	if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
613		unsigned long cr4, efer;
614
615		/* Clear CR4.PAE before clearing EFER.LME. */
616		cr4 = kvm_read_cr4(vcpu);
617		if (cr4 & X86_CR4_PAE)
618			kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);
619
620		/* And finally go back to 32-bit mode.  */
621		efer = 0;
622		kvm_set_msr(vcpu, MSR_EFER, efer);
623	}
624#endif
625
626	/*
627	 * FIXME: When resuming L2 (a.k.a. guest mode), the transition to guest
628	 * mode should happen _after_ loading state from SMRAM.  However, KVM
629	 * piggybacks the nested VM-Enter flows (which is wrong for many other
630	 * reasons), and so nSVM/nVMX would clobber state that is loaded from
631	 * SMRAM and from the VMCS/VMCB.
632	 */
633	if (kvm_x86_call(leave_smm)(vcpu, &smram))
634		return X86EMUL_UNHANDLEABLE;
635
636#ifdef CONFIG_X86_64
637	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
638		ret = rsm_load_state_64(ctxt, &smram.smram64);
639	else
640#endif
641		ret = rsm_load_state_32(ctxt, &smram.smram32);
642
643	/*
644	 * If RSM fails and triggers shutdown, architecturally the shutdown
645	 * occurs *before* the transition to guest mode.  But due to KVM's
646	 * flawed handling of RSM to L2 (see above), the vCPU may already be
647	 * in_guest_mode().  Force the vCPU out of guest mode before delivering
648	 * the shutdown, so that L1 enters shutdown instead of seeing a VM-Exit
649	 * that architecturally shouldn't be possible.
650	 */
651	if (ret != X86EMUL_CONTINUE && is_guest_mode(vcpu))
652		kvm_leave_nested(vcpu);
653	return ret;
654}