/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/kvm_host.h>
#include "x86.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "smm.h"
#include "cpuid.h"
#include "trace.h"

#define CHECK_SMRAM32_OFFSET(field, offset) \
        ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)

#define CHECK_SMRAM64_OFFSET(field, offset) \
        ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)

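/*
 * The SMRAM save-state image occupies the last 512 bytes of SMRAM, i.e.
 * SMBASE + 0xFE00 .. SMBASE + 0xFFFF (the image is read/written at
 * smbase + 0xfe00 below), so the architectural offsets are rebased by
 * -0xFE00 and checked against the struct layouts at compile time.
 */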
static void check_smram_offsets(void)
{
        /* 32 bit SMRAM image */
        CHECK_SMRAM32_OFFSET(reserved1, 0xFE00);
        CHECK_SMRAM32_OFFSET(smbase, 0xFEF8);
        CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC);
        CHECK_SMRAM32_OFFSET(io_inst_restart, 0xFF00);
        CHECK_SMRAM32_OFFSET(auto_hlt_restart, 0xFF02);
        CHECK_SMRAM32_OFFSET(io_restart_rdi, 0xFF04);
        CHECK_SMRAM32_OFFSET(io_restart_rcx, 0xFF08);
        CHECK_SMRAM32_OFFSET(io_restart_rsi, 0xFF0C);
        CHECK_SMRAM32_OFFSET(io_restart_rip, 0xFF10);
        CHECK_SMRAM32_OFFSET(cr4, 0xFF14);
        CHECK_SMRAM32_OFFSET(reserved2, 0xFF18);
        CHECK_SMRAM32_OFFSET(int_shadow, 0xFF1A);
        CHECK_SMRAM32_OFFSET(reserved3, 0xFF1B);
        CHECK_SMRAM32_OFFSET(ds, 0xFF2C);
        CHECK_SMRAM32_OFFSET(fs, 0xFF38);
        CHECK_SMRAM32_OFFSET(gs, 0xFF44);
        CHECK_SMRAM32_OFFSET(idtr, 0xFF50);
        CHECK_SMRAM32_OFFSET(tr, 0xFF5C);
        CHECK_SMRAM32_OFFSET(gdtr, 0xFF6C);
        CHECK_SMRAM32_OFFSET(ldtr, 0xFF78);
        CHECK_SMRAM32_OFFSET(es, 0xFF84);
        CHECK_SMRAM32_OFFSET(cs, 0xFF90);
        CHECK_SMRAM32_OFFSET(ss, 0xFF9C);
        CHECK_SMRAM32_OFFSET(es_sel, 0xFFA8);
        CHECK_SMRAM32_OFFSET(cs_sel, 0xFFAC);
        CHECK_SMRAM32_OFFSET(ss_sel, 0xFFB0);
        CHECK_SMRAM32_OFFSET(ds_sel, 0xFFB4);
        CHECK_SMRAM32_OFFSET(fs_sel, 0xFFB8);
        CHECK_SMRAM32_OFFSET(gs_sel, 0xFFBC);
        CHECK_SMRAM32_OFFSET(ldtr_sel, 0xFFC0);
        CHECK_SMRAM32_OFFSET(tr_sel, 0xFFC4);
        CHECK_SMRAM32_OFFSET(dr7, 0xFFC8);
        CHECK_SMRAM32_OFFSET(dr6, 0xFFCC);
        CHECK_SMRAM32_OFFSET(gprs, 0xFFD0);
        CHECK_SMRAM32_OFFSET(eip, 0xFFF0);
        CHECK_SMRAM32_OFFSET(eflags, 0xFFF4);
        CHECK_SMRAM32_OFFSET(cr3, 0xFFF8);
        CHECK_SMRAM32_OFFSET(cr0, 0xFFFC);

        /* 64 bit SMRAM image */
        CHECK_SMRAM64_OFFSET(es, 0xFE00);
        CHECK_SMRAM64_OFFSET(cs, 0xFE10);
        CHECK_SMRAM64_OFFSET(ss, 0xFE20);
        CHECK_SMRAM64_OFFSET(ds, 0xFE30);
        CHECK_SMRAM64_OFFSET(fs, 0xFE40);
        CHECK_SMRAM64_OFFSET(gs, 0xFE50);
        CHECK_SMRAM64_OFFSET(gdtr, 0xFE60);
        CHECK_SMRAM64_OFFSET(ldtr, 0xFE70);
        CHECK_SMRAM64_OFFSET(idtr, 0xFE80);
        CHECK_SMRAM64_OFFSET(tr, 0xFE90);
        CHECK_SMRAM64_OFFSET(io_restart_rip, 0xFEA0);
        CHECK_SMRAM64_OFFSET(io_restart_rcx, 0xFEA8);
        CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0);
        CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8);
        CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0);
        CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4);
        CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8);
        CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9);
        CHECK_SMRAM64_OFFSET(amd_nmi_mask, 0xFECA);
        CHECK_SMRAM64_OFFSET(int_shadow, 0xFECB);
        CHECK_SMRAM64_OFFSET(reserved2, 0xFECC);
        CHECK_SMRAM64_OFFSET(efer, 0xFED0);
        CHECK_SMRAM64_OFFSET(svm_guest_flag, 0xFED8);
        CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa, 0xFEE0);
        CHECK_SMRAM64_OFFSET(svm_guest_virtual_int, 0xFEE8);
        CHECK_SMRAM64_OFFSET(reserved3, 0xFEF0);
        CHECK_SMRAM64_OFFSET(smm_revison, 0xFEFC);
        CHECK_SMRAM64_OFFSET(smbase, 0xFF00);
        CHECK_SMRAM64_OFFSET(reserved4, 0xFF04);
        CHECK_SMRAM64_OFFSET(ssp, 0xFF18);
        CHECK_SMRAM64_OFFSET(svm_guest_pat, 0xFF20);
        CHECK_SMRAM64_OFFSET(svm_host_efer, 0xFF28);
        CHECK_SMRAM64_OFFSET(svm_host_cr4, 0xFF30);
        CHECK_SMRAM64_OFFSET(svm_host_cr3, 0xFF38);
        CHECK_SMRAM64_OFFSET(svm_host_cr0, 0xFF40);
        CHECK_SMRAM64_OFFSET(cr4, 0xFF48);
        CHECK_SMRAM64_OFFSET(cr3, 0xFF50);
        CHECK_SMRAM64_OFFSET(cr0, 0xFF58);
        CHECK_SMRAM64_OFFSET(dr7, 0xFF60);
        CHECK_SMRAM64_OFFSET(dr6, 0xFF68);
        CHECK_SMRAM64_OFFSET(rflags, 0xFF70);
        CHECK_SMRAM64_OFFSET(rip, 0xFF78);
        CHECK_SMRAM64_OFFSET(gprs, 0xFF80);

        BUILD_BUG_ON(sizeof(union kvm_smram) != 512);
}

#undef CHECK_SMRAM64_OFFSET
#undef CHECK_SMRAM32_OFFSET

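/*
 * Flip the vCPU in or out of SMM.  On SMM exit this also re-evaluates
 * pending events (a latched INIT or SMI) and forces the PDPTRs to be
 * reloaded from guest memory.  The MMU context is reset in both
 * directions because the SMM flag is part of the MMU role.
 */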
void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
{
        BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);

        trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);

        if (entering_smm) {
                vcpu->arch.hflags |= HF_SMM_MASK;
        } else {
                vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);

                /* Process a latched INIT or SMI, if any. */
                kvm_make_request(KVM_REQ_EVENT, vcpu);

                /*
                 * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
                 * on SMM exit we still need to reload them from
                 * guest memory
                 */
                vcpu->arch.pdptrs_from_userspace = false;
        }

        kvm_mmu_reset_context(vcpu);
}

void process_smi(struct kvm_vcpu *vcpu)
{
        vcpu->arch.smi_pending = true;
        kvm_make_request(KVM_REQ_EVENT, vcpu);
}

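/*
 * Pack the attribute bits of a kvm_segment into the 32-bit flags format
 * used by the SMRAM segment-state entries: bits 8-15 mirror the access
 * byte (type, S, DPL, P) and bits 20-23 mirror AVL, L, D/B and G.
 */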
static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
{
        u32 flags = 0;
        flags |= seg->g << 23;
        flags |= seg->db << 22;
        flags |= seg->l << 21;
        flags |= seg->avl << 20;
        flags |= seg->present << 15;
        flags |= seg->dpl << 13;
        flags |= seg->s << 12;
        flags |= seg->type << 8;
        return flags;
}

static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
                                  struct kvm_smm_seg_state_32 *state,
                                  u32 *selector, int n)
{
        struct kvm_segment seg;

        kvm_get_segment(vcpu, &seg, n);
        *selector = seg.selector;
        state->base = seg.base;
        state->limit = seg.limit;
        state->flags = enter_smm_get_segment_flags(&seg);
}

#ifdef CONFIG_X86_64
static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
                                  struct kvm_smm_seg_state_64 *state,
                                  int n)
{
        struct kvm_segment seg;

        kvm_get_segment(vcpu, &seg, n);
        state->selector = seg.selector;
        state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
        state->limit = seg.limit;
        state->base = seg.base;
}
#endif

static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
                                    struct kvm_smram_state_32 *smram)
{
        struct desc_ptr dt;
        unsigned long val;
        int i;

        smram->cr0 = kvm_read_cr0(vcpu);
        smram->cr3 = kvm_read_cr3(vcpu);
        smram->eflags = kvm_get_rflags(vcpu);
        smram->eip = kvm_rip_read(vcpu);

        for (i = 0; i < 8; i++)
                smram->gprs[i] = kvm_register_read_raw(vcpu, i);

        kvm_get_dr(vcpu, 6, &val);
        smram->dr6 = (u32)val;
        kvm_get_dr(vcpu, 7, &val);
        smram->dr7 = (u32)val;

        enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
        enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);

        static_call(kvm_x86_get_gdt)(vcpu, &dt);
        smram->gdtr.base = dt.address;
        smram->gdtr.limit = dt.size;

        static_call(kvm_x86_get_idt)(vcpu, &dt);
        smram->idtr.base = dt.address;
        smram->idtr.limit = dt.size;

        enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
        enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
        enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);

        enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
        enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
        enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);

        smram->cr4 = kvm_read_cr4(vcpu);
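        /*
         * SMM revision ID: the low word is the save-state revision level
         * and bit 17 advertises SMBASE relocation support; the 64-bit
         * image below additionally uses the AMD64 revision level 0x64.
         */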
        smram->smm_revision = 0x00020000;
        smram->smbase = vcpu->arch.smbase;

        smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
}

#ifdef CONFIG_X86_64
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
                                    struct kvm_smram_state_64 *smram)
{
        struct desc_ptr dt;
        unsigned long val;
        int i;

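        /* The 64-bit image stores the GPRs in reverse order: gprs[15] = RAX, gprs[0] = R15. */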
        for (i = 0; i < 16; i++)
                smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);

        smram->rip = kvm_rip_read(vcpu);
        smram->rflags = kvm_get_rflags(vcpu);

        kvm_get_dr(vcpu, 6, &val);
        smram->dr6 = val;
        kvm_get_dr(vcpu, 7, &val);
        smram->dr7 = val;

        smram->cr0 = kvm_read_cr0(vcpu);
        smram->cr3 = kvm_read_cr3(vcpu);
        smram->cr4 = kvm_read_cr4(vcpu);

        smram->smbase = vcpu->arch.smbase;
        smram->smm_revison = 0x00020064;

        smram->efer = vcpu->arch.efer;

        enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);

        static_call(kvm_x86_get_idt)(vcpu, &dt);
        smram->idtr.limit = dt.size;
        smram->idtr.base = dt.address;

        enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);

        static_call(kvm_x86_get_gdt)(vcpu, &dt);
        smram->gdtr.limit = dt.size;
        smram->gdtr.base = dt.address;

        enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
        enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
        enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
        enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
        enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
        enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);

        smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
}
#endif

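/*
 * Architectural SMM entry: save the current vCPU state into the SMRAM
 * state-save area at SMBASE + 0xFE00, give vendor code a chance to adjust
 * state, then switch the vCPU to the SMI handler environment (CS base at
 * SMBASE, RIP at 0x8000, flat 4 GiB segments, paging and protection
 * disabled, NMIs masked).
 */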
void enter_smm(struct kvm_vcpu *vcpu)
{
        struct kvm_segment cs, ds;
        struct desc_ptr dt;
        unsigned long cr0;
        union kvm_smram smram;

        check_smram_offsets();

        memset(smram.bytes, 0, sizeof(smram.bytes));

#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
                enter_smm_save_state_64(vcpu, &smram.smram64);
        else
#endif
                enter_smm_save_state_32(vcpu, &smram.smram32);

        /*
         * Give enter_smm() a chance to make ISA-specific changes to the vCPU
         * state (e.g. leave guest mode) after we've saved the state into the
         * SMM state-save area.
         *
         * Kill the VM in the unlikely case of failure, because the VM
         * can be left in an undefined state in this case.
         */
        if (static_call(kvm_x86_enter_smm)(vcpu, &smram))
                goto error;

        kvm_smm_changed(vcpu, true);

        if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram)))
                goto error;

        if (static_call(kvm_x86_get_nmi_mask)(vcpu))
                vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
        else
                static_call(kvm_x86_set_nmi_mask)(vcpu, true);

        kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
        kvm_rip_write(vcpu, 0x8000);

        static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);

        cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
        static_call(kvm_x86_set_cr0)(vcpu, cr0);
        vcpu->arch.cr0 = cr0;

        static_call(kvm_x86_set_cr4)(vcpu, 0);

        /* Undocumented: IDT limit is set to zero on entry to SMM. */
        dt.address = dt.size = 0;
        static_call(kvm_x86_set_idt)(vcpu, &dt);

        if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1)))
                goto error;

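        /*
         * CS and the data segments become flat "big real mode" segments with
         * a 4 GiB limit; CS is based at SMBASE, so the RIP of 0x8000 set
         * above lands on the SMI handler entry point at SMBASE + 0x8000.
         */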
        cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
        cs.base = vcpu->arch.smbase;

        ds.selector = 0;
        ds.base = 0;

        cs.limit = ds.limit = 0xffffffff;
        cs.type = ds.type = 0x3;
        cs.dpl = ds.dpl = 0;
        cs.db = ds.db = 0;
        cs.s = ds.s = 1;
        cs.l = ds.l = 0;
        cs.g = ds.g = 1;
        cs.avl = ds.avl = 0;
        cs.present = ds.present = 1;
        cs.unusable = ds.unusable = 0;
        cs.padding = ds.padding = 0;

        kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);

#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
                if (static_call(kvm_x86_set_efer)(vcpu, 0))
                        goto error;
#endif

        kvm_update_cpuid_runtime(vcpu);
        kvm_mmu_reset_context(vcpu);
        return;
error:
        kvm_vm_dead(vcpu->kvm);
}

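/*
 * Inverse of enter_smm_get_segment_flags(): unpack the SMRAM segment flags
 * back into a kvm_segment; a segment saved as not-present is marked
 * unusable.
 */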
static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
{
        desc->g = (flags >> 23) & 1;
        desc->db = (flags >> 22) & 1;
        desc->l = (flags >> 21) & 1;
        desc->avl = (flags >> 20) & 1;
        desc->present = (flags >> 15) & 1;
        desc->dpl = (flags >> 13) & 3;
        desc->s = (flags >> 12) & 1;
        desc->type = (flags >> 8) & 15;

        desc->unusable = !desc->present;
        desc->padding = 0;
}

static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
                           const struct kvm_smm_seg_state_32 *state,
                           u16 selector, int n)
{
        struct kvm_segment desc;

        desc.selector = selector;
        desc.base = state->base;
        desc.limit = state->limit;
        rsm_set_desc_flags(&desc, state->flags);
        kvm_set_segment(vcpu, &desc, n);
        return X86EMUL_CONTINUE;
}

#ifdef CONFIG_X86_64

static int rsm_load_seg_64(struct kvm_vcpu *vcpu,
                           const struct kvm_smm_seg_state_64 *state,
                           int n)
{
        struct kvm_segment desc;

        desc.selector = state->selector;
        rsm_set_desc_flags(&desc, state->attributes << 8);
        desc.limit = state->limit;
        desc.base = state->base;
        kvm_set_segment(vcpu, &desc, n);
        return X86EMUL_CONTINUE;
}
#endif

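/*
 * Restore CR3, CR4 and CR0 in an order that respects the architectural
 * dependencies: PAE must be set before CR0.PG for a return to long mode,
 * and CR4.PCIDE can only be set once EFER.LMA is set, so the PCID bits of
 * CR3 and CR4.PCIDE are applied last.
 */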
static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
                                    u64 cr0, u64 cr3, u64 cr4)
{
        int bad;
        u64 pcid;

        /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
        pcid = 0;
        if (cr4 & X86_CR4_PCIDE) {
                pcid = cr3 & 0xfff;
                cr3 &= ~0xfff;
        }

        bad = kvm_set_cr3(vcpu, cr3);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        /*
         * First enable PAE, long mode needs it before CR0.PG = 1 is set.
         * Then enable protected mode. However, PCID cannot be enabled
         * if EFER.LMA=0, so set it separately.
         */
        bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        bad = kvm_set_cr0(vcpu, cr0);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        if (cr4 & X86_CR4_PCIDE) {
                bad = kvm_set_cr4(vcpu, cr4);
                if (bad)
                        return X86EMUL_UNHANDLEABLE;
                if (pcid) {
                        bad = kvm_set_cr3(vcpu, cr3 | pcid);
                        if (bad)
                                return X86EMUL_UNHANDLEABLE;
                }
        }

        return X86EMUL_CONTINUE;
}

static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
                             const struct kvm_smram_state_32 *smstate)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        struct desc_ptr dt;
        int i, r;

        ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED;
        ctxt->_eip = smstate->eip;

        for (i = 0; i < 8; i++)
                *reg_write(ctxt, i) = smstate->gprs[i];

        if (kvm_set_dr(vcpu, 6, smstate->dr6))
                return X86EMUL_UNHANDLEABLE;
        if (kvm_set_dr(vcpu, 7, smstate->dr7))
                return X86EMUL_UNHANDLEABLE;

        rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
        rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);

        dt.address = smstate->gdtr.base;
        dt.size = smstate->gdtr.limit;
        static_call(kvm_x86_set_gdt)(vcpu, &dt);

        dt.address = smstate->idtr.base;
        dt.size = smstate->idtr.limit;
        static_call(kvm_x86_set_idt)(vcpu, &dt);

        rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
        rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
        rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);

        rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
        rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
        rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);

        vcpu->arch.smbase = smstate->smbase;

        r = rsm_enter_protected_mode(vcpu, smstate->cr0,
                                     smstate->cr3, smstate->cr4);

        if (r != X86EMUL_CONTINUE)
                return r;

        static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
        ctxt->interruptibility = (u8)smstate->int_shadow;

        return r;
}

#ifdef CONFIG_X86_64
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
                             const struct kvm_smram_state_64 *smstate)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        struct desc_ptr dt;
        int i, r;

        for (i = 0; i < 16; i++)
                *reg_write(ctxt, i) = smstate->gprs[15 - i];

        ctxt->_eip = smstate->rip;
        ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;

        if (kvm_set_dr(vcpu, 6, smstate->dr6))
                return X86EMUL_UNHANDLEABLE;
        if (kvm_set_dr(vcpu, 7, smstate->dr7))
                return X86EMUL_UNHANDLEABLE;

        vcpu->arch.smbase = smstate->smbase;

        if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
                return X86EMUL_UNHANDLEABLE;

        rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);

        dt.size = smstate->idtr.limit;
        dt.address = smstate->idtr.base;
        static_call(kvm_x86_set_idt)(vcpu, &dt);

        rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR);

        dt.size = smstate->gdtr.limit;
        dt.address = smstate->gdtr.base;
        static_call(kvm_x86_set_gdt)(vcpu, &dt);

        r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4);
        if (r != X86EMUL_CONTINUE)
                return r;

        rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES);
        rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS);
        rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS);
        rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS);
        rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
        rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);

        static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
        ctxt->interruptibility = (u8)smstate->int_shadow;

        return X86EMUL_CONTINUE;
}
#endif

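/*
 * Emulate RSM: read the save-state image back from SMRAM, drop the NMI
 * block unless the SMI was taken while handling an NMI, step the vCPU down
 * to an unpaged, non-PAE state so that CR0/CR3/CR4/EFER can be reloaded
 * safely, let vendor code react, and finally reload the full state from
 * the 32-bit or 64-bit image.
 */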
int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        unsigned long cr0;
        union kvm_smram smram;
        u64 smbase;
        int ret;

        smbase = vcpu->arch.smbase;

        ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram));
        if (ret < 0)
                return X86EMUL_UNHANDLEABLE;

        if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
                static_call(kvm_x86_set_nmi_mask)(vcpu, false);

        kvm_smm_changed(vcpu, false);

        /*
         * Get back to real mode, to prepare a safe state in which to load
         * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
         * supports long mode.
         */
#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
                struct kvm_segment cs_desc;
                unsigned long cr4;

                /* Zero CR4.PCIDE before CR0.PG. */
                cr4 = kvm_read_cr4(vcpu);
                if (cr4 & X86_CR4_PCIDE)
                        kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);

                /* A 32-bit code segment is required to clear EFER.LMA. */
                memset(&cs_desc, 0, sizeof(cs_desc));
                cs_desc.type = 0xb;
                cs_desc.s = cs_desc.g = cs_desc.present = 1;
                kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
        }
#endif

        /* For the 64-bit case, this will clear EFER.LMA. */
        cr0 = kvm_read_cr0(vcpu);
        if (cr0 & X86_CR0_PE)
                kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));

#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
                unsigned long cr4, efer;

                /* Clear CR4.PAE before clearing EFER.LME. */
                cr4 = kvm_read_cr4(vcpu);
                if (cr4 & X86_CR4_PAE)
                        kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);

                /* And finally go back to 32-bit mode. */
                efer = 0;
                kvm_set_msr(vcpu, MSR_EFER, efer);
        }
#endif

        /*
         * Give leave_smm() a chance to make ISA-specific changes to the vCPU
         * state (e.g. enter guest mode) before loading state from the SMM
         * state-save area.
         */
        if (static_call(kvm_x86_leave_smm)(vcpu, &smram))
                return X86EMUL_UNHANDLEABLE;

#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
                return rsm_load_state_64(ctxt, &smram.smram64);
        else
#endif
                return rsm_load_state_32(ctxt, &smram.smram32);
}