1/*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17#include <linux/compiler.h>
18#include <linux/err.h>
19#include <linux/fs.h>
20#include <linux/hrtimer.h>
21#include <linux/init.h>
22#include <linux/kvm.h>
23#include <linux/kvm_host.h>
24#include <linux/mman.h>
25#include <linux/module.h>
26#include <linux/random.h>
27#include <linux/slab.h>
28#include <linux/timer.h>
29#include <linux/vmalloc.h>
30#include <linux/bitmap.h>
31#include <asm/asm-offsets.h>
32#include <asm/lowcore.h>
33#include <asm/stp.h>
34#include <asm/pgtable.h>
35#include <asm/gmap.h>
36#include <asm/nmi.h>
37#include <asm/switch_to.h>
38#include <asm/isc.h>
39#include <asm/sclp.h>
40#include <asm/cpacf.h>
41#include <asm/timex.h>
42#include "kvm-s390.h"
43#include "gaccess.h"
44
45#define KMSG_COMPONENT "kvm-s390"
46#undef pr_fmt
47#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
48
49#define CREATE_TRACE_POINTS
50#include "trace.h"
51#include "trace-s390.h"
52
53#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
54#define LOCAL_IRQS 32
55#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
56 (KVM_MAX_VCPUS + LOCAL_IRQS))
57
58#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
59
60struct kvm_stats_debugfs_item debugfs_entries[] = {
61 { "userspace_handled", VCPU_STAT(exit_userspace) },
62 { "exit_null", VCPU_STAT(exit_null) },
63 { "exit_validity", VCPU_STAT(exit_validity) },
64 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
65 { "exit_external_request", VCPU_STAT(exit_external_request) },
66 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
67 { "exit_instruction", VCPU_STAT(exit_instruction) },
68 { "exit_pei", VCPU_STAT(exit_pei) },
69 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
70 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
71 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
72 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
73 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
74 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
75 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
76 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
77 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
78 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
79 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
80 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
81 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
82 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
83 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
84 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
85 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
86 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
87 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
88 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
89 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
90 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
91 { "instruction_spx", VCPU_STAT(instruction_spx) },
92 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
93 { "instruction_stap", VCPU_STAT(instruction_stap) },
94 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
95 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
96 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
97 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
98 { "instruction_essa", VCPU_STAT(instruction_essa) },
99 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
100 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
101 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
102 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
103 { "instruction_sie", VCPU_STAT(instruction_sie) },
104 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
105 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
106 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
107 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
108 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
109 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
110 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
111 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
112 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
113 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
114 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
115 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
116 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
117 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
118 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
119 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
120 { "diagnose_10", VCPU_STAT(diagnose_10) },
121 { "diagnose_44", VCPU_STAT(diagnose_44) },
122 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
123 { "diagnose_258", VCPU_STAT(diagnose_258) },
124 { "diagnose_308", VCPU_STAT(diagnose_308) },
125 { "diagnose_500", VCPU_STAT(diagnose_500) },
126 { NULL }
127};
128
129/* allow nested virtualization in KVM (if enabled by user space) */
130static int nested;
131module_param(nested, int, S_IRUGO);
132MODULE_PARM_DESC(nested, "Nested virtualization support");
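/* e.g. can be enabled at module load time, typically via "modprobe kvm nested=1" */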
133
134/* upper facilities limit for kvm */
135unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
136
137unsigned long kvm_s390_fac_list_mask_size(void)
138{
139 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
140 return ARRAY_SIZE(kvm_s390_fac_list_mask);
141}
142
143/* available cpu features supported by kvm */
144static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
145/* available subfunctions indicated via query / "test bit" */
146static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
147
148static struct gmap_notifier gmap_notifier;
149static struct gmap_notifier vsie_gmap_notifier;
150debug_info_t *kvm_s390_dbf;
151
152/* Section: not file related */
153int kvm_arch_hardware_enable(void)
154{
155 /* every s390 is virtualization enabled ;-) */
156 return 0;
157}
158
159static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
160 unsigned long end);
161
162/*
163 * This callback is executed during stop_machine(). All CPUs are therefore
164 * temporarily stopped. In order not to change guest behavior, we have to
165 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
166 * so a CPU won't be stopped while calculating with the epoch.
167 */
168static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
169 void *v)
170{
171 struct kvm *kvm;
172 struct kvm_vcpu *vcpu;
173 int i;
174 unsigned long long *delta = v;
175
176 list_for_each_entry(kvm, &vm_list, vm_list) {
177 kvm->arch.epoch -= *delta;
178 kvm_for_each_vcpu(i, vcpu, kvm) {
179 vcpu->arch.sie_block->epoch -= *delta;
180 if (vcpu->arch.cputm_enabled)
181 vcpu->arch.cputm_start += *delta;
182 if (vcpu->arch.vsie_block)
183 vcpu->arch.vsie_block->epoch -= *delta;
184 }
185 }
186 return NOTIFY_OK;
187}
188
189static struct notifier_block kvm_clock_notifier = {
190 .notifier_call = kvm_clock_sync,
191};
192
193int kvm_arch_hardware_setup(void)
194{
195 gmap_notifier.notifier_call = kvm_gmap_notifier;
196 gmap_register_pte_notifier(&gmap_notifier);
197 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
198 gmap_register_pte_notifier(&vsie_gmap_notifier);
199 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
200 &kvm_clock_notifier);
201 return 0;
202}
203
204void kvm_arch_hardware_unsetup(void)
205{
206 gmap_unregister_pte_notifier(&gmap_notifier);
207 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
208 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
209 &kvm_clock_notifier);
210}
211
212static void allow_cpu_feat(unsigned long nr)
213{
214 set_bit_inv(nr, kvm_s390_available_cpu_feat);
215}
216
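/*
 * Probe one PERFORM LOCKED OPERATION subfunction: execute PLO with the
 * "test bit" set in the function code and report whether the resulting
 * condition code indicates that the subfunction is available.
 */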
217static inline int plo_test_bit(unsigned char nr)
218{
219 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
220 int cc = 3; /* subfunction not available */
221
222 asm volatile(
223 /* Parameter registers are ignored for "test bit" */
224 " plo 0,0,0,0(0)\n"
225 " ipm %0\n"
226 " srl %0,28\n"
227 : "=d" (cc)
228 : "d" (r0)
229 : "cc");
230 return cc == 0;
231}
232
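/*
 * Probe the host for the available subfunctions (PLO, PTFF, CPACF) and the
 * SIE facilities reported by SCLP, and record everything KVM may offer to
 * its guests, including the features required for nested SIE (vSIE).
 */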
233static void kvm_s390_cpu_feat_init(void)
234{
235 int i;
236
237 for (i = 0; i < 256; ++i) {
238 if (plo_test_bit(i))
239 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
240 }
241
242 if (test_facility(28)) /* TOD-clock steering */
243 ptff(kvm_s390_available_subfunc.ptff,
244 sizeof(kvm_s390_available_subfunc.ptff),
245 PTFF_QAF);
246
247 if (test_facility(17)) { /* MSA */
248 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
249 kvm_s390_available_subfunc.kmac);
250 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
251 kvm_s390_available_subfunc.kmc);
252 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
253 kvm_s390_available_subfunc.km);
254 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
255 kvm_s390_available_subfunc.kimd);
256 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
257 kvm_s390_available_subfunc.klmd);
258 }
259 if (test_facility(76)) /* MSA3 */
260 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
261 kvm_s390_available_subfunc.pckmo);
262 if (test_facility(77)) { /* MSA4 */
263 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
264 kvm_s390_available_subfunc.kmctr);
265 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
266 kvm_s390_available_subfunc.kmf);
267 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
268 kvm_s390_available_subfunc.kmo);
269 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
270 kvm_s390_available_subfunc.pcc);
271 }
272 if (test_facility(57)) /* MSA5 */
273 __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
274 kvm_s390_available_subfunc.ppno);
275
276 if (MACHINE_HAS_ESOP)
277 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
278 /*
279 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
280 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
281 */
282 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
283 !test_facility(3) || !nested)
284 return;
285 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
286 if (sclp.has_64bscao)
287 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
288 if (sclp.has_siif)
289 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
290 if (sclp.has_gpere)
291 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
292 if (sclp.has_gsls)
293 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
294 if (sclp.has_ib)
295 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
296 if (sclp.has_cei)
297 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
298 if (sclp.has_ibs)
299 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
300 /*
301 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
302 * all skey handling functions read/set the skey from the PGSTE
303 * instead of the real storage key.
304 *
305 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
306 * pages being detected as preserved although they are resident.
307 *
308 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
309 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
310 *
311 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
312 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
313 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
314 *
315 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
316 * cannot easily shadow the SCA because of the ipte lock.
317 */
318}
319
320int kvm_arch_init(void *opaque)
321{
322 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
323 if (!kvm_s390_dbf)
324 return -ENOMEM;
325
326 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
327 debug_unregister(kvm_s390_dbf);
328 return -ENOMEM;
329 }
330
331 kvm_s390_cpu_feat_init();
332
333 /* Register floating interrupt controller interface. */
334 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
335}
336
337void kvm_arch_exit(void)
338{
339 debug_unregister(kvm_s390_dbf);
340}
341
342/* Section: device related */
343long kvm_arch_dev_ioctl(struct file *filp,
344 unsigned int ioctl, unsigned long arg)
345{
346 if (ioctl == KVM_S390_ENABLE_SIE)
347 return s390_enable_sie();
348 return -EINVAL;
349}
350
351int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
352{
353 int r;
354
355 switch (ext) {
356 case KVM_CAP_S390_PSW:
357 case KVM_CAP_S390_GMAP:
358 case KVM_CAP_SYNC_MMU:
359#ifdef CONFIG_KVM_S390_UCONTROL
360 case KVM_CAP_S390_UCONTROL:
361#endif
362 case KVM_CAP_ASYNC_PF:
363 case KVM_CAP_SYNC_REGS:
364 case KVM_CAP_ONE_REG:
365 case KVM_CAP_ENABLE_CAP:
366 case KVM_CAP_S390_CSS_SUPPORT:
367 case KVM_CAP_IOEVENTFD:
368 case KVM_CAP_DEVICE_CTRL:
369 case KVM_CAP_ENABLE_CAP_VM:
370 case KVM_CAP_S390_IRQCHIP:
371 case KVM_CAP_VM_ATTRIBUTES:
372 case KVM_CAP_MP_STATE:
373 case KVM_CAP_S390_INJECT_IRQ:
374 case KVM_CAP_S390_USER_SIGP:
375 case KVM_CAP_S390_USER_STSI:
376 case KVM_CAP_S390_SKEYS:
377 case KVM_CAP_S390_IRQ_STATE:
378 case KVM_CAP_S390_USER_INSTR0:
379 r = 1;
380 break;
381 case KVM_CAP_S390_MEM_OP:
382 r = MEM_OP_MAX_SIZE;
383 break;
384 case KVM_CAP_NR_VCPUS:
385 case KVM_CAP_MAX_VCPUS:
386 r = KVM_S390_BSCA_CPU_SLOTS;
387 if (!kvm_s390_use_sca_entries())
388 r = KVM_MAX_VCPUS;
389 else if (sclp.has_esca && sclp.has_64bscao)
390 r = KVM_S390_ESCA_CPU_SLOTS;
391 break;
392 case KVM_CAP_NR_MEMSLOTS:
393 r = KVM_USER_MEM_SLOTS;
394 break;
395 case KVM_CAP_S390_COW:
396 r = MACHINE_HAS_ESOP;
397 break;
398 case KVM_CAP_S390_VECTOR_REGISTERS:
399 r = MACHINE_HAS_VX;
400 break;
401 case KVM_CAP_S390_RI:
402 r = test_facility(64);
403 break;
404 default:
405 r = 0;
406 }
407 return r;
408}
409
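/*
 * Transfer the dirty state of all pages of a memory slot from the host
 * page tables into the KVM dirty bitmap, rescheduling regularly and
 * bailing out early if a fatal signal is pending.
 */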
410static void kvm_s390_sync_dirty_log(struct kvm *kvm,
411 struct kvm_memory_slot *memslot)
412{
413 gfn_t cur_gfn, last_gfn;
414 unsigned long address;
415 struct gmap *gmap = kvm->arch.gmap;
416
417 /* Loop over all guest pages */
418 last_gfn = memslot->base_gfn + memslot->npages;
419 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
420 address = gfn_to_hva_memslot(memslot, cur_gfn);
421
422 if (test_and_clear_guest_dirty(gmap->mm, address))
423 mark_page_dirty(kvm, cur_gfn);
424 if (fatal_signal_pending(current))
425 return;
426 cond_resched();
427 }
428}
429
430/* Section: vm related */
431static void sca_del_vcpu(struct kvm_vcpu *vcpu);
432
433/*
434 * Get (and clear) the dirty memory log for a memory slot.
435 */
436int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
437 struct kvm_dirty_log *log)
438{
439 int r;
440 unsigned long n;
441 struct kvm_memslots *slots;
442 struct kvm_memory_slot *memslot;
443 int is_dirty = 0;
444
445 if (kvm_is_ucontrol(kvm))
446 return -EINVAL;
447
448 mutex_lock(&kvm->slots_lock);
449
450 r = -EINVAL;
451 if (log->slot >= KVM_USER_MEM_SLOTS)
452 goto out;
453
454 slots = kvm_memslots(kvm);
455 memslot = id_to_memslot(slots, log->slot);
456 r = -ENOENT;
457 if (!memslot->dirty_bitmap)
458 goto out;
459
460 kvm_s390_sync_dirty_log(kvm, memslot);
461 r = kvm_get_dirty_log(kvm, log, &is_dirty);
462 if (r)
463 goto out;
464
465 /* Clear the dirty log */
466 if (is_dirty) {
467 n = kvm_dirty_bitmap_bytes(memslot);
468 memset(memslot->dirty_bitmap, 0, n);
469 }
470 r = 0;
471out:
472 mutex_unlock(&kvm->slots_lock);
473 return r;
474}
475
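/* Request an operation exception intercept on every VCPU of this VM. */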
476static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
477{
478 unsigned int i;
479 struct kvm_vcpu *vcpu;
480
481 kvm_for_each_vcpu(i, vcpu, kvm) {
482 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
483 }
484}
485
486static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
487{
488 int r;
489
490 if (cap->flags)
491 return -EINVAL;
492
493 switch (cap->cap) {
494 case KVM_CAP_S390_IRQCHIP:
495 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
496 kvm->arch.use_irqchip = 1;
497 r = 0;
498 break;
499 case KVM_CAP_S390_USER_SIGP:
500 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
501 kvm->arch.user_sigp = 1;
502 r = 0;
503 break;
504 case KVM_CAP_S390_VECTOR_REGISTERS:
505 mutex_lock(&kvm->lock);
506 if (kvm->created_vcpus) {
507 r = -EBUSY;
508 } else if (MACHINE_HAS_VX) {
509 set_kvm_facility(kvm->arch.model.fac_mask, 129);
510 set_kvm_facility(kvm->arch.model.fac_list, 129);
511 r = 0;
512 } else
513 r = -EINVAL;
514 mutex_unlock(&kvm->lock);
515 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
516 r ? "(not available)" : "(success)");
517 break;
518 case KVM_CAP_S390_RI:
519 r = -EINVAL;
520 mutex_lock(&kvm->lock);
521 if (kvm->created_vcpus) {
522 r = -EBUSY;
523 } else if (test_facility(64)) {
524 set_kvm_facility(kvm->arch.model.fac_mask, 64);
525 set_kvm_facility(kvm->arch.model.fac_list, 64);
526 r = 0;
527 }
528 mutex_unlock(&kvm->lock);
529 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
530 r ? "(not available)" : "(success)");
531 break;
532 case KVM_CAP_S390_USER_STSI:
533 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
534 kvm->arch.user_stsi = 1;
535 r = 0;
536 break;
537 case KVM_CAP_S390_USER_INSTR0:
538 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
539 kvm->arch.user_instr0 = 1;
540 icpt_operexc_on_all_vcpus(kvm);
541 r = 0;
542 break;
543 default:
544 r = -EINVAL;
545 break;
546 }
547 return r;
548}
549
550static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
551{
552 int ret;
553
554 switch (attr->attr) {
555 case KVM_S390_VM_MEM_LIMIT_SIZE:
556 ret = 0;
557 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
558 kvm->arch.mem_limit);
559 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
560 ret = -EFAULT;
561 break;
562 default:
563 ret = -ENXIO;
564 break;
565 }
566 return ret;
567}
568
569static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
570{
571 int ret;
572 unsigned int idx;
573 switch (attr->attr) {
574 case KVM_S390_VM_MEM_ENABLE_CMMA:
575 ret = -ENXIO;
576 if (!sclp.has_cmma)
577 break;
578
579 ret = -EBUSY;
580 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
581 mutex_lock(&kvm->lock);
582 if (!kvm->created_vcpus) {
583 kvm->arch.use_cmma = 1;
584 ret = 0;
585 }
586 mutex_unlock(&kvm->lock);
587 break;
588 case KVM_S390_VM_MEM_CLR_CMMA:
589 ret = -ENXIO;
590 if (!sclp.has_cmma)
591 break;
592 ret = -EINVAL;
593 if (!kvm->arch.use_cmma)
594 break;
595
596 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
597 mutex_lock(&kvm->lock);
598 idx = srcu_read_lock(&kvm->srcu);
599 s390_reset_cmma(kvm->arch.gmap->mm);
600 srcu_read_unlock(&kvm->srcu, idx);
601 mutex_unlock(&kvm->lock);
602 ret = 0;
603 break;
604 case KVM_S390_VM_MEM_LIMIT_SIZE: {
605 unsigned long new_limit;
606
607 if (kvm_is_ucontrol(kvm))
608 return -EINVAL;
609
610 if (get_user(new_limit, (u64 __user *)attr->addr))
611 return -EFAULT;
612
613 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
614 new_limit > kvm->arch.mem_limit)
615 return -E2BIG;
616
617 if (!new_limit)
618 return -EINVAL;
619
620 /* gmap_create takes last usable address */
621 if (new_limit != KVM_S390_NO_MEM_LIMIT)
622 new_limit -= 1;
623
624 ret = -EBUSY;
625 mutex_lock(&kvm->lock);
626 if (!kvm->created_vcpus) {
627 /* gmap_create will round the limit up */
628 struct gmap *new = gmap_create(current->mm, new_limit);
629
630 if (!new) {
631 ret = -ENOMEM;
632 } else {
633 gmap_remove(kvm->arch.gmap);
634 new->private = kvm;
635 kvm->arch.gmap = new;
636 ret = 0;
637 }
638 }
639 mutex_unlock(&kvm->lock);
640 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
641 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
642 (void *) kvm->arch.gmap->asce);
643 break;
644 }
645 default:
646 ret = -ENXIO;
647 break;
648 }
649 return ret;
650}
651
652static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
653
654static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
655{
656 struct kvm_vcpu *vcpu;
657 int i;
658
659 if (!test_kvm_facility(kvm, 76))
660 return -EINVAL;
661
662 mutex_lock(&kvm->lock);
663 switch (attr->attr) {
664 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
665 get_random_bytes(
666 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
667 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
668 kvm->arch.crypto.aes_kw = 1;
669 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
670 break;
671 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
672 get_random_bytes(
673 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
674 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
675 kvm->arch.crypto.dea_kw = 1;
676 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
677 break;
678 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
679 kvm->arch.crypto.aes_kw = 0;
680 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
681 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
682 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
683 break;
684 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
685 kvm->arch.crypto.dea_kw = 0;
686 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
687 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
688 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
689 break;
690 default:
691 mutex_unlock(&kvm->lock);
692 return -ENXIO;
693 }
694
695 kvm_for_each_vcpu(i, vcpu, kvm) {
696 kvm_s390_vcpu_crypto_setup(vcpu);
697 exit_sie(vcpu);
698 }
699 mutex_unlock(&kvm->lock);
700 return 0;
701}
702
703static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
704{
705 u8 gtod_high;
706
	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
709 return -EFAULT;
710
711 if (gtod_high != 0)
712 return -EINVAL;
713 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
714
715 return 0;
716}
717
718static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
719{
720 u64 gtod;
721
	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
723 return -EFAULT;
724
725 kvm_s390_set_tod_clock(kvm, gtod);
726 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
727 return 0;
728}
729
730static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
731{
732 int ret;
733
734 if (attr->flags)
735 return -EINVAL;
736
737 switch (attr->attr) {
738 case KVM_S390_VM_TOD_HIGH:
739 ret = kvm_s390_set_tod_high(kvm, attr);
740 break;
741 case KVM_S390_VM_TOD_LOW:
742 ret = kvm_s390_set_tod_low(kvm, attr);
743 break;
744 default:
745 ret = -ENXIO;
746 break;
747 }
748 return ret;
749}
750
751static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
752{
753 u8 gtod_high = 0;
754
	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
757 return -EFAULT;
758 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
759
760 return 0;
761}
762
763static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
764{
765 u64 gtod;
766
767 gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
769 return -EFAULT;
770 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
771
772 return 0;
773}
774
775static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
776{
777 int ret;
778
779 if (attr->flags)
780 return -EINVAL;
781
782 switch (attr->attr) {
783 case KVM_S390_VM_TOD_HIGH:
784 ret = kvm_s390_get_tod_high(kvm, attr);
785 break;
786 case KVM_S390_VM_TOD_LOW:
787 ret = kvm_s390_get_tod_low(kvm, attr);
788 break;
789 default:
790 ret = -ENXIO;
791 break;
792 }
793 return ret;
794}
795
796static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
797{
798 struct kvm_s390_vm_cpu_processor *proc;
799 u16 lowest_ibc, unblocked_ibc;
800 int ret = 0;
801
802 mutex_lock(&kvm->lock);
803 if (kvm->created_vcpus) {
804 ret = -EBUSY;
805 goto out;
806 }
807 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
808 if (!proc) {
809 ret = -ENOMEM;
810 goto out;
811 }
812 if (!copy_from_user(proc, (void __user *)attr->addr,
813 sizeof(*proc))) {
814 kvm->arch.model.cpuid = proc->cpuid;
815 lowest_ibc = sclp.ibc >> 16 & 0xfff;
816 unblocked_ibc = sclp.ibc & 0xfff;
817 if (lowest_ibc && proc->ibc) {
818 if (proc->ibc > unblocked_ibc)
819 kvm->arch.model.ibc = unblocked_ibc;
820 else if (proc->ibc < lowest_ibc)
821 kvm->arch.model.ibc = lowest_ibc;
822 else
823 kvm->arch.model.ibc = proc->ibc;
824 }
825 memcpy(kvm->arch.model.fac_list, proc->fac_list,
826 S390_ARCH_FAC_LIST_SIZE_BYTE);
827 } else
828 ret = -EFAULT;
829 kfree(proc);
830out:
831 mutex_unlock(&kvm->lock);
832 return ret;
833}
834
835static int kvm_s390_set_processor_feat(struct kvm *kvm,
836 struct kvm_device_attr *attr)
837{
838 struct kvm_s390_vm_cpu_feat data;
839 int ret = -EBUSY;
840
841 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
842 return -EFAULT;
843 if (!bitmap_subset((unsigned long *) data.feat,
844 kvm_s390_available_cpu_feat,
845 KVM_S390_VM_CPU_FEAT_NR_BITS))
846 return -EINVAL;
847
848 mutex_lock(&kvm->lock);
849 if (!atomic_read(&kvm->online_vcpus)) {
850 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
851 KVM_S390_VM_CPU_FEAT_NR_BITS);
852 ret = 0;
853 }
854 mutex_unlock(&kvm->lock);
855 return ret;
856}
857
858static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
859 struct kvm_device_attr *attr)
860{
861 /*
862 * Once supported by kernel + hw, we have to store the subfunctions
863 * in kvm->arch and remember that user space configured them.
864 */
865 return -ENXIO;
866}
867
868static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
869{
870 int ret = -ENXIO;
871
872 switch (attr->attr) {
873 case KVM_S390_VM_CPU_PROCESSOR:
874 ret = kvm_s390_set_processor(kvm, attr);
875 break;
876 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
877 ret = kvm_s390_set_processor_feat(kvm, attr);
878 break;
879 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
880 ret = kvm_s390_set_processor_subfunc(kvm, attr);
881 break;
882 }
883 return ret;
884}
885
886static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
887{
888 struct kvm_s390_vm_cpu_processor *proc;
889 int ret = 0;
890
891 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
892 if (!proc) {
893 ret = -ENOMEM;
894 goto out;
895 }
896 proc->cpuid = kvm->arch.model.cpuid;
897 proc->ibc = kvm->arch.model.ibc;
898 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
899 S390_ARCH_FAC_LIST_SIZE_BYTE);
900 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
901 ret = -EFAULT;
902 kfree(proc);
903out:
904 return ret;
905}
906
907static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
908{
909 struct kvm_s390_vm_cpu_machine *mach;
910 int ret = 0;
911
912 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
913 if (!mach) {
914 ret = -ENOMEM;
915 goto out;
916 }
917 get_cpu_id((struct cpuid *) &mach->cpuid);
918 mach->ibc = sclp.ibc;
919 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
920 S390_ARCH_FAC_LIST_SIZE_BYTE);
921 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
922 sizeof(S390_lowcore.stfle_fac_list));
923 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
924 ret = -EFAULT;
925 kfree(mach);
926out:
927 return ret;
928}
929
930static int kvm_s390_get_processor_feat(struct kvm *kvm,
931 struct kvm_device_attr *attr)
932{
933 struct kvm_s390_vm_cpu_feat data;
934
935 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
936 KVM_S390_VM_CPU_FEAT_NR_BITS);
937 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
938 return -EFAULT;
939 return 0;
940}
941
942static int kvm_s390_get_machine_feat(struct kvm *kvm,
943 struct kvm_device_attr *attr)
944{
945 struct kvm_s390_vm_cpu_feat data;
946
947 bitmap_copy((unsigned long *) data.feat,
948 kvm_s390_available_cpu_feat,
949 KVM_S390_VM_CPU_FEAT_NR_BITS);
950 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
951 return -EFAULT;
952 return 0;
953}
954
955static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
956 struct kvm_device_attr *attr)
957{
958 /*
959 * Once we can actually configure subfunctions (kernel + hw support),
960 * we have to check if they were already set by user space, if so copy
961 * them from kvm->arch.
962 */
963 return -ENXIO;
964}
965
966static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
967 struct kvm_device_attr *attr)
968{
969 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
970 sizeof(struct kvm_s390_vm_cpu_subfunc)))
971 return -EFAULT;
972 return 0;
973}
974static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
975{
976 int ret = -ENXIO;
977
978 switch (attr->attr) {
979 case KVM_S390_VM_CPU_PROCESSOR:
980 ret = kvm_s390_get_processor(kvm, attr);
981 break;
982 case KVM_S390_VM_CPU_MACHINE:
983 ret = kvm_s390_get_machine(kvm, attr);
984 break;
985 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
986 ret = kvm_s390_get_processor_feat(kvm, attr);
987 break;
988 case KVM_S390_VM_CPU_MACHINE_FEAT:
989 ret = kvm_s390_get_machine_feat(kvm, attr);
990 break;
991 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
992 ret = kvm_s390_get_processor_subfunc(kvm, attr);
993 break;
994 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
995 ret = kvm_s390_get_machine_subfunc(kvm, attr);
996 break;
997 }
998 return ret;
999}
1000
1001static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1002{
1003 int ret;
1004
1005 switch (attr->group) {
1006 case KVM_S390_VM_MEM_CTRL:
1007 ret = kvm_s390_set_mem_control(kvm, attr);
1008 break;
1009 case KVM_S390_VM_TOD:
1010 ret = kvm_s390_set_tod(kvm, attr);
1011 break;
1012 case KVM_S390_VM_CPU_MODEL:
1013 ret = kvm_s390_set_cpu_model(kvm, attr);
1014 break;
1015 case KVM_S390_VM_CRYPTO:
1016 ret = kvm_s390_vm_set_crypto(kvm, attr);
1017 break;
1018 default:
1019 ret = -ENXIO;
1020 break;
1021 }
1022
1023 return ret;
1024}
1025
1026static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1027{
1028 int ret;
1029
1030 switch (attr->group) {
1031 case KVM_S390_VM_MEM_CTRL:
1032 ret = kvm_s390_get_mem_control(kvm, attr);
1033 break;
1034 case KVM_S390_VM_TOD:
1035 ret = kvm_s390_get_tod(kvm, attr);
1036 break;
1037 case KVM_S390_VM_CPU_MODEL:
1038 ret = kvm_s390_get_cpu_model(kvm, attr);
1039 break;
1040 default:
1041 ret = -ENXIO;
1042 break;
1043 }
1044
1045 return ret;
1046}
1047
1048static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1049{
1050 int ret;
1051
1052 switch (attr->group) {
1053 case KVM_S390_VM_MEM_CTRL:
1054 switch (attr->attr) {
1055 case KVM_S390_VM_MEM_ENABLE_CMMA:
1056 case KVM_S390_VM_MEM_CLR_CMMA:
1057 ret = sclp.has_cmma ? 0 : -ENXIO;
1058 break;
1059 case KVM_S390_VM_MEM_LIMIT_SIZE:
1060 ret = 0;
1061 break;
1062 default:
1063 ret = -ENXIO;
1064 break;
1065 }
1066 break;
1067 case KVM_S390_VM_TOD:
1068 switch (attr->attr) {
1069 case KVM_S390_VM_TOD_LOW:
1070 case KVM_S390_VM_TOD_HIGH:
1071 ret = 0;
1072 break;
1073 default:
1074 ret = -ENXIO;
1075 break;
1076 }
1077 break;
1078 case KVM_S390_VM_CPU_MODEL:
1079 switch (attr->attr) {
1080 case KVM_S390_VM_CPU_PROCESSOR:
1081 case KVM_S390_VM_CPU_MACHINE:
1082 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1083 case KVM_S390_VM_CPU_MACHINE_FEAT:
1084 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1085 ret = 0;
1086 break;
1087 /* configuring subfunctions is not supported yet */
1088 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1089 default:
1090 ret = -ENXIO;
1091 break;
1092 }
1093 break;
1094 case KVM_S390_VM_CRYPTO:
1095 switch (attr->attr) {
1096 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1097 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1098 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1099 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1100 ret = 0;
1101 break;
1102 default:
1103 ret = -ENXIO;
1104 break;
1105 }
1106 break;
1107 default:
1108 ret = -ENXIO;
1109 break;
1110 }
1111
1112 return ret;
1113}
1114
1115static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1116{
1117 uint8_t *keys;
1118 uint64_t hva;
1119 int i, r = 0;
1120
1121 if (args->flags != 0)
1122 return -EINVAL;
1123
1124 /* Is this guest using storage keys? */
1125 if (!mm_use_skey(current->mm))
1126 return KVM_S390_GET_SKEYS_NONE;
1127
1128 /* Enforce sane limit on memory allocation */
1129 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1130 return -EINVAL;
1131
1132 keys = kmalloc_array(args->count, sizeof(uint8_t),
1133 GFP_KERNEL | __GFP_NOWARN);
1134 if (!keys)
1135 keys = vmalloc(sizeof(uint8_t) * args->count);
1136 if (!keys)
1137 return -ENOMEM;
1138
	down_read(&current->mm->mmap_sem);
1140 for (i = 0; i < args->count; i++) {
1141 hva = gfn_to_hva(kvm, args->start_gfn + i);
1142 if (kvm_is_error_hva(hva)) {
1143 r = -EFAULT;
1144 break;
1145 }
1146
1147 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1148 if (r)
1149 break;
1150 }
	up_read(&current->mm->mmap_sem);
1152
1153 if (!r) {
1154 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1155 sizeof(uint8_t) * args->count);
1156 if (r)
1157 r = -EFAULT;
1158 }
1159
1160 kvfree(keys);
1161 return r;
1162}
1163
1164static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1165{
1166 uint8_t *keys;
1167 uint64_t hva;
1168 int i, r = 0;
1169
1170 if (args->flags != 0)
1171 return -EINVAL;
1172
1173 /* Enforce sane limit on memory allocation */
1174 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1175 return -EINVAL;
1176
1177 keys = kmalloc_array(args->count, sizeof(uint8_t),
1178 GFP_KERNEL | __GFP_NOWARN);
1179 if (!keys)
1180 keys = vmalloc(sizeof(uint8_t) * args->count);
1181 if (!keys)
1182 return -ENOMEM;
1183
1184 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1185 sizeof(uint8_t) * args->count);
1186 if (r) {
1187 r = -EFAULT;
1188 goto out;
1189 }
1190
1191 /* Enable storage key handling for the guest */
1192 r = s390_enable_skey();
1193 if (r)
1194 goto out;
1195
	down_read(&current->mm->mmap_sem);
1197 for (i = 0; i < args->count; i++) {
1198 hva = gfn_to_hva(kvm, args->start_gfn + i);
1199 if (kvm_is_error_hva(hva)) {
1200 r = -EFAULT;
1201 break;
1202 }
1203
1204 /* Lowest order bit is reserved */
1205 if (keys[i] & 0x01) {
1206 r = -EINVAL;
1207 break;
1208 }
1209
1210 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1211 if (r)
1212 break;
1213 }
	up_read(&current->mm->mmap_sem);
1215out:
1216 kvfree(keys);
1217 return r;
1218}
1219
1220long kvm_arch_vm_ioctl(struct file *filp,
1221 unsigned int ioctl, unsigned long arg)
1222{
1223 struct kvm *kvm = filp->private_data;
1224 void __user *argp = (void __user *)arg;
1225 struct kvm_device_attr attr;
1226 int r;
1227
1228 switch (ioctl) {
1229 case KVM_S390_INTERRUPT: {
1230 struct kvm_s390_interrupt s390int;
1231
1232 r = -EFAULT;
1233 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1234 break;
1235 r = kvm_s390_inject_vm(kvm, &s390int);
1236 break;
1237 }
1238 case KVM_ENABLE_CAP: {
1239 struct kvm_enable_cap cap;
1240 r = -EFAULT;
1241 if (copy_from_user(&cap, argp, sizeof(cap)))
1242 break;
1243 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1244 break;
1245 }
1246 case KVM_CREATE_IRQCHIP: {
1247 struct kvm_irq_routing_entry routing;
1248
1249 r = -EINVAL;
1250 if (kvm->arch.use_irqchip) {
1251 /* Set up dummy routing. */
1252 memset(&routing, 0, sizeof(routing));
1253 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1254 }
1255 break;
1256 }
1257 case KVM_SET_DEVICE_ATTR: {
1258 r = -EFAULT;
1259 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1260 break;
1261 r = kvm_s390_vm_set_attr(kvm, &attr);
1262 break;
1263 }
1264 case KVM_GET_DEVICE_ATTR: {
1265 r = -EFAULT;
1266 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1267 break;
1268 r = kvm_s390_vm_get_attr(kvm, &attr);
1269 break;
1270 }
1271 case KVM_HAS_DEVICE_ATTR: {
1272 r = -EFAULT;
1273 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1274 break;
1275 r = kvm_s390_vm_has_attr(kvm, &attr);
1276 break;
1277 }
1278 case KVM_S390_GET_SKEYS: {
1279 struct kvm_s390_skeys args;
1280
1281 r = -EFAULT;
1282 if (copy_from_user(&args, argp,
1283 sizeof(struct kvm_s390_skeys)))
1284 break;
1285 r = kvm_s390_get_skeys(kvm, &args);
1286 break;
1287 }
1288 case KVM_S390_SET_SKEYS: {
1289 struct kvm_s390_skeys args;
1290
1291 r = -EFAULT;
1292 if (copy_from_user(&args, argp,
1293 sizeof(struct kvm_s390_skeys)))
1294 break;
1295 r = kvm_s390_set_skeys(kvm, &args);
1296 break;
1297 }
1298 default:
1299 r = -ENOTTY;
1300 }
1301
1302 return r;
1303}
1304
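/*
 * Retrieve the AP configuration via PQAP with the QCI function code into
 * a 128 byte buffer and return the resulting condition code.
 */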
1305static int kvm_s390_query_ap_config(u8 *config)
1306{
1307 u32 fcn_code = 0x04000000UL;
1308 u32 cc = 0;
1309
1310 memset(config, 0, 128);
1311 asm volatile(
1312 "lgr 0,%1\n"
1313 "lgr 2,%2\n"
1314 ".long 0xb2af0000\n" /* PQAP(QCI) */
1315 "0: ipm %0\n"
1316 "srl %0,28\n"
1317 "1:\n"
1318 EX_TABLE(0b, 1b)
1319 : "+r" (cc)
1320 : "r" (fcn_code), "r" (config)
1321 : "cc", "0", "2", "memory"
1322 );
1323
1324 return cc;
1325}
1326
1327static int kvm_s390_apxa_installed(void)
1328{
1329 u8 config[128];
1330 int cc;
1331
1332 if (test_facility(12)) {
1333 cc = kvm_s390_query_ap_config(config);
1334
1335 if (cc)
1336 pr_err("PQAP(QCI) failed with cc=%d", cc);
1337 else
1338 return config[0] & 0x40;
1339 }
1340
1341 return 0;
1342}
1343
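/* Use CRYCB format 2 if APXA is installed, format 1 otherwise. */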
1344static void kvm_s390_set_crycb_format(struct kvm *kvm)
1345{
1346 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1347
1348 if (kvm_s390_apxa_installed())
1349 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1350 else
1351 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1352}
1353
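/* Report the host CPU id, with the version byte overridden to 0xff. */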
1354static u64 kvm_s390_get_initial_cpuid(void)
1355{
1356 struct cpuid cpuid;
1357
1358 get_cpu_id(&cpuid);
1359 cpuid.version = 0xff;
1360 return *((u64 *) &cpuid);
1361}
1362
1363static void kvm_s390_crypto_init(struct kvm *kvm)
1364{
1365 if (!test_kvm_facility(kvm, 76))
1366 return;
1367
1368 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1369 kvm_s390_set_crycb_format(kvm);
1370
1371 /* Enable AES/DEA protected key functions by default */
1372 kvm->arch.crypto.aes_kw = 1;
1373 kvm->arch.crypto.dea_kw = 1;
1374 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1375 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1376 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1377 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1378}
1379
1380static void sca_dispose(struct kvm *kvm)
1381{
1382 if (kvm->arch.use_esca)
1383 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1384 else
1385 free_page((unsigned long)(kvm->arch.sca));
1386 kvm->arch.sca = NULL;
1387}
1388
1389int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1390{
1391 gfp_t alloc_flags = GFP_KERNEL;
1392 int i, rc;
1393 char debug_name[16];
1394 static unsigned long sca_offset;
1395
1396 rc = -EINVAL;
1397#ifdef CONFIG_KVM_S390_UCONTROL
1398 if (type & ~KVM_VM_S390_UCONTROL)
1399 goto out_err;
1400 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1401 goto out_err;
1402#else
1403 if (type)
1404 goto out_err;
1405#endif
1406
1407 rc = s390_enable_sie();
1408 if (rc)
1409 goto out_err;
1410
1411 rc = -ENOMEM;
1412
1413 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1414
1415 kvm->arch.use_esca = 0; /* start with basic SCA */
1416 if (!sclp.has_64bscao)
1417 alloc_flags |= GFP_DMA;
1418 rwlock_init(&kvm->arch.sca_lock);
1419 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1420 if (!kvm->arch.sca)
1421 goto out_err;
1422 spin_lock(&kvm_lock);
1423 sca_offset += 16;
1424 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1425 sca_offset = 0;
1426 kvm->arch.sca = (struct bsca_block *)
1427 ((char *) kvm->arch.sca + sca_offset);
1428 spin_unlock(&kvm_lock);
1429
1430 sprintf(debug_name, "kvm-%u", current->pid);
1431
1432 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1433 if (!kvm->arch.dbf)
1434 goto out_err;
1435
1436 kvm->arch.sie_page2 =
1437 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1438 if (!kvm->arch.sie_page2)
1439 goto out_err;
1440
1441 /* Populate the facility mask initially. */
1442 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1443 sizeof(S390_lowcore.stfle_fac_list));
1444 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1445 if (i < kvm_s390_fac_list_mask_size())
1446 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1447 else
1448 kvm->arch.model.fac_mask[i] = 0UL;
1449 }
1450
1451 /* Populate the facility list initially. */
1452 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1453 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1454 S390_ARCH_FAC_LIST_SIZE_BYTE);
1455
1456 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1457 set_kvm_facility(kvm->arch.model.fac_list, 74);
1458
1459 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1460 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1461
1462 kvm_s390_crypto_init(kvm);
1463
1464 spin_lock_init(&kvm->arch.float_int.lock);
1465 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1466 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1467 init_waitqueue_head(&kvm->arch.ipte_wq);
1468 mutex_init(&kvm->arch.ipte_mutex);
1469
1470 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1471 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1472
1473 if (type & KVM_VM_S390_UCONTROL) {
1474 kvm->arch.gmap = NULL;
1475 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1476 } else {
1477 if (sclp.hamax == U64_MAX)
1478 kvm->arch.mem_limit = TASK_MAX_SIZE;
1479 else
1480 kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1481 sclp.hamax + 1);
1482 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1483 if (!kvm->arch.gmap)
1484 goto out_err;
1485 kvm->arch.gmap->private = kvm;
1486 kvm->arch.gmap->pfault_enabled = 0;
1487 }
1488
1489 kvm->arch.css_support = 0;
1490 kvm->arch.use_irqchip = 0;
1491 kvm->arch.epoch = 0;
1492
1493 spin_lock_init(&kvm->arch.start_stop_lock);
1494 kvm_s390_vsie_init(kvm);
1495 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1496
1497 return 0;
1498out_err:
1499 free_page((unsigned long)kvm->arch.sie_page2);
1500 debug_unregister(kvm->arch.dbf);
1501 sca_dispose(kvm);
1502 KVM_EVENT(3, "creation of vm failed: %d", rc);
1503 return rc;
1504}
1505
1506bool kvm_arch_has_vcpu_debugfs(void)
1507{
1508 return false;
1509}
1510
1511int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1512{
1513 return 0;
1514}
1515
1516void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1517{
1518 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1519 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1520 kvm_s390_clear_local_irqs(vcpu);
1521 kvm_clear_async_pf_completion_queue(vcpu);
1522 if (!kvm_is_ucontrol(vcpu->kvm))
1523 sca_del_vcpu(vcpu);
1524
1525 if (kvm_is_ucontrol(vcpu->kvm))
1526 gmap_remove(vcpu->arch.gmap);
1527
1528 if (vcpu->kvm->arch.use_cmma)
1529 kvm_s390_vcpu_unsetup_cmma(vcpu);
1530 free_page((unsigned long)(vcpu->arch.sie_block));
1531
1532 kvm_vcpu_uninit(vcpu);
1533 kmem_cache_free(kvm_vcpu_cache, vcpu);
1534}
1535
1536static void kvm_free_vcpus(struct kvm *kvm)
1537{
1538 unsigned int i;
1539 struct kvm_vcpu *vcpu;
1540
1541 kvm_for_each_vcpu(i, vcpu, kvm)
1542 kvm_arch_vcpu_destroy(vcpu);
1543
1544 mutex_lock(&kvm->lock);
1545 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1546 kvm->vcpus[i] = NULL;
1547
1548 atomic_set(&kvm->online_vcpus, 0);
1549 mutex_unlock(&kvm->lock);
1550}
1551
1552void kvm_arch_destroy_vm(struct kvm *kvm)
1553{
1554 kvm_free_vcpus(kvm);
1555 sca_dispose(kvm);
1556 debug_unregister(kvm->arch.dbf);
1557 free_page((unsigned long)kvm->arch.sie_page2);
1558 if (!kvm_is_ucontrol(kvm))
1559 gmap_remove(kvm->arch.gmap);
1560 kvm_s390_destroy_adapters(kvm);
1561 kvm_s390_clear_float_irqs(kvm);
1562 kvm_s390_vsie_destroy(kvm);
1563 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1564}
1565
1566/* Section: vcpu related */
1567static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1568{
1569 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1570 if (!vcpu->arch.gmap)
1571 return -ENOMEM;
1572 vcpu->arch.gmap->private = vcpu->kvm;
1573
1574 return 0;
1575}
1576
1577static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1578{
1579 if (!kvm_s390_use_sca_entries())
1580 return;
1581 read_lock(&vcpu->kvm->arch.sca_lock);
1582 if (vcpu->kvm->arch.use_esca) {
1583 struct esca_block *sca = vcpu->kvm->arch.sca;
1584
1585 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1586 sca->cpu[vcpu->vcpu_id].sda = 0;
1587 } else {
1588 struct bsca_block *sca = vcpu->kvm->arch.sca;
1589
1590 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1591 sca->cpu[vcpu->vcpu_id].sda = 0;
1592 }
1593 read_unlock(&vcpu->kvm->arch.sca_lock);
1594}
1595
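/* Hook a VCPU into the (basic or extended) SCA and wire up its SIE block. */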
1596static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1597{
1598 if (!kvm_s390_use_sca_entries()) {
1599 struct bsca_block *sca = vcpu->kvm->arch.sca;
1600
1601 /* we still need the basic sca for the ipte control */
1602 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1603 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1604 }
1605 read_lock(&vcpu->kvm->arch.sca_lock);
1606 if (vcpu->kvm->arch.use_esca) {
1607 struct esca_block *sca = vcpu->kvm->arch.sca;
1608
1609 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1610 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1611 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1612 vcpu->arch.sie_block->ecb2 |= 0x04U;
1613 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1614 } else {
1615 struct bsca_block *sca = vcpu->kvm->arch.sca;
1616
1617 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1618 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1619 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1620 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1621 }
1622 read_unlock(&vcpu->kvm->arch.sca_lock);
1623}
1624
1625/* Basic SCA to Extended SCA data copy routines */
1626static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1627{
1628 d->sda = s->sda;
1629 d->sigp_ctrl.c = s->sigp_ctrl.c;
1630 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1631}
1632
1633static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1634{
1635 int i;
1636
1637 d->ipte_control = s->ipte_control;
1638 d->mcn[0] = s->mcn;
1639 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1640 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1641}
1642
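/*
 * Convert this VM from a basic to an extended SCA: block all VCPUs, copy
 * the existing entries, repoint every SIE block at the new SCA and free
 * the old one afterwards.
 */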
1643static int sca_switch_to_extended(struct kvm *kvm)
1644{
1645 struct bsca_block *old_sca = kvm->arch.sca;
1646 struct esca_block *new_sca;
1647 struct kvm_vcpu *vcpu;
1648 unsigned int vcpu_idx;
1649 u32 scaol, scaoh;
1650
1651 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1652 if (!new_sca)
1653 return -ENOMEM;
1654
1655 scaoh = (u32)((u64)(new_sca) >> 32);
1656 scaol = (u32)(u64)(new_sca) & ~0x3fU;
1657
1658 kvm_s390_vcpu_block_all(kvm);
1659 write_lock(&kvm->arch.sca_lock);
1660
1661 sca_copy_b_to_e(new_sca, old_sca);
1662
1663 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1664 vcpu->arch.sie_block->scaoh = scaoh;
1665 vcpu->arch.sie_block->scaol = scaol;
1666 vcpu->arch.sie_block->ecb2 |= 0x04U;
1667 }
1668 kvm->arch.sca = new_sca;
1669 kvm->arch.use_esca = 1;
1670
1671 write_unlock(&kvm->arch.sca_lock);
1672 kvm_s390_vcpu_unblock_all(kvm);
1673
1674 free_page((unsigned long)old_sca);
1675
1676 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1677 old_sca, kvm->arch.sca);
1678 return 0;
1679}
1680
1681static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1682{
1683 int rc;
1684
1685 if (!kvm_s390_use_sca_entries()) {
1686 if (id < KVM_MAX_VCPUS)
1687 return true;
1688 return false;
1689 }
1690 if (id < KVM_S390_BSCA_CPU_SLOTS)
1691 return true;
1692 if (!sclp.has_esca || !sclp.has_64bscao)
1693 return false;
1694
1695 mutex_lock(&kvm->lock);
1696 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1697 mutex_unlock(&kvm->lock);
1698
1699 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1700}
1701
1702int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1703{
1704 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1705 kvm_clear_async_pf_completion_queue(vcpu);
1706 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1707 KVM_SYNC_GPRS |
1708 KVM_SYNC_ACRS |
1709 KVM_SYNC_CRS |
1710 KVM_SYNC_ARCH0 |
1711 KVM_SYNC_PFAULT;
1712 kvm_s390_set_prefix(vcpu, 0);
1713 if (test_kvm_facility(vcpu->kvm, 64))
1714 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1715 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1716 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1717 */
1718 if (MACHINE_HAS_VX)
1719 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1720 else
1721 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1722
1723 if (kvm_is_ucontrol(vcpu->kvm))
1724 return __kvm_ucontrol_vcpu_init(vcpu);
1725
1726 return 0;
1727}
1728
1729/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1730static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1731{
1732 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1733 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1734 vcpu->arch.cputm_start = get_tod_clock_fast();
1735 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1736}
1737
1738/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1739static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1740{
1741 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1742 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1743 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1744 vcpu->arch.cputm_start = 0;
1745 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1746}
1747
1748/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1749static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1750{
1751 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1752 vcpu->arch.cputm_enabled = true;
1753 __start_cpu_timer_accounting(vcpu);
1754}
1755
1756/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1757static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1758{
1759 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1760 __stop_cpu_timer_accounting(vcpu);
1761 vcpu->arch.cputm_enabled = false;
1762}
1763
1764static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1765{
1766 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1767 __enable_cpu_timer_accounting(vcpu);
1768 preempt_enable();
1769}
1770
1771static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1772{
1773 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1774 __disable_cpu_timer_accounting(vcpu);
1775 preempt_enable();
1776}
1777
1778/* set the cpu timer - may only be called from the VCPU thread itself */
1779void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1780{
1781 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1782 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1783 if (vcpu->arch.cputm_enabled)
1784 vcpu->arch.cputm_start = get_tod_clock_fast();
1785 vcpu->arch.sie_block->cputm = cputm;
1786 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1787 preempt_enable();
1788}
1789
1790/* update and get the cpu timer - can also be called from other VCPU threads */
1791__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1792{
1793 unsigned int seq;
1794 __u64 value;
1795
1796 if (unlikely(!vcpu->arch.cputm_enabled))
1797 return vcpu->arch.sie_block->cputm;
1798
1799 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1800 do {
1801 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1802 /*
1803 * If the writer would ever execute a read in the critical
1804 * section, e.g. in irq context, we have a deadlock.
1805 */
1806 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1807 value = vcpu->arch.sie_block->cputm;
1808 /* if cputm_start is 0, accounting is being started/stopped */
1809 if (likely(vcpu->arch.cputm_start))
1810 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1811 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1812 preempt_enable();
1813 return value;
1814}
1815
1816void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1817{
1818
1819 gmap_enable(vcpu->arch.enabled_gmap);
1820 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1821 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1822 __start_cpu_timer_accounting(vcpu);
1823 vcpu->cpu = cpu;
1824}
1825
1826void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1827{
1828 vcpu->cpu = -1;
1829 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1830 __stop_cpu_timer_accounting(vcpu);
1831 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1832 vcpu->arch.enabled_gmap = gmap_get_enabled();
1833 gmap_disable(vcpu->arch.enabled_gmap);
1834
1835}
1836
1837static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1838{
1839 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1840 vcpu->arch.sie_block->gpsw.mask = 0UL;
1841 vcpu->arch.sie_block->gpsw.addr = 0UL;
1842 kvm_s390_set_prefix(vcpu, 0);
1843 kvm_s390_set_cpu_timer(vcpu, 0);
1844 vcpu->arch.sie_block->ckc = 0UL;
1845 vcpu->arch.sie_block->todpr = 0;
1846 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1847 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1848 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1849 /* make sure the new fpc will be lazily loaded */
1850 save_fpu_regs();
1851 current->thread.fpu.fpc = 0;
1852 vcpu->arch.sie_block->gbea = 1;
1853 vcpu->arch.sie_block->pp = 0;
1854 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1855 kvm_clear_async_pf_completion_queue(vcpu);
1856 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1857 kvm_s390_vcpu_stop(vcpu);
1858 kvm_s390_clear_local_irqs(vcpu);
1859}
1860
1861void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1862{
1863 mutex_lock(&vcpu->kvm->lock);
1864 preempt_disable();
1865 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1866 preempt_enable();
1867 mutex_unlock(&vcpu->kvm->lock);
1868 if (!kvm_is_ucontrol(vcpu->kvm)) {
1869 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1870 sca_add_vcpu(vcpu);
1871 }
1872 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1873 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1874 /* make vcpu_load load the right gmap on the first trigger */
1875 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1876}
1877
1878static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1879{
1880 if (!test_kvm_facility(vcpu->kvm, 76))
1881 return;
1882
1883 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1884
1885 if (vcpu->kvm->arch.crypto.aes_kw)
1886 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1887 if (vcpu->kvm->arch.crypto.dea_kw)
1888 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1889
1890 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1891}
1892
1893void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1894{
1895 free_page(vcpu->arch.sie_block->cbrlo);
1896 vcpu->arch.sie_block->cbrlo = 0;
1897}
1898
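/* Allocate the CBRL origin page and enable CMMA interpretation for this VCPU. */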
1899int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1900{
1901 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1902 if (!vcpu->arch.sie_block->cbrlo)
1903 return -ENOMEM;
1904
1905 vcpu->arch.sie_block->ecb2 |= 0x80;
1906 vcpu->arch.sie_block->ecb2 &= ~0x08;
1907 return 0;
1908}
1909
1910static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1911{
1912 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1913
1914 vcpu->arch.sie_block->ibc = model->ibc;
1915 if (test_kvm_facility(vcpu->kvm, 7))
1916 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1917}
1918
1919int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1920{
1921 int rc = 0;
1922
1923 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1924 CPUSTAT_SM |
1925 CPUSTAT_STOPPED);
1926
1927 if (test_kvm_facility(vcpu->kvm, 78))
1928 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1929 else if (test_kvm_facility(vcpu->kvm, 8))
1930 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1931
1932 kvm_s390_vcpu_setup_model(vcpu);
1933
1934 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1935 if (MACHINE_HAS_ESOP)
1936 vcpu->arch.sie_block->ecb |= 0x02;
1937 if (test_kvm_facility(vcpu->kvm, 9))
1938 vcpu->arch.sie_block->ecb |= 0x04;
1939 if (test_kvm_facility(vcpu->kvm, 73))
1940 vcpu->arch.sie_block->ecb |= 0x10;
1941
1942 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1943 vcpu->arch.sie_block->ecb2 |= 0x08;
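	/*
	 * eca base value: 0x01000000 enables MVPG interpretation and
	 * 0x00002000 enables protection-exception interpretation; the
	 * SCLP/facility dependent bits below (including vector support
	 * for facility 129) are only added when the host provides them.
	 */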
1944 vcpu->arch.sie_block->eca = 0x1002000U;
1945 if (sclp.has_cei)
1946 vcpu->arch.sie_block->eca |= 0x80000000U;
1947 if (sclp.has_ib)
1948 vcpu->arch.sie_block->eca |= 0x40000000U;
1949 if (sclp.has_siif)
1950 vcpu->arch.sie_block->eca |= 1;
1951 if (sclp.has_sigpif)
1952 vcpu->arch.sie_block->eca |= 0x10000000U;
1953 if (test_kvm_facility(vcpu->kvm, 129)) {
1954 vcpu->arch.sie_block->eca |= 0x00020000;
1955 vcpu->arch.sie_block->ecd |= 0x20000000;
1956 }
1957 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
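	/* intercept ISKE/SSKE/RRBE so that storage keys can be enabled lazily on first use */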
1958 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1959
1960 if (vcpu->kvm->arch.use_cmma) {
1961 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1962 if (rc)
1963 return rc;
1964 }
1965 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1966 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1967
1968 kvm_s390_vcpu_crypto_setup(vcpu);
1969
1970 return rc;
1971}
1972
1973struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1974 unsigned int id)
1975{
1976 struct kvm_vcpu *vcpu;
1977 struct sie_page *sie_page;
1978 int rc = -EINVAL;
1979
1980 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1981 goto out;
1982
1983 rc = -ENOMEM;
1984
1985 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1986 if (!vcpu)
1987 goto out;
1988
1989 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1990 if (!sie_page)
1991 goto out_free_cpu;
1992
1993 vcpu->arch.sie_block = &sie_page->sie_block;
1994 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1995
1996 /* the real guest size will always be smaller than msl */
1997 vcpu->arch.sie_block->mso = 0;
1998 vcpu->arch.sie_block->msl = sclp.hamax;
1999
2000 vcpu->arch.sie_block->icpua = id;
2001 spin_lock_init(&vcpu->arch.local_int.lock);
2002 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2003 vcpu->arch.local_int.wq = &vcpu->wq;
2004 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2005 seqcount_init(&vcpu->arch.cputm_seqcount);
2006
2007 rc = kvm_vcpu_init(vcpu, kvm, id);
2008 if (rc)
2009 goto out_free_sie_block;
2010 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2011 vcpu->arch.sie_block);
2012 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2013
2014 return vcpu;
2015out_free_sie_block:
2016 free_page((unsigned long)(vcpu->arch.sie_block));
2017out_free_cpu:
2018 kmem_cache_free(kvm_vcpu_cache, vcpu);
2019out:
2020 return ERR_PTR(rc);
2021}
2022
2023int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2024{
2025 return kvm_s390_vcpu_has_irq(vcpu, 0);
2026}
2027
2028void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2029{
2030 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2031 exit_sie(vcpu);
2032}
2033
2034void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2035{
2036 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2037}
2038
2039static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2040{
2041 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2042 exit_sie(vcpu);
2043}
2044
2045static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2046{
2047 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2048}
2049
2050/*
2051 * Kick a guest cpu out of SIE and wait until SIE is not running.
2052 * If the CPU is not running (e.g. waiting as idle) the function will
2053 * return immediately. */
2054void exit_sie(struct kvm_vcpu *vcpu)
2055{
2056 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2057 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2058 cpu_relax();
2059}
2060
2061/* Kick a guest cpu out of SIE to process a request synchronously */
2062void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2063{
2064 kvm_make_request(req, vcpu);
2065 kvm_s390_vcpu_request(vcpu);
2066}
2067
2068static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2069 unsigned long end)
2070{
2071 struct kvm *kvm = gmap->private;
2072 struct kvm_vcpu *vcpu;
2073 unsigned long prefix;
2074 int i;
2075
2076 if (gmap_is_shadow(gmap))
2077 return;
2078 if (start >= 1UL << 31)
2079 /* We are only interested in prefix pages */
2080 return;
2081 kvm_for_each_vcpu(i, vcpu, kvm) {
2082 /* match against both prefix pages */
2083 prefix = kvm_s390_get_prefix(vcpu);
2084 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2085 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2086 start, end);
2087 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2088 }
2089 }
2090}
2091
2092int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2093{
2094 /* kvm common code refers to this, but never calls it */
2095 BUG();
2096 return 0;
2097}
2098
2099static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2100 struct kvm_one_reg *reg)
2101{
2102 int r = -EINVAL;
2103
2104 switch (reg->id) {
2105 case KVM_REG_S390_TODPR:
2106 r = put_user(vcpu->arch.sie_block->todpr,
2107 (u32 __user *)reg->addr);
2108 break;
2109 case KVM_REG_S390_EPOCHDIFF:
2110 r = put_user(vcpu->arch.sie_block->epoch,
2111 (u64 __user *)reg->addr);
2112 break;
2113 case KVM_REG_S390_CPU_TIMER:
2114 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2115 (u64 __user *)reg->addr);
2116 break;
2117 case KVM_REG_S390_CLOCK_COMP:
2118 r = put_user(vcpu->arch.sie_block->ckc,
2119 (u64 __user *)reg->addr);
2120 break;
2121 case KVM_REG_S390_PFTOKEN:
2122 r = put_user(vcpu->arch.pfault_token,
2123 (u64 __user *)reg->addr);
2124 break;
2125 case KVM_REG_S390_PFCOMPARE:
2126 r = put_user(vcpu->arch.pfault_compare,
2127 (u64 __user *)reg->addr);
2128 break;
2129 case KVM_REG_S390_PFSELECT:
2130 r = put_user(vcpu->arch.pfault_select,
2131 (u64 __user *)reg->addr);
2132 break;
2133 case KVM_REG_S390_PP:
2134 r = put_user(vcpu->arch.sie_block->pp,
2135 (u64 __user *)reg->addr);
2136 break;
2137 case KVM_REG_S390_GBEA:
2138 r = put_user(vcpu->arch.sie_block->gbea,
2139 (u64 __user *)reg->addr);
2140 break;
2141 default:
2142 break;
2143 }
2144
2145 return r;
2146}
2147
2148static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2149 struct kvm_one_reg *reg)
2150{
2151 int r = -EINVAL;
2152 __u64 val;
2153
2154 switch (reg->id) {
2155 case KVM_REG_S390_TODPR:
2156 r = get_user(vcpu->arch.sie_block->todpr,
2157 (u32 __user *)reg->addr);
2158 break;
2159 case KVM_REG_S390_EPOCHDIFF:
2160 r = get_user(vcpu->arch.sie_block->epoch,
2161 (u64 __user *)reg->addr);
2162 break;
2163 case KVM_REG_S390_CPU_TIMER:
2164 r = get_user(val, (u64 __user *)reg->addr);
2165 if (!r)
2166 kvm_s390_set_cpu_timer(vcpu, val);
2167 break;
2168 case KVM_REG_S390_CLOCK_COMP:
2169 r = get_user(vcpu->arch.sie_block->ckc,
2170 (u64 __user *)reg->addr);
2171 break;
2172 case KVM_REG_S390_PFTOKEN:
2173 r = get_user(vcpu->arch.pfault_token,
2174 (u64 __user *)reg->addr);
2175 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2176 kvm_clear_async_pf_completion_queue(vcpu);
2177 break;
2178 case KVM_REG_S390_PFCOMPARE:
2179 r = get_user(vcpu->arch.pfault_compare,
2180 (u64 __user *)reg->addr);
2181 break;
2182 case KVM_REG_S390_PFSELECT:
2183 r = get_user(vcpu->arch.pfault_select,
2184 (u64 __user *)reg->addr);
2185 break;
2186 case KVM_REG_S390_PP:
2187 r = get_user(vcpu->arch.sie_block->pp,
2188 (u64 __user *)reg->addr);
2189 break;
2190 case KVM_REG_S390_GBEA:
2191 r = get_user(vcpu->arch.sie_block->gbea,
2192 (u64 __user *)reg->addr);
2193 break;
2194 default:
2195 break;
2196 }
2197
2198 return r;
2199}
2200
2201static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2202{
2203 kvm_s390_vcpu_initial_reset(vcpu);
2204 return 0;
2205}
2206
2207int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2208{
2209	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2210 return 0;
2211}
2212
2213int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2214{
2215	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2216 return 0;
2217}
2218
2219int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2220 struct kvm_sregs *sregs)
2221{
2222 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2223 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2224 return 0;
2225}
2226
2227int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2228 struct kvm_sregs *sregs)
2229{
2230 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2231 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2232 return 0;
2233}
2234
2235int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2236{
2237 if (test_fp_ctl(fpu->fpc))
2238 return -EINVAL;
2239 vcpu->run->s.regs.fpc = fpu->fpc;
2240 if (MACHINE_HAS_VX)
2241 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2242 (freg_t *) fpu->fprs);
2243 else
2244 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2245 return 0;
2246}
2247
2248int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2249{
2250 /* make sure we have the latest values */
2251 save_fpu_regs();
2252 if (MACHINE_HAS_VX)
2253 convert_vx_to_fp((freg_t *) fpu->fprs,
2254 (__vector128 *) vcpu->run->s.regs.vrs);
2255 else
2256 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2257 fpu->fpc = vcpu->run->s.regs.fpc;
2258 return 0;
2259}
2260
2261static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2262{
2263 int rc = 0;
2264
2265 if (!is_vcpu_stopped(vcpu))
2266 rc = -EBUSY;
2267 else {
2268 vcpu->run->psw_mask = psw.mask;
2269 vcpu->run->psw_addr = psw.addr;
2270 }
2271 return rc;
2272}
2273
2274int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2275 struct kvm_translation *tr)
2276{
2277 return -EINVAL; /* not implemented yet */
2278}
2279
2280#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2281 KVM_GUESTDBG_USE_HW_BP | \
2282 KVM_GUESTDBG_ENABLE)
2283
2284int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2285 struct kvm_guest_debug *dbg)
2286{
2287 int rc = 0;
2288
2289 vcpu->guest_debug = 0;
2290 kvm_s390_clear_bp_data(vcpu);
2291
2292 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2293 return -EINVAL;
2294 if (!sclp.has_gpere)
2295 return -EINVAL;
2296
2297 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2298 vcpu->guest_debug = dbg->control;
2299 /* enforce guest PER */
2300 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2301
2302 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2303 rc = kvm_s390_import_bp_data(vcpu, dbg);
2304 } else {
2305 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2306 vcpu->arch.guestdbg.last_bp = 0;
2307 }
2308
2309 if (rc) {
2310 vcpu->guest_debug = 0;
2311 kvm_s390_clear_bp_data(vcpu);
2312 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2313 }
2314
2315 return rc;
2316}
2317
2318int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2319 struct kvm_mp_state *mp_state)
2320{
2321 /* CHECK_STOP and LOAD are not supported yet */
2322 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2323 KVM_MP_STATE_OPERATING;
2324}
2325
2326int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2327 struct kvm_mp_state *mp_state)
2328{
2329 int rc = 0;
2330
2331 /* user space knows about this interface - let it control the state */
2332 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2333
2334 switch (mp_state->mp_state) {
2335 case KVM_MP_STATE_STOPPED:
2336 kvm_s390_vcpu_stop(vcpu);
2337 break;
2338 case KVM_MP_STATE_OPERATING:
2339 kvm_s390_vcpu_start(vcpu);
2340 break;
2341 case KVM_MP_STATE_LOAD:
2342 case KVM_MP_STATE_CHECK_STOP:
2343 /* fall through - CHECK_STOP and LOAD are not supported yet */
2344 default:
2345 rc = -ENXIO;
2346 }
2347
2348 return rc;
2349}
2350
2351static bool ibs_enabled(struct kvm_vcpu *vcpu)
2352{
2353 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2354}
2355
2356static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2357{
2358retry:
2359 kvm_s390_vcpu_request_handled(vcpu);
2360 if (!vcpu->requests)
2361 return 0;
2362 /*
2363 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2364 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2365 * This ensures that the ipte instruction for this request has
2366 * already finished. We might race against a second unmapper that
2367	 * wants to set the blocking bit. Let's just retry the request loop.
2368 */
2369 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2370 int rc;
2371 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2372 kvm_s390_get_prefix(vcpu),
2373 PAGE_SIZE * 2, PROT_WRITE);
2374 if (rc) {
2375 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2376 return rc;
2377 }
2378 goto retry;
2379 }
2380
2381 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
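		/* invalidate the cached host cpu number so that SIE flushes the guest TLB on the next entry */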
2382 vcpu->arch.sie_block->ihcpu = 0xffff;
2383 goto retry;
2384 }
2385
2386 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2387 if (!ibs_enabled(vcpu)) {
2388 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2389 atomic_or(CPUSTAT_IBS,
2390 &vcpu->arch.sie_block->cpuflags);
2391 }
2392 goto retry;
2393 }
2394
2395 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2396 if (ibs_enabled(vcpu)) {
2397 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2398 atomic_andnot(CPUSTAT_IBS,
2399 &vcpu->arch.sie_block->cpuflags);
2400 }
2401 goto retry;
2402 }
2403
2404 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2405 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2406 goto retry;
2407 }
2408
2409 /* nothing to do, just clear the request */
2410 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2411
2412 return 0;
2413}
2414
2415void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2416{
2417 struct kvm_vcpu *vcpu;
2418 int i;
2419
2420 mutex_lock(&kvm->lock);
2421 preempt_disable();
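	/* the guest TOD is the host TOD plus the epoch, so the epoch is the requested TOD minus the current host TOD */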
2422 kvm->arch.epoch = tod - get_tod_clock();
2423 kvm_s390_vcpu_block_all(kvm);
2424 kvm_for_each_vcpu(i, vcpu, kvm)
2425 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2426 kvm_s390_vcpu_unblock_all(kvm);
2427 preempt_enable();
2428 mutex_unlock(&kvm->lock);
2429}
2430
2431/**
2432 * kvm_arch_fault_in_page - fault-in guest page if necessary
2433 * @vcpu: The corresponding virtual cpu
2434 * @gpa: Guest physical address
2435 * @writable: Whether the page should be writable or not
2436 *
2437 * Make sure that a guest page has been faulted-in on the host.
2438 *
2439 * Return: Zero on success, negative error code otherwise.
2440 */
2441long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2442{
2443 return gmap_fault(vcpu->arch.gmap, gpa,
2444 writable ? FAULT_FLAG_WRITE : 0);
2445}
2446
2447static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2448 unsigned long token)
2449{
2450 struct kvm_s390_interrupt inti;
2451 struct kvm_s390_irq irq;
2452
2453 if (start_token) {
2454 irq.u.ext.ext_params2 = token;
2455 irq.type = KVM_S390_INT_PFAULT_INIT;
2456 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2457 } else {
2458 inti.type = KVM_S390_INT_PFAULT_DONE;
2459 inti.parm64 = token;
2460 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2461 }
2462}
2463
2464void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2465 struct kvm_async_pf *work)
2466{
2467 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2468 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2469}
2470
2471void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2472 struct kvm_async_pf *work)
2473{
2474 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2475 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2476}
2477
2478void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2479 struct kvm_async_pf *work)
2480{
2481 /* s390 will always inject the page directly */
2482}
2483
2484bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2485{
2486 /*
2487 * s390 will always inject the page directly,
2488	 * but we still want check_async_completion to clean up
2489 */
2490 return true;
2491}
2492
2493static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2494{
2495 hva_t hva;
2496 struct kvm_arch_async_pf arch;
2497 int rc;
2498
2499 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2500 return 0;
2501 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2502 vcpu->arch.pfault_compare)
2503 return 0;
2504 if (psw_extint_disabled(vcpu))
2505 return 0;
2506 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2507 return 0;
2508 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2509 return 0;
2510 if (!vcpu->arch.gmap->pfault_enabled)
2511 return 0;
2512
2513 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2514 hva += current->thread.gmap_addr & ~PAGE_MASK;
2515 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2516 return 0;
2517
2518 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2519 return rc;
2520}
2521
2522static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2523{
2524 int rc, cpuflags;
2525
2526 /*
2527 * On s390 notifications for arriving pages will be delivered directly
2528	 * to the guest but the housekeeping for completed pfaults is
2529 * handled outside the worker.
2530 */
2531 kvm_check_async_pf_completion(vcpu);
2532
2533 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2534 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2535
2536 if (need_resched())
2537 schedule();
2538
2539 if (test_cpu_flag(CIF_MCCK_PENDING))
2540 s390_handle_mcck();
2541
2542 if (!kvm_is_ucontrol(vcpu->kvm)) {
2543 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2544 if (rc)
2545 return rc;
2546 }
2547
2548 rc = kvm_s390_handle_requests(vcpu);
2549 if (rc)
2550 return rc;
2551
2552 if (guestdbg_enabled(vcpu)) {
2553 kvm_s390_backup_guest_per_regs(vcpu);
2554 kvm_s390_patch_guest_per_regs(vcpu);
2555 }
2556
2557 vcpu->arch.sie_block->icptcode = 0;
2558 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2559 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2560 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2561
2562 return 0;
2563}
2564
2565static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2566{
2567 struct kvm_s390_pgm_info pgm_info = {
2568 .code = PGM_ADDRESSING,
2569 };
2570 u8 opcode, ilen;
2571 int rc;
2572
2573 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2574 trace_kvm_s390_sie_fault(vcpu);
2575
2576 /*
2577 * We want to inject an addressing exception, which is defined as a
2578 * suppressing or terminating exception. However, since we came here
2579 * by a DAT access exception, the PSW still points to the faulting
2580 * instruction since DAT exceptions are nullifying. So we've got
2581 * to look up the current opcode to get the length of the instruction
2582 * to be able to forward the PSW.
2583 */
2584 rc = read_guest_instr(vcpu, &opcode, 1);
2585 ilen = insn_length(opcode);
2586 if (rc < 0) {
2587 return rc;
2588 } else if (rc) {
2589 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2590 * Forward by arbitrary ilc, injection will take care of
2591 * nullification if necessary.
2592 */
2593 pgm_info = vcpu->arch.pgm;
2594 ilen = 4;
2595 }
2596 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2597 kvm_s390_forward_psw(vcpu, ilen);
2598 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2599}
2600
2601static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2602{
2603 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2604 vcpu->arch.sie_block->icptcode);
2605 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2606
2607 if (guestdbg_enabled(vcpu))
2608 kvm_s390_restore_guest_per_regs(vcpu);
2609
2610 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2611 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2612
2613 if (vcpu->arch.sie_block->icptcode > 0) {
2614 int rc = kvm_handle_sie_intercept(vcpu);
2615
2616 if (rc != -EOPNOTSUPP)
2617 return rc;
2618 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2619 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2620 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2621 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2622 return -EREMOTE;
2623 } else if (exit_reason != -EFAULT) {
2624 vcpu->stat.exit_null++;
2625 return 0;
2626 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2627 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2628 vcpu->run->s390_ucontrol.trans_exc_code =
2629 current->thread.gmap_addr;
2630 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2631 return -EREMOTE;
2632 } else if (current->thread.gmap_pfault) {
2633 trace_kvm_s390_major_guest_pfault(vcpu);
2634 current->thread.gmap_pfault = 0;
2635 if (kvm_arch_setup_async_pf(vcpu))
2636 return 0;
2637 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2638 }
2639 return vcpu_post_run_fault_in_sie(vcpu);
2640}
2641
2642static int __vcpu_run(struct kvm_vcpu *vcpu)
2643{
2644 int rc, exit_reason;
2645
2646 /*
2647	 * We try to hold kvm->srcu during most of vcpu_run (except when
2648	 * running the guest), so that memslots (and other stuff) are protected
2649 */
2650 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2651
2652 do {
2653 rc = vcpu_pre_run(vcpu);
2654 if (rc)
2655 break;
2656
2657 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2658 /*
2659	 * As PF_VCPU will be used in the fault handler, there must be no
2660	 * uaccess between guest_enter and guest_exit.
2661 */
2662 local_irq_disable();
2663 guest_enter_irqoff();
2664 __disable_cpu_timer_accounting(vcpu);
2665 local_irq_enable();
2666 exit_reason = sie64a(vcpu->arch.sie_block,
2667 vcpu->run->s.regs.gprs);
2668 local_irq_disable();
2669 __enable_cpu_timer_accounting(vcpu);
2670 guest_exit_irqoff();
2671 local_irq_enable();
2672 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2673
2674 rc = vcpu_post_run(vcpu, exit_reason);
2675 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2676
2677 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2678 return rc;
2679}
2680
2681static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2682{
2683 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2684 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2685 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2686 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2687 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2688 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2689 /* some control register changes require a tlb flush */
2690 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2691 }
2692 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2693 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2694 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2695 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2696 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2697 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2698 }
2699 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2700 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2701 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2702 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2703 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2704 kvm_clear_async_pf_completion_queue(vcpu);
2705 }
2706 /*
2707 * If userspace sets the riccb (e.g. after migration) to a valid state,
2708 * we should enable RI here instead of doing the lazy enablement.
2709 */
2710 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2711 test_kvm_facility(vcpu->kvm, 64)) {
2712 struct runtime_instr_cb *riccb =
2713 (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2714
2715 if (riccb->valid)
2716 vcpu->arch.sie_block->ecb3 |= 0x01;
2717 }
2718 save_access_regs(vcpu->arch.host_acrs);
2719 restore_access_regs(vcpu->run->s.regs.acrs);
2720 /* save host (userspace) fprs/vrs */
2721 save_fpu_regs();
2722 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2723 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2724 if (MACHINE_HAS_VX)
2725 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2726 else
2727 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2728 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2729 if (test_fp_ctl(current->thread.fpu.fpc))
2730 /* User space provided an invalid FPC, let's clear it */
2731 current->thread.fpu.fpc = 0;
2732
2733 kvm_run->kvm_dirty_regs = 0;
2734}
2735
2736static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2737{
2738 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2739 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2740 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2741 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2742 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2743 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2744 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2745 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2746 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2747 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2748 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2749 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2750 save_access_regs(vcpu->run->s.regs.acrs);
2751 restore_access_regs(vcpu->arch.host_acrs);
2752 /* Save guest register state */
2753 save_fpu_regs();
2754 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2755 /* Restore will be done lazily at return */
2756 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2757 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2758
2759}
2760
2761int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2762{
2763 int rc;
2764 sigset_t sigsaved;
2765
2766 if (guestdbg_exit_pending(vcpu)) {
2767 kvm_s390_prepare_debug_exit(vcpu);
2768 return 0;
2769 }
2770
2771 if (vcpu->sigset_active)
2772 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2773
2774 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2775 kvm_s390_vcpu_start(vcpu);
2776 } else if (is_vcpu_stopped(vcpu)) {
2777 pr_err_ratelimited("can't run stopped vcpu %d\n",
2778 vcpu->vcpu_id);
2779 return -EINVAL;
2780 }
2781
2782 sync_regs(vcpu, kvm_run);
2783 enable_cpu_timer_accounting(vcpu);
2784
2785 might_fault();
2786 rc = __vcpu_run(vcpu);
2787
2788 if (signal_pending(current) && !rc) {
2789 kvm_run->exit_reason = KVM_EXIT_INTR;
2790 rc = -EINTR;
2791 }
2792
2793 if (guestdbg_exit_pending(vcpu) && !rc) {
2794 kvm_s390_prepare_debug_exit(vcpu);
2795 rc = 0;
2796 }
2797
2798 if (rc == -EREMOTE) {
2799 /* userspace support is needed, kvm_run has been prepared */
2800 rc = 0;
2801 }
2802
2803 disable_cpu_timer_accounting(vcpu);
2804 store_regs(vcpu, kvm_run);
2805
2806 if (vcpu->sigset_active)
2807 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2808
2809 vcpu->stat.exit_userspace++;
2810 return rc;
2811}
2812
2813/*
2814 * store status at address
2815	 * we have two special cases:
2816 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2817 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2818 */
2819int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2820{
2821 unsigned char archmode = 1;
2822 freg_t fprs[NUM_FPRS];
2823 unsigned int px;
2824 u64 clkcomp, cputm;
2825 int rc;
2826
2827 px = kvm_s390_get_prefix(vcpu);
2828 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2829 if (write_guest_abs(vcpu, 163, &archmode, 1))
2830 return -EFAULT;
2831 gpa = 0;
2832 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2833 if (write_guest_real(vcpu, 163, &archmode, 1))
2834 return -EFAULT;
2835 gpa = px;
2836 } else
2837 gpa -= __LC_FPREGS_SAVE_AREA;
2838
2839 /* manually convert vector registers if necessary */
2840 if (MACHINE_HAS_VX) {
2841 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2842 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2843 fprs, 128);
2844 } else {
2845 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2846 vcpu->run->s.regs.fprs, 128);
2847 }
2848 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2849 vcpu->run->s.regs.gprs, 128);
2850 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2851 &vcpu->arch.sie_block->gpsw, 16);
2852 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2853 &px, 4);
2854 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2855 &vcpu->run->s.regs.fpc, 4);
2856 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2857 &vcpu->arch.sie_block->todpr, 4);
2858 cputm = kvm_s390_get_cpu_timer(vcpu);
2859 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2860 &cputm, 8);
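	/* the save area holds the clock comparator without its low-order byte (bits 0-55 only) */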
2861 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2862 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2863 &clkcomp, 8);
2864 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2865 &vcpu->run->s.regs.acrs, 64);
2866 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2867 &vcpu->arch.sie_block->gcr, 128);
2868 return rc ? -EFAULT : 0;
2869}
2870
2871int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2872{
2873 /*
2874 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2875	 * switch in the run ioctl. Let's update our copies before we store
2876	 * them into the save area.
2877 */
2878 save_fpu_regs();
2879 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2880 save_access_regs(vcpu->run->s.regs.acrs);
2881
2882 return kvm_s390_store_status_unloaded(vcpu, addr);
2883}
2884
2885static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2886{
2887 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2888 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2889}
2890
2891static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2892{
2893 unsigned int i;
2894 struct kvm_vcpu *vcpu;
2895
2896 kvm_for_each_vcpu(i, vcpu, kvm) {
2897 __disable_ibs_on_vcpu(vcpu);
2898 }
2899}
2900
2901static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2902{
2903 if (!sclp.has_ibs)
2904 return;
2905 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2906 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2907}
2908
2909void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2910{
2911 int i, online_vcpus, started_vcpus = 0;
2912
2913 if (!is_vcpu_stopped(vcpu))
2914 return;
2915
2916 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2917 /* Only one cpu at a time may enter/leave the STOPPED state. */
2918 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2919 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2920
2921 for (i = 0; i < online_vcpus; i++) {
2922 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2923 started_vcpus++;
2924 }
2925
2926 if (started_vcpus == 0) {
2927 /* we're the only active VCPU -> speed it up */
2928 __enable_ibs_on_vcpu(vcpu);
2929 } else if (started_vcpus == 1) {
2930 /*
2931 * As we are starting a second VCPU, we have to disable
2932 * the IBS facility on all VCPUs to remove potentially
2933	 * outstanding ENABLE requests.
2934 */
2935 __disable_ibs_on_all_vcpus(vcpu->kvm);
2936 }
2937
2938 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2939 /*
2940 * Another VCPU might have used IBS while we were offline.
2941 * Let's play safe and flush the VCPU at startup.
2942 */
2943 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2944 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2945 return;
2946}
2947
2948void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2949{
2950 int i, online_vcpus, started_vcpus = 0;
2951 struct kvm_vcpu *started_vcpu = NULL;
2952
2953 if (is_vcpu_stopped(vcpu))
2954 return;
2955
2956 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2957 /* Only one cpu at a time may enter/leave the STOPPED state. */
2958 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2959 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2960
2961	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2962 kvm_s390_clear_stop_irq(vcpu);
2963
2964 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2965 __disable_ibs_on_vcpu(vcpu);
2966
2967 for (i = 0; i < online_vcpus; i++) {
2968 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2969 started_vcpus++;
2970 started_vcpu = vcpu->kvm->vcpus[i];
2971 }
2972 }
2973
2974 if (started_vcpus == 1) {
2975 /*
2976 * As we only have one VCPU left, we want to enable the
2977 * IBS facility for that VCPU to speed it up.
2978 */
2979 __enable_ibs_on_vcpu(started_vcpu);
2980 }
2981
2982 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2983 return;
2984}
2985
2986static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2987 struct kvm_enable_cap *cap)
2988{
2989 int r;
2990
2991 if (cap->flags)
2992 return -EINVAL;
2993
2994 switch (cap->cap) {
2995 case KVM_CAP_S390_CSS_SUPPORT:
2996 if (!vcpu->kvm->arch.css_support) {
2997 vcpu->kvm->arch.css_support = 1;
2998 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2999 trace_kvm_s390_enable_css(vcpu->kvm);
3000 }
3001 r = 0;
3002 break;
3003 default:
3004 r = -EINVAL;
3005 break;
3006 }
3007 return r;
3008}
3009
3010static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3011 struct kvm_s390_mem_op *mop)
3012{
3013 void __user *uaddr = (void __user *)mop->buf;
3014 void *tmpbuf = NULL;
3015 int r, srcu_idx;
3016 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3017 | KVM_S390_MEMOP_F_CHECK_ONLY;
3018
3019 if (mop->flags & ~supported_flags)
3020 return -EINVAL;
3021
3022 if (mop->size > MEM_OP_MAX_SIZE)
3023 return -E2BIG;
3024
3025 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3026 tmpbuf = vmalloc(mop->size);
3027 if (!tmpbuf)
3028 return -ENOMEM;
3029 }
3030
3031 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3032
3033 switch (mop->op) {
3034 case KVM_S390_MEMOP_LOGICAL_READ:
3035 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3036 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3037 mop->size, GACC_FETCH);
3038 break;
3039 }
3040 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3041 if (r == 0) {
3042 if (copy_to_user(uaddr, tmpbuf, mop->size))
3043 r = -EFAULT;
3044 }
3045 break;
3046 case KVM_S390_MEMOP_LOGICAL_WRITE:
3047 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3048 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3049 mop->size, GACC_STORE);
3050 break;
3051 }
3052 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3053 r = -EFAULT;
3054 break;
3055 }
3056 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3057 break;
3058 default:
3059 r = -EINVAL;
3060 }
3061
3062 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3063
3064 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3065 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3066
3067 vfree(tmpbuf);
3068 return r;
3069}
3070
3071long kvm_arch_vcpu_ioctl(struct file *filp,
3072 unsigned int ioctl, unsigned long arg)
3073{
3074 struct kvm_vcpu *vcpu = filp->private_data;
3075 void __user *argp = (void __user *)arg;
3076 int idx;
3077 long r;
3078
3079 switch (ioctl) {
3080 case KVM_S390_IRQ: {
3081 struct kvm_s390_irq s390irq;
3082
3083 r = -EFAULT;
3084 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3085 break;
3086 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3087 break;
3088 }
3089 case KVM_S390_INTERRUPT: {
3090 struct kvm_s390_interrupt s390int;
3091 struct kvm_s390_irq s390irq;
3092
3093 r = -EFAULT;
3094 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3095 break;
3096 if (s390int_to_s390irq(&s390int, &s390irq))
3097 return -EINVAL;
3098 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3099 break;
3100 }
3101 case KVM_S390_STORE_STATUS:
3102 idx = srcu_read_lock(&vcpu->kvm->srcu);
3103 r = kvm_s390_vcpu_store_status(vcpu, arg);
3104 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3105 break;
3106 case KVM_S390_SET_INITIAL_PSW: {
3107 psw_t psw;
3108
3109 r = -EFAULT;
3110 if (copy_from_user(&psw, argp, sizeof(psw)))
3111 break;
3112 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3113 break;
3114 }
3115 case KVM_S390_INITIAL_RESET:
3116 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3117 break;
3118 case KVM_SET_ONE_REG:
3119 case KVM_GET_ONE_REG: {
3120 struct kvm_one_reg reg;
3121 r = -EFAULT;
3122		if (copy_from_user(&reg, argp, sizeof(reg)))
3123			break;
3124		if (ioctl == KVM_SET_ONE_REG)
3125			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3126		else
3127			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3128 break;
3129 }
3130#ifdef CONFIG_KVM_S390_UCONTROL
3131 case KVM_S390_UCAS_MAP: {
3132 struct kvm_s390_ucas_mapping ucasmap;
3133
3134 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3135 r = -EFAULT;
3136 break;
3137 }
3138
3139 if (!kvm_is_ucontrol(vcpu->kvm)) {
3140 r = -EINVAL;
3141 break;
3142 }
3143
3144 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3145 ucasmap.vcpu_addr, ucasmap.length);
3146 break;
3147 }
3148 case KVM_S390_UCAS_UNMAP: {
3149 struct kvm_s390_ucas_mapping ucasmap;
3150
3151 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3152 r = -EFAULT;
3153 break;
3154 }
3155
3156 if (!kvm_is_ucontrol(vcpu->kvm)) {
3157 r = -EINVAL;
3158 break;
3159 }
3160
3161 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3162 ucasmap.length);
3163 break;
3164 }
3165#endif
3166 case KVM_S390_VCPU_FAULT: {
3167 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3168 break;
3169 }
3170 case KVM_ENABLE_CAP:
3171 {
3172 struct kvm_enable_cap cap;
3173 r = -EFAULT;
3174 if (copy_from_user(&cap, argp, sizeof(cap)))
3175 break;
3176 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3177 break;
3178 }
3179 case KVM_S390_MEM_OP: {
3180 struct kvm_s390_mem_op mem_op;
3181
3182 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3183 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3184 else
3185 r = -EFAULT;
3186 break;
3187 }
3188 case KVM_S390_SET_IRQ_STATE: {
3189 struct kvm_s390_irq_state irq_state;
3190
3191 r = -EFAULT;
3192 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3193 break;
3194 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3195 irq_state.len == 0 ||
3196 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3197 r = -EINVAL;
3198 break;
3199 }
3200 r = kvm_s390_set_irq_state(vcpu,
3201 (void __user *) irq_state.buf,
3202 irq_state.len);
3203 break;
3204 }
3205 case KVM_S390_GET_IRQ_STATE: {
3206 struct kvm_s390_irq_state irq_state;
3207
3208 r = -EFAULT;
3209 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3210 break;
3211 if (irq_state.len == 0) {
3212 r = -EINVAL;
3213 break;
3214 }
3215 r = kvm_s390_get_irq_state(vcpu,
3216 (__u8 __user *) irq_state.buf,
3217 irq_state.len);
3218 break;
3219 }
3220 default:
3221 r = -ENOTTY;
3222 }
3223 return r;
3224}
3225
3226int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3227{
3228#ifdef CONFIG_KVM_S390_UCONTROL
3229 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3230 && (kvm_is_ucontrol(vcpu->kvm))) {
3231 vmf->page = virt_to_page(vcpu->arch.sie_block);
3232 get_page(vmf->page);
3233 return 0;
3234 }
3235#endif
3236 return VM_FAULT_SIGBUS;
3237}
3238
3239int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3240 unsigned long npages)
3241{
3242 return 0;
3243}
3244
3245/* Section: memory related */
3246int kvm_arch_prepare_memory_region(struct kvm *kvm,
3247 struct kvm_memory_slot *memslot,
3248 const struct kvm_userspace_memory_region *mem,
3249 enum kvm_mr_change change)
3250{
3251	/* A few sanity checks. Memory slots have to start and end at a
3252	   segment boundary (1MB). The memory in userland may be fragmented
3253	   into various different vmas. It is okay to mmap() and munmap()
3254	   stuff in this slot after doing this call at any time */
3255
3256 if (mem->userspace_addr & 0xffffful)
3257 return -EINVAL;
3258
3259 if (mem->memory_size & 0xffffful)
3260 return -EINVAL;
3261
3262 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3263 return -EINVAL;
3264
3265 return 0;
3266}
3267
3268void kvm_arch_commit_memory_region(struct kvm *kvm,
3269 const struct kvm_userspace_memory_region *mem,
3270 const struct kvm_memory_slot *old,
3271 const struct kvm_memory_slot *new,
3272 enum kvm_mr_change change)
3273{
3274 int rc;
3275
3276 /* If the basics of the memslot do not change, we do not want
3277 * to update the gmap. Every update causes several unnecessary
3278 * segment translation exceptions. This is usually handled just
3279 * fine by the normal fault handler + gmap, but it will also
3280 * cause faults on the prefix page of running guest CPUs.
3281 */
3282 if (old->userspace_addr == mem->userspace_addr &&
3283 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3284 old->npages * PAGE_SIZE == mem->memory_size)
3285 return;
3286
3287 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3288 mem->guest_phys_addr, mem->memory_size);
3289 if (rc)
3290 pr_warn("failed to commit memory region\n");
3291 return;
3292}
3293
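/*
 * Build the mask of facility bits in facility double word @i that are not
 * managed by the hypervisor: the 2-bit indicator taken from sclp.hmfai
 * selects how many additional 16-bit blocks to drop from the low-order
 * 48 bits.
 */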
3294static inline unsigned long nonhyp_mask(int i)
3295{
3296 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3297
3298 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3299}
3300
3301void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3302{
3303 vcpu->valid_wakeup = false;
3304}
3305
3306static int __init kvm_s390_init(void)
3307{
3308 int i;
3309
3310 if (!sclp.has_sief2) {
3311 pr_info("SIE not available\n");
3312 return -ENODEV;
3313 }
3314
3315 for (i = 0; i < 16; i++)
3316 kvm_s390_fac_list_mask[i] |=
3317 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3318
3319 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3320}
3321
3322static void __exit kvm_s390_exit(void)
3323{
3324 kvm_exit();
3325}
3326
3327module_init(kvm_s390_init);
3328module_exit(kvm_s390_exit);
3329
3330/*
3331 * Enable autoloading of the kvm module.
3332 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3333 * since x86 takes a different approach.
3334 */
3335#include <linux/miscdevice.h>
3336MODULE_ALIAS_MISCDEV(KVM_MINOR);
3337MODULE_ALIAS("devname:kvm");
264/* forward declarations */
265static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
266 unsigned long end);
267static int sca_switch_to_extended(struct kvm *kvm);
268
269static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
270{
271 u8 delta_idx = 0;
272
273 /*
274 * The TOD jumps by delta, we have to compensate this by adding
275 * -delta to the epoch.
276 */
277 delta = -delta;
278
279 /* sign-extension - we're adding to signed values below */
280 if ((s64)delta < 0)
281 delta_idx = -1;
282
283 scb->epoch += delta;
284 if (scb->ecd & ECD_MEF) {
285 scb->epdx += delta_idx;
286 if (scb->epoch < delta)
287 scb->epdx += 1;
288 }
289}
290
291/*
292 * This callback is executed during stop_machine(). All CPUs are therefore
293 * temporarily stopped. In order not to change guest behavior, we have to
294 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
295 * so a CPU won't be stopped while calculating with the epoch.
296 */
297static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
298 void *v)
299{
300 struct kvm *kvm;
301 struct kvm_vcpu *vcpu;
302 int i;
303 unsigned long long *delta = v;
304
305 list_for_each_entry(kvm, &vm_list, vm_list) {
306 kvm_for_each_vcpu(i, vcpu, kvm) {
307 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
308 if (i == 0) {
309 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
310 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
311 }
312 if (vcpu->arch.cputm_enabled)
313 vcpu->arch.cputm_start += *delta;
314 if (vcpu->arch.vsie_block)
315 kvm_clock_sync_scb(vcpu->arch.vsie_block,
316 *delta);
317 }
318 }
319 return NOTIFY_OK;
320}
321
322static struct notifier_block kvm_clock_notifier = {
323 .notifier_call = kvm_clock_sync,
324};
325
326int kvm_arch_hardware_setup(void *opaque)
327{
328 gmap_notifier.notifier_call = kvm_gmap_notifier;
329 gmap_register_pte_notifier(&gmap_notifier);
330 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
331 gmap_register_pte_notifier(&vsie_gmap_notifier);
332 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
333 &kvm_clock_notifier);
334 return 0;
335}
336
337void kvm_arch_hardware_unsetup(void)
338{
339 gmap_unregister_pte_notifier(&gmap_notifier);
340 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
341 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
342 &kvm_clock_notifier);
343}
344
345static void allow_cpu_feat(unsigned long nr)
346{
347 set_bit_inv(nr, kvm_s390_available_cpu_feat);
348}
349
350static inline int plo_test_bit(unsigned char nr)
351{
352 unsigned long function = (unsigned long)nr | 0x100;
353 int cc;
354
355 asm volatile(
356 " lgr 0,%[function]\n"
357 /* Parameter registers are ignored for "test bit" */
358 " plo 0,0,0,0(0)\n"
359 " ipm %0\n"
360 " srl %0,28\n"
361 : "=d" (cc)
362 : [function] "d" (function)
363 : "cc", "0");
364 return cc == 0;
365}
366
367static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
368{
369 asm volatile(
370 " lghi 0,0\n"
371 " lgr 1,%[query]\n"
372 /* Parameter registers are ignored */
373 " .insn rrf,%[opc] << 16,2,4,6,0\n"
374 :
375 : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
376 : "cc", "memory", "0", "1");
377}
378
379#define INSN_SORTL 0xb938
380#define INSN_DFLTCC 0xb939
381
382static void kvm_s390_cpu_feat_init(void)
383{
384 int i;
385
386 for (i = 0; i < 256; ++i) {
387 if (plo_test_bit(i))
388 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
389 }
390
391 if (test_facility(28)) /* TOD-clock steering */
392 ptff(kvm_s390_available_subfunc.ptff,
393 sizeof(kvm_s390_available_subfunc.ptff),
394 PTFF_QAF);
395
396 if (test_facility(17)) { /* MSA */
397 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
398 kvm_s390_available_subfunc.kmac);
399 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
400 kvm_s390_available_subfunc.kmc);
401 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
402 kvm_s390_available_subfunc.km);
403 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
404 kvm_s390_available_subfunc.kimd);
405 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
406 kvm_s390_available_subfunc.klmd);
407 }
408 if (test_facility(76)) /* MSA3 */
409 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
410 kvm_s390_available_subfunc.pckmo);
411 if (test_facility(77)) { /* MSA4 */
412 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
413 kvm_s390_available_subfunc.kmctr);
414 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
415 kvm_s390_available_subfunc.kmf);
416 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
417 kvm_s390_available_subfunc.kmo);
418 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
419 kvm_s390_available_subfunc.pcc);
420 }
421 if (test_facility(57)) /* MSA5 */
422 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
423 kvm_s390_available_subfunc.ppno);
424
425 if (test_facility(146)) /* MSA8 */
426 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
427 kvm_s390_available_subfunc.kma);
428
429 if (test_facility(155)) /* MSA9 */
430 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
431 kvm_s390_available_subfunc.kdsa);
432
433 if (test_facility(150)) /* SORTL */
434 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
435
436 if (test_facility(151)) /* DFLTCC */
437 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
438
439 if (MACHINE_HAS_ESOP)
440 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
441 /*
442 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
443 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
444 */
445 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
446 !test_facility(3) || !nested)
447 return;
448 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
449 if (sclp.has_64bscao)
450 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
451 if (sclp.has_siif)
452 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
453 if (sclp.has_gpere)
454 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
455 if (sclp.has_gsls)
456 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
457 if (sclp.has_ib)
458 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
459 if (sclp.has_cei)
460 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
461 if (sclp.has_ibs)
462 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
463 if (sclp.has_kss)
464 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
465 /*
466 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
467 * all skey handling functions read/set the skey from the PGSTE
468 * instead of the real storage key.
469 *
470 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
471 * pages to be detected as preserved although they are resident.
472 *
473 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
474 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
475 *
476 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
477 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
478 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
479 *
480 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
481 * cannot easily shadow the SCA because of the ipte lock.
482 */
483}
484
485int kvm_arch_init(void *opaque)
486{
487 int rc = -ENOMEM;
488
489 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
490 if (!kvm_s390_dbf)
491 return -ENOMEM;
492
493 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
494 if (!kvm_s390_dbf_uv)
495 goto out;
496
497 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
498 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
499 goto out;
500
501 kvm_s390_cpu_feat_init();
502
503 /* Register floating interrupt controller interface. */
504 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
505 if (rc) {
506 pr_err("A FLIC registration call failed with rc=%d\n", rc);
507 goto out;
508 }
509
510 rc = kvm_s390_gib_init(GAL_ISC);
511 if (rc)
512 goto out;
513
514 return 0;
515
516out:
517 kvm_arch_exit();
518 return rc;
519}
520
521void kvm_arch_exit(void)
522{
523 kvm_s390_gib_destroy();
524 debug_unregister(kvm_s390_dbf);
525 debug_unregister(kvm_s390_dbf_uv);
526}
527
528/* Section: device related */
529long kvm_arch_dev_ioctl(struct file *filp,
530 unsigned int ioctl, unsigned long arg)
531{
532 if (ioctl == KVM_S390_ENABLE_SIE)
533 return s390_enable_sie();
534 return -EINVAL;
535}
536
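/*
 * Editorial sketch (not part of the original file): userspace probes these
 * capabilities with the KVM_CHECK_EXTENSION ioctl on the VM file descriptor,
 * roughly:
 *
 *	int r = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * A return of 0 means "not supported"; for capabilities such as
 * KVM_CAP_S390_MEM_OP or KVM_CAP_MAX_VCPUS the returned value is the limit
 * reported below.
 */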
537int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
538{
539 int r;
540
541 switch (ext) {
542 case KVM_CAP_S390_PSW:
543 case KVM_CAP_S390_GMAP:
544 case KVM_CAP_SYNC_MMU:
545#ifdef CONFIG_KVM_S390_UCONTROL
546 case KVM_CAP_S390_UCONTROL:
547#endif
548 case KVM_CAP_ASYNC_PF:
549 case KVM_CAP_SYNC_REGS:
550 case KVM_CAP_ONE_REG:
551 case KVM_CAP_ENABLE_CAP:
552 case KVM_CAP_S390_CSS_SUPPORT:
553 case KVM_CAP_IOEVENTFD:
554 case KVM_CAP_DEVICE_CTRL:
555 case KVM_CAP_S390_IRQCHIP:
556 case KVM_CAP_VM_ATTRIBUTES:
557 case KVM_CAP_MP_STATE:
558 case KVM_CAP_IMMEDIATE_EXIT:
559 case KVM_CAP_S390_INJECT_IRQ:
560 case KVM_CAP_S390_USER_SIGP:
561 case KVM_CAP_S390_USER_STSI:
562 case KVM_CAP_S390_SKEYS:
563 case KVM_CAP_S390_IRQ_STATE:
564 case KVM_CAP_S390_USER_INSTR0:
565 case KVM_CAP_S390_CMMA_MIGRATION:
566 case KVM_CAP_S390_AIS:
567 case KVM_CAP_S390_AIS_MIGRATION:
568 case KVM_CAP_S390_VCPU_RESETS:
569 case KVM_CAP_SET_GUEST_DEBUG:
570 case KVM_CAP_S390_DIAG318:
571 r = 1;
572 break;
573 case KVM_CAP_SET_GUEST_DEBUG2:
574 r = KVM_GUESTDBG_VALID_MASK;
575 break;
576 case KVM_CAP_S390_HPAGE_1M:
577 r = 0;
578 if (hpage && !kvm_is_ucontrol(kvm))
579 r = 1;
580 break;
581 case KVM_CAP_S390_MEM_OP:
582 r = MEM_OP_MAX_SIZE;
583 break;
584 case KVM_CAP_NR_VCPUS:
585 case KVM_CAP_MAX_VCPUS:
586 case KVM_CAP_MAX_VCPU_ID:
587 r = KVM_S390_BSCA_CPU_SLOTS;
588 if (!kvm_s390_use_sca_entries())
589 r = KVM_MAX_VCPUS;
590 else if (sclp.has_esca && sclp.has_64bscao)
591 r = KVM_S390_ESCA_CPU_SLOTS;
592 break;
593 case KVM_CAP_S390_COW:
594 r = MACHINE_HAS_ESOP;
595 break;
596 case KVM_CAP_S390_VECTOR_REGISTERS:
597 r = MACHINE_HAS_VX;
598 break;
599 case KVM_CAP_S390_RI:
600 r = test_facility(64);
601 break;
602 case KVM_CAP_S390_GS:
603 r = test_facility(133);
604 break;
605 case KVM_CAP_S390_BPB:
606 r = test_facility(82);
607 break;
608 case KVM_CAP_S390_PROTECTED:
609 r = is_prot_virt_host();
610 break;
611 default:
612 r = 0;
613 }
614 return r;
615}
616
617void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
618{
619 int i;
620 gfn_t cur_gfn, last_gfn;
621 unsigned long gaddr, vmaddr;
622 struct gmap *gmap = kvm->arch.gmap;
623 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
624
625 /* Loop over all guest segments */
626 cur_gfn = memslot->base_gfn;
627 last_gfn = memslot->base_gfn + memslot->npages;
628 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
629 gaddr = gfn_to_gpa(cur_gfn);
630 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
631 if (kvm_is_error_hva(vmaddr))
632 continue;
633
634 bitmap_zero(bitmap, _PAGE_ENTRIES);
635 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
636 for (i = 0; i < _PAGE_ENTRIES; i++) {
637 if (test_bit(i, bitmap))
638 mark_page_dirty(kvm, cur_gfn + i);
639 }
640
641 if (fatal_signal_pending(current))
642 return;
643 cond_resched();
644 }
645}
646
647/* Section: vm related */
648static void sca_del_vcpu(struct kvm_vcpu *vcpu);
649
650/*
651 * Get (and clear) the dirty memory log for a memory slot.
652 */
653int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
654 struct kvm_dirty_log *log)
655{
656 int r;
657 unsigned long n;
658 struct kvm_memory_slot *memslot;
659 int is_dirty;
660
661 if (kvm_is_ucontrol(kvm))
662 return -EINVAL;
663
664 mutex_lock(&kvm->slots_lock);
665
666 r = -EINVAL;
667 if (log->slot >= KVM_USER_MEM_SLOTS)
668 goto out;
669
670 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
671 if (r)
672 goto out;
673
674 /* Clear the dirty log */
675 if (is_dirty) {
676 n = kvm_dirty_bitmap_bytes(memslot);
677 memset(memslot->dirty_bitmap, 0, n);
678 }
679 r = 0;
680out:
681 mutex_unlock(&kvm->slots_lock);
682 return r;
683}
684
685static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
686{
687 unsigned int i;
688 struct kvm_vcpu *vcpu;
689
690 kvm_for_each_vcpu(i, vcpu, kvm) {
691 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
692 }
693}
694
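/*
 * Editorial sketch (not part of the original file): the VM capabilities
 * handled below are enabled from userspace with the KVM_ENABLE_CAP vm ioctl,
 * e.g.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *
 * Capabilities that change the guest CPU model (vector registers, RI, GS,
 * huge pages, ...) must be enabled before the first vCPU is created and
 * return -EBUSY afterwards, as implemented below.
 */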
695int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
696{
697 int r;
698
699 if (cap->flags)
700 return -EINVAL;
701
702 switch (cap->cap) {
703 case KVM_CAP_S390_IRQCHIP:
704 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
705 kvm->arch.use_irqchip = 1;
706 r = 0;
707 break;
708 case KVM_CAP_S390_USER_SIGP:
709 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
710 kvm->arch.user_sigp = 1;
711 r = 0;
712 break;
713 case KVM_CAP_S390_VECTOR_REGISTERS:
714 mutex_lock(&kvm->lock);
715 if (kvm->created_vcpus) {
716 r = -EBUSY;
717 } else if (MACHINE_HAS_VX) {
718 set_kvm_facility(kvm->arch.model.fac_mask, 129);
719 set_kvm_facility(kvm->arch.model.fac_list, 129);
720 if (test_facility(134)) {
721 set_kvm_facility(kvm->arch.model.fac_mask, 134);
722 set_kvm_facility(kvm->arch.model.fac_list, 134);
723 }
724 if (test_facility(135)) {
725 set_kvm_facility(kvm->arch.model.fac_mask, 135);
726 set_kvm_facility(kvm->arch.model.fac_list, 135);
727 }
728 if (test_facility(148)) {
729 set_kvm_facility(kvm->arch.model.fac_mask, 148);
730 set_kvm_facility(kvm->arch.model.fac_list, 148);
731 }
732 if (test_facility(152)) {
733 set_kvm_facility(kvm->arch.model.fac_mask, 152);
734 set_kvm_facility(kvm->arch.model.fac_list, 152);
735 }
736 if (test_facility(192)) {
737 set_kvm_facility(kvm->arch.model.fac_mask, 192);
738 set_kvm_facility(kvm->arch.model.fac_list, 192);
739 }
740 r = 0;
741 } else
742 r = -EINVAL;
743 mutex_unlock(&kvm->lock);
744 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
745 r ? "(not available)" : "(success)");
746 break;
747 case KVM_CAP_S390_RI:
748 r = -EINVAL;
749 mutex_lock(&kvm->lock);
750 if (kvm->created_vcpus) {
751 r = -EBUSY;
752 } else if (test_facility(64)) {
753 set_kvm_facility(kvm->arch.model.fac_mask, 64);
754 set_kvm_facility(kvm->arch.model.fac_list, 64);
755 r = 0;
756 }
757 mutex_unlock(&kvm->lock);
758 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
759 r ? "(not available)" : "(success)");
760 break;
761 case KVM_CAP_S390_AIS:
762 mutex_lock(&kvm->lock);
763 if (kvm->created_vcpus) {
764 r = -EBUSY;
765 } else {
766 set_kvm_facility(kvm->arch.model.fac_mask, 72);
767 set_kvm_facility(kvm->arch.model.fac_list, 72);
768 r = 0;
769 }
770 mutex_unlock(&kvm->lock);
771 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
772 r ? "(not available)" : "(success)");
773 break;
774 case KVM_CAP_S390_GS:
775 r = -EINVAL;
776 mutex_lock(&kvm->lock);
777 if (kvm->created_vcpus) {
778 r = -EBUSY;
779 } else if (test_facility(133)) {
780 set_kvm_facility(kvm->arch.model.fac_mask, 133);
781 set_kvm_facility(kvm->arch.model.fac_list, 133);
782 r = 0;
783 }
784 mutex_unlock(&kvm->lock);
785 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
786 r ? "(not available)" : "(success)");
787 break;
788 case KVM_CAP_S390_HPAGE_1M:
789 mutex_lock(&kvm->lock);
790 if (kvm->created_vcpus)
791 r = -EBUSY;
792 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
793 r = -EINVAL;
794 else {
795 r = 0;
796 mmap_write_lock(kvm->mm);
797 kvm->mm->context.allow_gmap_hpage_1m = 1;
798 mmap_write_unlock(kvm->mm);
799 /*
800 * We might have to create fake 4k page
801 * tables. To avoid that the hardware works on
802 * stale PGSTEs, we emulate these instructions.
803 */
804 kvm->arch.use_skf = 0;
805 kvm->arch.use_pfmfi = 0;
806 }
807 mutex_unlock(&kvm->lock);
808 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
809 r ? "(not available)" : "(success)");
810 break;
811 case KVM_CAP_S390_USER_STSI:
812 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
813 kvm->arch.user_stsi = 1;
814 r = 0;
815 break;
816 case KVM_CAP_S390_USER_INSTR0:
817 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
818 kvm->arch.user_instr0 = 1;
819 icpt_operexc_on_all_vcpus(kvm);
820 r = 0;
821 break;
822 default:
823 r = -EINVAL;
824 break;
825 }
826 return r;
827}
828
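/*
 * Editorial sketch (not part of the original file): the memory control
 * attributes below are reached through the KVM_GET_DEVICE_ATTR /
 * KVM_SET_DEVICE_ATTR vm ioctls, e.g. to query the memory limit:
 *
 *	__u64 limit;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)(unsigned long)&limit,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */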
829static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
830{
831 int ret;
832
833 switch (attr->attr) {
834 case KVM_S390_VM_MEM_LIMIT_SIZE:
835 ret = 0;
836 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
837 kvm->arch.mem_limit);
838 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
839 ret = -EFAULT;
840 break;
841 default:
842 ret = -ENXIO;
843 break;
844 }
845 return ret;
846}
847
848static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
849{
850 int ret;
851 unsigned int idx;
852 switch (attr->attr) {
853 case KVM_S390_VM_MEM_ENABLE_CMMA:
854 ret = -ENXIO;
855 if (!sclp.has_cmma)
856 break;
857
858 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
859 mutex_lock(&kvm->lock);
860 if (kvm->created_vcpus)
861 ret = -EBUSY;
862 else if (kvm->mm->context.allow_gmap_hpage_1m)
863 ret = -EINVAL;
864 else {
865 kvm->arch.use_cmma = 1;
866 /* Not compatible with cmma. */
867 kvm->arch.use_pfmfi = 0;
868 ret = 0;
869 }
870 mutex_unlock(&kvm->lock);
871 break;
872 case KVM_S390_VM_MEM_CLR_CMMA:
873 ret = -ENXIO;
874 if (!sclp.has_cmma)
875 break;
876 ret = -EINVAL;
877 if (!kvm->arch.use_cmma)
878 break;
879
880 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
881 mutex_lock(&kvm->lock);
882 idx = srcu_read_lock(&kvm->srcu);
883 s390_reset_cmma(kvm->arch.gmap->mm);
884 srcu_read_unlock(&kvm->srcu, idx);
885 mutex_unlock(&kvm->lock);
886 ret = 0;
887 break;
888 case KVM_S390_VM_MEM_LIMIT_SIZE: {
889 unsigned long new_limit;
890
891 if (kvm_is_ucontrol(kvm))
892 return -EINVAL;
893
894 if (get_user(new_limit, (u64 __user *)attr->addr))
895 return -EFAULT;
896
897 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
898 new_limit > kvm->arch.mem_limit)
899 return -E2BIG;
900
901 if (!new_limit)
902 return -EINVAL;
903
904 /* gmap_create takes last usable address */
905 if (new_limit != KVM_S390_NO_MEM_LIMIT)
906 new_limit -= 1;
907
908 ret = -EBUSY;
909 mutex_lock(&kvm->lock);
910 if (!kvm->created_vcpus) {
911 /* gmap_create will round the limit up */
912 struct gmap *new = gmap_create(current->mm, new_limit);
913
914 if (!new) {
915 ret = -ENOMEM;
916 } else {
917 gmap_remove(kvm->arch.gmap);
918 new->private = kvm;
919 kvm->arch.gmap = new;
920 ret = 0;
921 }
922 }
923 mutex_unlock(&kvm->lock);
924 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
925 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
926 (void *) kvm->arch.gmap->asce);
927 break;
928 }
929 default:
930 ret = -ENXIO;
931 break;
932 }
933 return ret;
934}
935
936static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
937
938void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
939{
940 struct kvm_vcpu *vcpu;
941 int i;
942
943 kvm_s390_vcpu_block_all(kvm);
944
945 kvm_for_each_vcpu(i, vcpu, kvm) {
946 kvm_s390_vcpu_crypto_setup(vcpu);
947 /* recreate the shadow crycb by leaving the VSIE handler */
948 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
949 }
950
951 kvm_s390_vcpu_unblock_all(kvm);
952}
953
954static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
955{
956 mutex_lock(&kvm->lock);
957 switch (attr->attr) {
958 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
959 if (!test_kvm_facility(kvm, 76)) {
960 mutex_unlock(&kvm->lock);
961 return -EINVAL;
962 }
963 get_random_bytes(
964 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
965 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
966 kvm->arch.crypto.aes_kw = 1;
967 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
968 break;
969 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
970 if (!test_kvm_facility(kvm, 76)) {
971 mutex_unlock(&kvm->lock);
972 return -EINVAL;
973 }
974 get_random_bytes(
975 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
976 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
977 kvm->arch.crypto.dea_kw = 1;
978 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
979 break;
980 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
981 if (!test_kvm_facility(kvm, 76)) {
982 mutex_unlock(&kvm->lock);
983 return -EINVAL;
984 }
985 kvm->arch.crypto.aes_kw = 0;
986 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
987 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
988 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
989 break;
990 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
991 if (!test_kvm_facility(kvm, 76)) {
992 mutex_unlock(&kvm->lock);
993 return -EINVAL;
994 }
995 kvm->arch.crypto.dea_kw = 0;
996 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
997 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
998 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
999 break;
1000 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1001 if (!ap_instructions_available()) {
1002 mutex_unlock(&kvm->lock);
1003 return -EOPNOTSUPP;
1004 }
1005 kvm->arch.crypto.apie = 1;
1006 break;
1007 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1008 if (!ap_instructions_available()) {
1009 mutex_unlock(&kvm->lock);
1010 return -EOPNOTSUPP;
1011 }
1012 kvm->arch.crypto.apie = 0;
1013 break;
1014 default:
1015 mutex_unlock(&kvm->lock);
1016 return -ENXIO;
1017 }
1018
1019 kvm_s390_vcpu_crypto_reset_all(kvm);
1020 mutex_unlock(&kvm->lock);
1021 return 0;
1022}
1023
1024static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1025{
1026 int cx;
1027 struct kvm_vcpu *vcpu;
1028
1029 kvm_for_each_vcpu(cx, vcpu, kvm)
1030 kvm_s390_sync_request(req, vcpu);
1031}
1032
1033/*
1034 * Must be called with kvm->srcu held to avoid races on memslots, and with
1035 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1036 */
1037static int kvm_s390_vm_start_migration(struct kvm *kvm)
1038{
1039 struct kvm_memory_slot *ms;
1040 struct kvm_memslots *slots;
1041 unsigned long ram_pages = 0;
1042 int slotnr;
1043
1044 /* migration mode already enabled */
1045 if (kvm->arch.migration_mode)
1046 return 0;
1047 slots = kvm_memslots(kvm);
1048 if (!slots || !slots->used_slots)
1049 return -EINVAL;
1050
1051 if (!kvm->arch.use_cmma) {
1052 kvm->arch.migration_mode = 1;
1053 return 0;
1054 }
1055 /* mark all the pages in active slots as dirty */
1056 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1057 ms = slots->memslots + slotnr;
1058 if (!ms->dirty_bitmap)
1059 return -EINVAL;
1060 /*
1061 * The second half of the bitmap is only used on x86,
1062 * and would be wasted otherwise, so we put it to good
1063 * use here to keep track of the state of the storage
1064 * attributes.
1065 */
1066 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1067 ram_pages += ms->npages;
1068 }
1069 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1070 kvm->arch.migration_mode = 1;
1071 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1072 return 0;
1073}
1074
1075/*
1076 * Must be called with kvm->slots_lock to avoid races with ourselves and
1077 * kvm_s390_vm_start_migration.
1078 */
1079static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1080{
1081 /* migration mode already disabled */
1082 if (!kvm->arch.migration_mode)
1083 return 0;
1084 kvm->arch.migration_mode = 0;
1085 if (kvm->arch.use_cmma)
1086 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1087 return 0;
1088}
1089
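/*
 * Editorial sketch (not part of the original file): migration mode is toggled
 * from userspace through the KVM_S390_VM_MIGRATION attribute group, e.g.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * With CMMA in use, starting migration marks all pages dirty in the
 * storage-attribute bitmap so kvm_s390_get_cmma_bits() can transfer them.
 */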
1090static int kvm_s390_vm_set_migration(struct kvm *kvm,
1091 struct kvm_device_attr *attr)
1092{
1093 int res = -ENXIO;
1094
1095 mutex_lock(&kvm->slots_lock);
1096 switch (attr->attr) {
1097 case KVM_S390_VM_MIGRATION_START:
1098 res = kvm_s390_vm_start_migration(kvm);
1099 break;
1100 case KVM_S390_VM_MIGRATION_STOP:
1101 res = kvm_s390_vm_stop_migration(kvm);
1102 break;
1103 default:
1104 break;
1105 }
1106 mutex_unlock(&kvm->slots_lock);
1107
1108 return res;
1109}
1110
1111static int kvm_s390_vm_get_migration(struct kvm *kvm,
1112 struct kvm_device_attr *attr)
1113{
1114 u64 mig = kvm->arch.migration_mode;
1115
1116 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1117 return -ENXIO;
1118
1119 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1120 return -EFAULT;
1121 return 0;
1122}
1123
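/*
 * Editorial note (not part of the original file): the extended TOD format
 * used below carries an epoch index (epoch_idx) in addition to the 64-bit
 * tod value, so guest clocks past a TOD-clock wrap can be expressed when
 * facility 139 (multiple-epoch facility) is available; the plain
 * KVM_S390_VM_TOD_LOW/HIGH attributes only cover the basic 64-bit value.
 */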
1124static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1125{
1126 struct kvm_s390_vm_tod_clock gtod;
1127
1128 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1129 return -EFAULT;
1130
1131 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1132 return -EINVAL;
1133 kvm_s390_set_tod_clock(kvm, &gtod);
1134
1135 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1136 gtod.epoch_idx, gtod.tod);
1137
1138 return 0;
1139}
1140
1141static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1142{
1143 u8 gtod_high;
1144
1145 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1146 sizeof(gtod_high)))
1147 return -EFAULT;
1148
1149 if (gtod_high != 0)
1150 return -EINVAL;
1151 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1152
1153 return 0;
1154}
1155
1156static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1157{
1158 struct kvm_s390_vm_tod_clock gtod = { 0 };
1159
1160 if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1161 sizeof(gtod.tod)))
1162 return -EFAULT;
1163
1164 kvm_s390_set_tod_clock(kvm, &gtod);
1165 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1166 return 0;
1167}
1168
1169static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1170{
1171 int ret;
1172
1173 if (attr->flags)
1174 return -EINVAL;
1175
1176 switch (attr->attr) {
1177 case KVM_S390_VM_TOD_EXT:
1178 ret = kvm_s390_set_tod_ext(kvm, attr);
1179 break;
1180 case KVM_S390_VM_TOD_HIGH:
1181 ret = kvm_s390_set_tod_high(kvm, attr);
1182 break;
1183 case KVM_S390_VM_TOD_LOW:
1184 ret = kvm_s390_set_tod_low(kvm, attr);
1185 break;
1186 default:
1187 ret = -ENXIO;
1188 break;
1189 }
1190 return ret;
1191}
1192
1193static void kvm_s390_get_tod_clock(struct kvm *kvm,
1194 struct kvm_s390_vm_tod_clock *gtod)
1195{
1196 union tod_clock clk;
1197
1198 preempt_disable();
1199
1200 store_tod_clock_ext(&clk);
1201
1202 gtod->tod = clk.tod + kvm->arch.epoch;
1203 gtod->epoch_idx = 0;
1204 if (test_kvm_facility(kvm, 139)) {
1205 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1206 if (gtod->tod < clk.tod)
1207 gtod->epoch_idx += 1;
1208 }
1209
1210 preempt_enable();
1211}
1212
1213static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1214{
1215 struct kvm_s390_vm_tod_clock gtod;
1216
1217 memset(&gtod, 0, sizeof(gtod));
1218 kvm_s390_get_tod_clock(kvm, &gtod);
1219 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1220 return -EFAULT;
1221
1222 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1223 gtod.epoch_idx, gtod.tod);
1224 return 0;
1225}
1226
1227static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1228{
1229 u8 gtod_high = 0;
1230
1231 if (copy_to_user((void __user *)attr->addr, &gtod_high,
1232 sizeof(gtod_high)))
1233 return -EFAULT;
1234 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1235
1236 return 0;
1237}
1238
1239static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1240{
1241 u64 gtod;
1242
1243 gtod = kvm_s390_get_tod_clock_fast(kvm);
1244 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1245 return -EFAULT;
1246 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1247
1248 return 0;
1249}
1250
1251static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1252{
1253 int ret;
1254
1255 if (attr->flags)
1256 return -EINVAL;
1257
1258 switch (attr->attr) {
1259 case KVM_S390_VM_TOD_EXT:
1260 ret = kvm_s390_get_tod_ext(kvm, attr);
1261 break;
1262 case KVM_S390_VM_TOD_HIGH:
1263 ret = kvm_s390_get_tod_high(kvm, attr);
1264 break;
1265 case KVM_S390_VM_TOD_LOW:
1266 ret = kvm_s390_get_tod_low(kvm, attr);
1267 break;
1268 default:
1269 ret = -ENXIO;
1270 break;
1271 }
1272 return ret;
1273}
1274
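/*
 * Editorial note (not part of the original file): the CPU model data below is
 * exchanged with userspace via KVM_SET_DEVICE_ATTR / KVM_GET_DEVICE_ATTR,
 * group KVM_S390_VM_CPU_MODEL, with attr->addr pointing at the corresponding
 * uapi structure (e.g. struct kvm_s390_vm_cpu_processor for
 * KVM_S390_VM_CPU_PROCESSOR). Setting processor data is only accepted before
 * the first vCPU has been created.
 */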
1275static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1276{
1277 struct kvm_s390_vm_cpu_processor *proc;
1278 u16 lowest_ibc, unblocked_ibc;
1279 int ret = 0;
1280
1281 mutex_lock(&kvm->lock);
1282 if (kvm->created_vcpus) {
1283 ret = -EBUSY;
1284 goto out;
1285 }
1286 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1287 if (!proc) {
1288 ret = -ENOMEM;
1289 goto out;
1290 }
1291 if (!copy_from_user(proc, (void __user *)attr->addr,
1292 sizeof(*proc))) {
1293 kvm->arch.model.cpuid = proc->cpuid;
1294 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1295 unblocked_ibc = sclp.ibc & 0xfff;
1296 if (lowest_ibc && proc->ibc) {
1297 if (proc->ibc > unblocked_ibc)
1298 kvm->arch.model.ibc = unblocked_ibc;
1299 else if (proc->ibc < lowest_ibc)
1300 kvm->arch.model.ibc = lowest_ibc;
1301 else
1302 kvm->arch.model.ibc = proc->ibc;
1303 }
1304 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1305 S390_ARCH_FAC_LIST_SIZE_BYTE);
1306 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1307 kvm->arch.model.ibc,
1308 kvm->arch.model.cpuid);
1309 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1310 kvm->arch.model.fac_list[0],
1311 kvm->arch.model.fac_list[1],
1312 kvm->arch.model.fac_list[2]);
1313 } else
1314 ret = -EFAULT;
1315 kfree(proc);
1316out:
1317 mutex_unlock(&kvm->lock);
1318 return ret;
1319}
1320
1321static int kvm_s390_set_processor_feat(struct kvm *kvm,
1322 struct kvm_device_attr *attr)
1323{
1324 struct kvm_s390_vm_cpu_feat data;
1325
1326 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1327 return -EFAULT;
1328 if (!bitmap_subset((unsigned long *) data.feat,
1329 kvm_s390_available_cpu_feat,
1330 KVM_S390_VM_CPU_FEAT_NR_BITS))
1331 return -EINVAL;
1332
1333 mutex_lock(&kvm->lock);
1334 if (kvm->created_vcpus) {
1335 mutex_unlock(&kvm->lock);
1336 return -EBUSY;
1337 }
1338 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1339 KVM_S390_VM_CPU_FEAT_NR_BITS);
1340 mutex_unlock(&kvm->lock);
1341 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1342 data.feat[0],
1343 data.feat[1],
1344 data.feat[2]);
1345 return 0;
1346}
1347
1348static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1349 struct kvm_device_attr *attr)
1350{
1351 mutex_lock(&kvm->lock);
1352 if (kvm->created_vcpus) {
1353 mutex_unlock(&kvm->lock);
1354 return -EBUSY;
1355 }
1356
1357 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1358 sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1359 mutex_unlock(&kvm->lock);
1360 return -EFAULT;
1361 }
1362 mutex_unlock(&kvm->lock);
1363
1364 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1365 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1366 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1367 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1368 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1369 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1370 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1371 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1372 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1373 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1374 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1375 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1376 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1377 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1378 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1379 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1380 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1381 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1382 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1383 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1384 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1385 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1386 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1387 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1388 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1389 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1390 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1391 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1392 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1393 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1394 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1395 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1396 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1397 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1398 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1399 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1400 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1401 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1402 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1403 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1404 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1405 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1406 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1407 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1408 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1409 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1410 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1411 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1412 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1413 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1414 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1415 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1416 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1417 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1418 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1419 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1420 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1421
1422 return 0;
1423}
1424
1425static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1426{
1427 int ret = -ENXIO;
1428
1429 switch (attr->attr) {
1430 case KVM_S390_VM_CPU_PROCESSOR:
1431 ret = kvm_s390_set_processor(kvm, attr);
1432 break;
1433 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1434 ret = kvm_s390_set_processor_feat(kvm, attr);
1435 break;
1436 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1437 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1438 break;
1439 }
1440 return ret;
1441}
1442
1443static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1444{
1445 struct kvm_s390_vm_cpu_processor *proc;
1446 int ret = 0;
1447
1448 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1449 if (!proc) {
1450 ret = -ENOMEM;
1451 goto out;
1452 }
1453 proc->cpuid = kvm->arch.model.cpuid;
1454 proc->ibc = kvm->arch.model.ibc;
1455 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1456 S390_ARCH_FAC_LIST_SIZE_BYTE);
1457 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1458 kvm->arch.model.ibc,
1459 kvm->arch.model.cpuid);
1460 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1461 kvm->arch.model.fac_list[0],
1462 kvm->arch.model.fac_list[1],
1463 kvm->arch.model.fac_list[2]);
1464 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1465 ret = -EFAULT;
1466 kfree(proc);
1467out:
1468 return ret;
1469}
1470
1471static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1472{
1473 struct kvm_s390_vm_cpu_machine *mach;
1474 int ret = 0;
1475
1476 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1477 if (!mach) {
1478 ret = -ENOMEM;
1479 goto out;
1480 }
1481 get_cpu_id((struct cpuid *) &mach->cpuid);
1482 mach->ibc = sclp.ibc;
1483 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1484 S390_ARCH_FAC_LIST_SIZE_BYTE);
1485 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1486 sizeof(stfle_fac_list));
1487 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1488 kvm->arch.model.ibc,
1489 kvm->arch.model.cpuid);
1490 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1491 mach->fac_mask[0],
1492 mach->fac_mask[1],
1493 mach->fac_mask[2]);
1494 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1495 mach->fac_list[0],
1496 mach->fac_list[1],
1497 mach->fac_list[2]);
1498 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1499 ret = -EFAULT;
1500 kfree(mach);
1501out:
1502 return ret;
1503}
1504
1505static int kvm_s390_get_processor_feat(struct kvm *kvm,
1506 struct kvm_device_attr *attr)
1507{
1508 struct kvm_s390_vm_cpu_feat data;
1509
1510 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1511 KVM_S390_VM_CPU_FEAT_NR_BITS);
1512 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1513 return -EFAULT;
1514 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1515 data.feat[0],
1516 data.feat[1],
1517 data.feat[2]);
1518 return 0;
1519}
1520
1521static int kvm_s390_get_machine_feat(struct kvm *kvm,
1522 struct kvm_device_attr *attr)
1523{
1524 struct kvm_s390_vm_cpu_feat data;
1525
1526 bitmap_copy((unsigned long *) data.feat,
1527 kvm_s390_available_cpu_feat,
1528 KVM_S390_VM_CPU_FEAT_NR_BITS);
1529 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1530 return -EFAULT;
1531 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1532 data.feat[0],
1533 data.feat[1],
1534 data.feat[2]);
1535 return 0;
1536}
1537
1538static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1539 struct kvm_device_attr *attr)
1540{
1541 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1542 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1543 return -EFAULT;
1544
1545 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1546 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1547 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1548 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1549 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1550 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1551 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1552 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1553 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1554 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1555 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1556 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1557 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1558 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1559 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1560 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1561 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1562 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1563 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1564 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1565 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1566 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1567 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1568 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1569 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1570 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1571 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1572 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1573 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1574 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1575 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1576 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1577 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1578 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1579 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1580 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1581 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1582 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1583 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1584 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1585 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1586 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1587 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1588 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1589 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1590 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1591 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1592 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1593 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1594 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1595 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1596 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1597 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1598 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1599 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1600 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1601 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1602
1603 return 0;
1604}
1605
1606static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1607 struct kvm_device_attr *attr)
1608{
1609 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1610 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1611 return -EFAULT;
1612
1613 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1614 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1615 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1616 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1617 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1618 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1619 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1620 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1621 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1622 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1623 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1624 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1625 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1626 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1627 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1628 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1629 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1630 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1631 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1632 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1633 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1634 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1635 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1636 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1637 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1638 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1639 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1640 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1641 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1642 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1643 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1644 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1645 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1646 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1647 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1648 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1649 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1650 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1651 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1652 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1653 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1654 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1655 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1656 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1657 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1658 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1659 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1660 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1661 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1662 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1663 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1664 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1665 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1666 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1667 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1668 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1669 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1670
1671 return 0;
1672}
1673
1674static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1675{
1676 int ret = -ENXIO;
1677
1678 switch (attr->attr) {
1679 case KVM_S390_VM_CPU_PROCESSOR:
1680 ret = kvm_s390_get_processor(kvm, attr);
1681 break;
1682 case KVM_S390_VM_CPU_MACHINE:
1683 ret = kvm_s390_get_machine(kvm, attr);
1684 break;
1685 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1686 ret = kvm_s390_get_processor_feat(kvm, attr);
1687 break;
1688 case KVM_S390_VM_CPU_MACHINE_FEAT:
1689 ret = kvm_s390_get_machine_feat(kvm, attr);
1690 break;
1691 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1692 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1693 break;
1694 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1695 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1696 break;
1697 }
1698 return ret;
1699}
1700
1701static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1702{
1703 int ret;
1704
1705 switch (attr->group) {
1706 case KVM_S390_VM_MEM_CTRL:
1707 ret = kvm_s390_set_mem_control(kvm, attr);
1708 break;
1709 case KVM_S390_VM_TOD:
1710 ret = kvm_s390_set_tod(kvm, attr);
1711 break;
1712 case KVM_S390_VM_CPU_MODEL:
1713 ret = kvm_s390_set_cpu_model(kvm, attr);
1714 break;
1715 case KVM_S390_VM_CRYPTO:
1716 ret = kvm_s390_vm_set_crypto(kvm, attr);
1717 break;
1718 case KVM_S390_VM_MIGRATION:
1719 ret = kvm_s390_vm_set_migration(kvm, attr);
1720 break;
1721 default:
1722 ret = -ENXIO;
1723 break;
1724 }
1725
1726 return ret;
1727}
1728
1729static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1730{
1731 int ret;
1732
1733 switch (attr->group) {
1734 case KVM_S390_VM_MEM_CTRL:
1735 ret = kvm_s390_get_mem_control(kvm, attr);
1736 break;
1737 case KVM_S390_VM_TOD:
1738 ret = kvm_s390_get_tod(kvm, attr);
1739 break;
1740 case KVM_S390_VM_CPU_MODEL:
1741 ret = kvm_s390_get_cpu_model(kvm, attr);
1742 break;
1743 case KVM_S390_VM_MIGRATION:
1744 ret = kvm_s390_vm_get_migration(kvm, attr);
1745 break;
1746 default:
1747 ret = -ENXIO;
1748 break;
1749 }
1750
1751 return ret;
1752}
1753
1754static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1755{
1756 int ret;
1757
1758 switch (attr->group) {
1759 case KVM_S390_VM_MEM_CTRL:
1760 switch (attr->attr) {
1761 case KVM_S390_VM_MEM_ENABLE_CMMA:
1762 case KVM_S390_VM_MEM_CLR_CMMA:
1763 ret = sclp.has_cmma ? 0 : -ENXIO;
1764 break;
1765 case KVM_S390_VM_MEM_LIMIT_SIZE:
1766 ret = 0;
1767 break;
1768 default:
1769 ret = -ENXIO;
1770 break;
1771 }
1772 break;
1773 case KVM_S390_VM_TOD:
1774 switch (attr->attr) {
1775 case KVM_S390_VM_TOD_LOW:
1776 case KVM_S390_VM_TOD_HIGH:
1777 ret = 0;
1778 break;
1779 default:
1780 ret = -ENXIO;
1781 break;
1782 }
1783 break;
1784 case KVM_S390_VM_CPU_MODEL:
1785 switch (attr->attr) {
1786 case KVM_S390_VM_CPU_PROCESSOR:
1787 case KVM_S390_VM_CPU_MACHINE:
1788 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1789 case KVM_S390_VM_CPU_MACHINE_FEAT:
1790 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1791 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1792 ret = 0;
1793 break;
1794 default:
1795 ret = -ENXIO;
1796 break;
1797 }
1798 break;
1799 case KVM_S390_VM_CRYPTO:
1800 switch (attr->attr) {
1801 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1802 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1803 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1804 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1805 ret = 0;
1806 break;
1807 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1808 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1809 ret = ap_instructions_available() ? 0 : -ENXIO;
1810 break;
1811 default:
1812 ret = -ENXIO;
1813 break;
1814 }
1815 break;
1816 case KVM_S390_VM_MIGRATION:
1817 ret = 0;
1818 break;
1819 default:
1820 ret = -ENXIO;
1821 break;
1822 }
1823
1824 return ret;
1825}
1826
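/*
 * Editorial sketch (not part of the original file): storage keys are read and
 * written from userspace with the KVM_S390_GET_SKEYS / KVM_S390_SET_SKEYS vm
 * ioctls, e.g.
 *
 *	struct kvm_s390_skeys skeys = {
 *		.start_gfn     = 0,
 *		.count         = npages,
 *		.skeydata_addr = (__u64)(unsigned long)keybuf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &skeys);
 *
 * where keybuf holds one key byte per guest frame; npages and keybuf are
 * assumed to be provided by the caller.
 */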
1827static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1828{
1829 uint8_t *keys;
1830 uint64_t hva;
1831 int srcu_idx, i, r = 0;
1832
1833 if (args->flags != 0)
1834 return -EINVAL;
1835
1836 /* Is this guest using storage keys? */
1837 if (!mm_uses_skeys(current->mm))
1838 return KVM_S390_GET_SKEYS_NONE;
1839
1840 /* Enforce sane limit on memory allocation */
1841 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1842 return -EINVAL;
1843
1844 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1845 if (!keys)
1846 return -ENOMEM;
1847
1848 mmap_read_lock(current->mm);
1849 srcu_idx = srcu_read_lock(&kvm->srcu);
1850 for (i = 0; i < args->count; i++) {
1851 hva = gfn_to_hva(kvm, args->start_gfn + i);
1852 if (kvm_is_error_hva(hva)) {
1853 r = -EFAULT;
1854 break;
1855 }
1856
1857 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1858 if (r)
1859 break;
1860 }
1861 srcu_read_unlock(&kvm->srcu, srcu_idx);
1862 mmap_read_unlock(current->mm);
1863
1864 if (!r) {
1865 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1866 sizeof(uint8_t) * args->count);
1867 if (r)
1868 r = -EFAULT;
1869 }
1870
1871 kvfree(keys);
1872 return r;
1873}
1874
1875static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1876{
1877 uint8_t *keys;
1878 uint64_t hva;
1879 int srcu_idx, i, r = 0;
1880 bool unlocked;
1881
1882 if (args->flags != 0)
1883 return -EINVAL;
1884
1885 /* Enforce sane limit on memory allocation */
1886 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1887 return -EINVAL;
1888
1889 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1890 if (!keys)
1891 return -ENOMEM;
1892
1893 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1894 sizeof(uint8_t) * args->count);
1895 if (r) {
1896 r = -EFAULT;
1897 goto out;
1898 }
1899
1900 /* Enable storage key handling for the guest */
1901 r = s390_enable_skey();
1902 if (r)
1903 goto out;
1904
1905 i = 0;
1906 mmap_read_lock(current->mm);
1907 srcu_idx = srcu_read_lock(&kvm->srcu);
1908 while (i < args->count) {
1909 unlocked = false;
1910 hva = gfn_to_hva(kvm, args->start_gfn + i);
1911 if (kvm_is_error_hva(hva)) {
1912 r = -EFAULT;
1913 break;
1914 }
1915
1916 /* Lowest order bit is reserved */
1917 if (keys[i] & 0x01) {
1918 r = -EINVAL;
1919 break;
1920 }
1921
1922 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1923 if (r) {
1924 r = fixup_user_fault(current->mm, hva,
1925 FAULT_FLAG_WRITE, &unlocked);
1926 if (r)
1927 break;
1928 }
1929 if (!r)
1930 i++;
1931 }
1932 srcu_read_unlock(&kvm->srcu, srcu_idx);
1933 mmap_read_unlock(current->mm);
1934out:
1935 kvfree(keys);
1936 return r;
1937}
1938
1939/*
1940 * Base address and length must be sent at the start of each block, therefore
1941 * it's cheaper to send some clean data, as long as it's less than the size of
1942 * two longs.
1943 */
1944#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1945/* for consistency */
1946#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
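/*
 * Worked example (editorial): on a 64-bit kernel KVM_S390_MAX_BIT_DISTANCE is
 * 2 * 8 = 16. Starting a new block costs a fresh base address and length,
 * i.e. two 8-byte words, so including up to 16 clean one-byte values in the
 * current block is never more expensive than opening a new block for the next
 * dirty page.
 */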
1947
1948/*
1949 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1950 * address falls in a hole. In that case the index of one of the memslots
1951 * bordering the hole is returned.
1952 */
1953static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1954{
1955 int start = 0, end = slots->used_slots;
1956 int slot = atomic_read(&slots->lru_slot);
1957 struct kvm_memory_slot *memslots = slots->memslots;
1958
1959 if (gfn >= memslots[slot].base_gfn &&
1960 gfn < memslots[slot].base_gfn + memslots[slot].npages)
1961 return slot;
1962
1963 while (start < end) {
1964 slot = start + (end - start) / 2;
1965
1966 if (gfn >= memslots[slot].base_gfn)
1967 end = slot;
1968 else
1969 start = slot + 1;
1970 }
1971
1972 if (start >= slots->used_slots)
1973 return slots->used_slots - 1;
1974
1975 if (gfn >= memslots[start].base_gfn &&
1976 gfn < memslots[start].base_gfn + memslots[start].npages) {
1977 atomic_set(&slots->lru_slot, start);
1978 }
1979
1980 return start;
1981}
1982
1983static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1984 u8 *res, unsigned long bufsize)
1985{
1986 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1987
1988 args->count = 0;
1989 while (args->count < bufsize) {
1990 hva = gfn_to_hva(kvm, cur_gfn);
1991 /*
1992 * We return an error if the first value was invalid, but we
1993 * return successfully if at least one value was copied.
1994 */
1995 if (kvm_is_error_hva(hva))
1996 return args->count ? 0 : -EFAULT;
1997 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1998 pgstev = 0;
1999 res[args->count++] = (pgstev >> 24) & 0x43;
2000 cur_gfn++;
2001 }
2002
2003 return 0;
2004}
2005
2006static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2007 unsigned long cur_gfn)
2008{
2009 int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
2010 struct kvm_memory_slot *ms = slots->memslots + slotidx;
2011 unsigned long ofs = cur_gfn - ms->base_gfn;
2012
2013 if (ms->base_gfn + ms->npages <= cur_gfn) {
2014 slotidx--;
2015 /* If we are above the highest slot, wrap around */
2016 if (slotidx < 0)
2017 slotidx = slots->used_slots - 1;
2018
2019 ms = slots->memslots + slotidx;
2020 ofs = 0;
2021 }
2022 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2023 while ((slotidx > 0) && (ofs >= ms->npages)) {
2024 slotidx--;
2025 ms = slots->memslots + slotidx;
2026 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2027 }
2028 return ms->base_gfn + ofs;
2029}
2030
2031static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2032 u8 *res, unsigned long bufsize)
2033{
2034 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2035 struct kvm_memslots *slots = kvm_memslots(kvm);
2036 struct kvm_memory_slot *ms;
2037
2038 if (unlikely(!slots->used_slots))
2039 return 0;
2040
2041 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2042 ms = gfn_to_memslot(kvm, cur_gfn);
2043 args->count = 0;
2044 args->start_gfn = cur_gfn;
2045 if (!ms)
2046 return 0;
2047 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2048 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2049
2050 while (args->count < bufsize) {
2051 hva = gfn_to_hva(kvm, cur_gfn);
2052 if (kvm_is_error_hva(hva))
2053 return 0;
2054 /* Decrement only if we actually flipped the bit to 0 */
2055 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2056 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2057 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2058 pgstev = 0;
2059 /* Save the value */
2060 res[args->count++] = (pgstev >> 24) & 0x43;
2061 /* If the next bit is too far away, stop. */
2062 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2063 return 0;
2064 /* If we reached the previous "next", find the next one */
2065 if (cur_gfn == next_gfn)
2066 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2067 /* Reached the end of memory or of the buffer, stop */
2068 if ((next_gfn >= mem_end) ||
2069 (next_gfn - args->start_gfn >= bufsize))
2070 return 0;
2071 cur_gfn++;
2072 /* Reached the end of the current memslot, take the next one. */
2073 if (cur_gfn - ms->base_gfn >= ms->npages) {
2074 ms = gfn_to_memslot(kvm, cur_gfn);
2075 if (!ms)
2076 return 0;
2077 }
2078 }
2079 return 0;
2080}
2081
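/*
 * Editorial sketch (not part of the original file): kvm_s390_get_cmma_bits()
 * below backs the KVM_S390_GET_CMMA_BITS vm ioctl. A peek request could look
 * roughly like this (buf and its size are assumed to come from the caller):
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = bufsize,
 *		.flags     = KVM_S390_CMMA_PEEK,
 *		.values    = (__u64)(unsigned long)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 */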
2082/*
2083 * This function searches for the next page with dirty CMMA attributes, and
2084 * saves the attributes in the buffer until either the end of the buffer is
2085 * reached or a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2086 * no trailing clean bytes are saved.
2087 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2088 * output buffer will indicate 0 as length.
2089 */
2090static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2091 struct kvm_s390_cmma_log *args)
2092{
2093 unsigned long bufsize;
2094 int srcu_idx, peek, ret;
2095 u8 *values;
2096
2097 if (!kvm->arch.use_cmma)
2098 return -ENXIO;
2099 /* Invalid/unsupported flags were specified */
2100 if (args->flags & ~KVM_S390_CMMA_PEEK)
2101 return -EINVAL;
2102 /* Migration mode query, and we are not doing a migration */
2103 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2104 if (!peek && !kvm->arch.migration_mode)
2105 return -EINVAL;
2106 /* CMMA is disabled or was not used, or the buffer has length zero */
2107 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2108 if (!bufsize || !kvm->mm->context.uses_cmm) {
2109 memset(args, 0, sizeof(*args));
2110 return 0;
2111 }
2112 /* We are not peeking, and there are no dirty pages */
2113 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2114 memset(args, 0, sizeof(*args));
2115 return 0;
2116 }
2117
2118 values = vmalloc(bufsize);
2119 if (!values)
2120 return -ENOMEM;
2121
2122 mmap_read_lock(kvm->mm);
2123 srcu_idx = srcu_read_lock(&kvm->srcu);
2124 if (peek)
2125 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2126 else
2127 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2128 srcu_read_unlock(&kvm->srcu, srcu_idx);
2129 mmap_read_unlock(kvm->mm);
2130
2131 if (kvm->arch.migration_mode)
2132 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2133 else
2134 args->remaining = 0;
2135
2136 if (copy_to_user((void __user *)args->values, values, args->count))
2137 ret = -EFAULT;
2138
2139 vfree(values);
2140 return ret;
2141}
2142
2143/*
2144 * This function sets the CMMA attributes for the given pages. If the input
2145 * buffer has zero length, no action is taken, otherwise the attributes are
2146 * set and the mm->context.uses_cmm flag is set.
2147 */
2148static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2149 const struct kvm_s390_cmma_log *args)
2150{
2151 unsigned long hva, mask, pgstev, i;
2152 uint8_t *bits;
2153 int srcu_idx, r = 0;
2154
2155 mask = args->mask;
2156
2157 if (!kvm->arch.use_cmma)
2158 return -ENXIO;
2159 /* invalid/unsupported flags */
2160 if (args->flags != 0)
2161 return -EINVAL;
2162 /* Enforce sane limit on memory allocation */
2163 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2164 return -EINVAL;
2165 /* Nothing to do */
2166 if (args->count == 0)
2167 return 0;
2168
2169 bits = vmalloc(array_size(sizeof(*bits), args->count));
2170 if (!bits)
2171 return -ENOMEM;
2172
2173 r = copy_from_user(bits, (void __user *)args->values, args->count);
2174 if (r) {
2175 r = -EFAULT;
2176 goto out;
2177 }
2178
2179 mmap_read_lock(kvm->mm);
2180 srcu_idx = srcu_read_lock(&kvm->srcu);
2181 for (i = 0; i < args->count; i++) {
2182 hva = gfn_to_hva(kvm, args->start_gfn + i);
2183 if (kvm_is_error_hva(hva)) {
2184 r = -EFAULT;
2185 break;
2186 }
2187
2188 pgstev = bits[i];
2189 pgstev = pgstev << 24;
2190 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2191 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2192 }
2193 srcu_read_unlock(&kvm->srcu, srcu_idx);
2194 mmap_read_unlock(kvm->mm);
2195
2196 if (!kvm->mm->context.uses_cmm) {
2197 mmap_write_lock(kvm->mm);
2198 kvm->mm->context.uses_cmm = 1;
2199 mmap_write_unlock(kvm->mm);
2200 }
2201out:
2202 vfree(bits);
2203 return r;
2204}
2205
2206static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2207{
2208 struct kvm_vcpu *vcpu;
2209 u16 rc, rrc;
2210 int ret = 0;
2211 int i;
2212
2213 /*
2214 * We ignore failures and try to destroy as many CPUs as possible.
2215 * At the same time we must not free the assigned resources when
2216 * this fails, as the ultravisor still has access to that memory.
2217 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2218 * behind.
2219 * We want to return the first failure rc and rrc, though.
2220 */
2221 kvm_for_each_vcpu(i, vcpu, kvm) {
2222 mutex_lock(&vcpu->mutex);
2223 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2224 *rcp = rc;
2225 *rrcp = rrc;
2226 ret = -EIO;
2227 }
2228 mutex_unlock(&vcpu->mutex);
2229 }
2230 return ret;
2231}
2232
2233static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2234{
2235 int i, r = 0;
2236 u16 dummy;
2237
2238 struct kvm_vcpu *vcpu;
2239
2240 kvm_for_each_vcpu(i, vcpu, kvm) {
2241 mutex_lock(&vcpu->mutex);
2242 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2243 mutex_unlock(&vcpu->mutex);
2244 if (r)
2245 break;
2246 }
2247 if (r)
2248 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2249 return r;
2250}
2251
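/*
 * Editorial sketch (not part of the original file): the protected
 * virtualization life cycle below is driven from userspace through the
 * KVM_S390_PV_COMMAND vm ioctl, e.g.
 *
 *	struct kvm_pv_cmd cmd = { .cmd = KVM_PV_ENABLE };
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 *
 * On failure cmd.rc / cmd.rrc carry the ultravisor return and reason codes.
 */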
2252static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2253{
2254 int r = 0;
2255 u16 dummy;
2256 void __user *argp = (void __user *)cmd->data;
2257
2258 switch (cmd->cmd) {
2259 case KVM_PV_ENABLE: {
2260 r = -EINVAL;
2261 if (kvm_s390_pv_is_protected(kvm))
2262 break;
2263
2264 /*
2265 * FMT 4 SIE needs esca. As we never switch back to bsca from
2266 * esca, we need no cleanup in the error cases below
2267 */
2268 r = sca_switch_to_extended(kvm);
2269 if (r)
2270 break;
2271
2272 mmap_write_lock(current->mm);
2273 r = gmap_mark_unmergeable();
2274 mmap_write_unlock(current->mm);
2275 if (r)
2276 break;
2277
2278 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2279 if (r)
2280 break;
2281
2282 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2283 if (r)
2284 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2285
2286 /* we need to block service interrupts from now on */
2287 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2288 break;
2289 }
2290 case KVM_PV_DISABLE: {
2291 r = -EINVAL;
2292 if (!kvm_s390_pv_is_protected(kvm))
2293 break;
2294
2295 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2296 /*
2297 * If a CPU could not be destroyed, destroy VM will also fail.
2298 * There is no point in trying to destroy it. Instead return
2299 * the rc and rrc from the first CPU that failed destroying.
2300 */
2301 if (r)
2302 break;
2303 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2304
2305 /* no need to block service interrupts any more */
2306 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2307 break;
2308 }
2309 case KVM_PV_SET_SEC_PARMS: {
2310 struct kvm_s390_pv_sec_parm parms = {};
2311 void *hdr;
2312
2313 r = -EINVAL;
2314 if (!kvm_s390_pv_is_protected(kvm))
2315 break;
2316
2317 r = -EFAULT;
2318 if (copy_from_user(&parms, argp, sizeof(parms)))
2319 break;
2320
2321 /* Currently restricted to 8KB */
2322 r = -EINVAL;
2323 if (parms.length > PAGE_SIZE * 2)
2324 break;
2325
2326 r = -ENOMEM;
2327 hdr = vmalloc(parms.length);
2328 if (!hdr)
2329 break;
2330
2331 r = -EFAULT;
2332 if (!copy_from_user(hdr, (void __user *)parms.origin,
2333 parms.length))
2334 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2335 &cmd->rc, &cmd->rrc);
2336
2337 vfree(hdr);
2338 break;
2339 }
2340 case KVM_PV_UNPACK: {
2341 struct kvm_s390_pv_unp unp = {};
2342
2343 r = -EINVAL;
2344 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2345 break;
2346
2347 r = -EFAULT;
2348 if (copy_from_user(&unp, argp, sizeof(unp)))
2349 break;
2350
2351 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2352 &cmd->rc, &cmd->rrc);
2353 break;
2354 }
2355 case KVM_PV_VERIFY: {
2356 r = -EINVAL;
2357 if (!kvm_s390_pv_is_protected(kvm))
2358 break;
2359
2360 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2361 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2362 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2363 cmd->rrc);
2364 break;
2365 }
2366 case KVM_PV_PREP_RESET: {
2367 r = -EINVAL;
2368 if (!kvm_s390_pv_is_protected(kvm))
2369 break;
2370
2371 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2372 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2373 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2374 cmd->rc, cmd->rrc);
2375 break;
2376 }
2377 case KVM_PV_UNSHARE_ALL: {
2378 r = -EINVAL;
2379 if (!kvm_s390_pv_is_protected(kvm))
2380 break;
2381
2382 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2383 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2384 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2385 cmd->rc, cmd->rrc);
2386 break;
2387 }
2388 default:
2389 r = -ENOTTY;
2390 }
2391 return r;
2392}
2393
2394long kvm_arch_vm_ioctl(struct file *filp,
2395 unsigned int ioctl, unsigned long arg)
2396{
2397 struct kvm *kvm = filp->private_data;
2398 void __user *argp = (void __user *)arg;
2399 struct kvm_device_attr attr;
2400 int r;
2401
2402 switch (ioctl) {
2403 case KVM_S390_INTERRUPT: {
2404 struct kvm_s390_interrupt s390int;
2405
2406 r = -EFAULT;
2407 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2408 break;
2409 r = kvm_s390_inject_vm(kvm, &s390int);
2410 break;
2411 }
2412 case KVM_CREATE_IRQCHIP: {
2413 struct kvm_irq_routing_entry routing;
2414
2415 r = -EINVAL;
2416 if (kvm->arch.use_irqchip) {
2417 /* Set up dummy routing. */
2418 memset(&routing, 0, sizeof(routing));
2419 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2420 }
2421 break;
2422 }
2423 case KVM_SET_DEVICE_ATTR: {
2424 r = -EFAULT;
2425 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2426 break;
2427 r = kvm_s390_vm_set_attr(kvm, &attr);
2428 break;
2429 }
2430 case KVM_GET_DEVICE_ATTR: {
2431 r = -EFAULT;
2432 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2433 break;
2434 r = kvm_s390_vm_get_attr(kvm, &attr);
2435 break;
2436 }
2437 case KVM_HAS_DEVICE_ATTR: {
2438 r = -EFAULT;
2439 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2440 break;
2441 r = kvm_s390_vm_has_attr(kvm, &attr);
2442 break;
2443 }
2444 case KVM_S390_GET_SKEYS: {
2445 struct kvm_s390_skeys args;
2446
2447 r = -EFAULT;
2448 if (copy_from_user(&args, argp,
2449 sizeof(struct kvm_s390_skeys)))
2450 break;
2451 r = kvm_s390_get_skeys(kvm, &args);
2452 break;
2453 }
2454 case KVM_S390_SET_SKEYS: {
2455 struct kvm_s390_skeys args;
2456
2457 r = -EFAULT;
2458 if (copy_from_user(&args, argp,
2459 sizeof(struct kvm_s390_skeys)))
2460 break;
2461 r = kvm_s390_set_skeys(kvm, &args);
2462 break;
2463 }
2464 case KVM_S390_GET_CMMA_BITS: {
2465 struct kvm_s390_cmma_log args;
2466
2467 r = -EFAULT;
2468 if (copy_from_user(&args, argp, sizeof(args)))
2469 break;
2470 mutex_lock(&kvm->slots_lock);
2471 r = kvm_s390_get_cmma_bits(kvm, &args);
2472 mutex_unlock(&kvm->slots_lock);
2473 if (!r) {
2474 r = copy_to_user(argp, &args, sizeof(args));
2475 if (r)
2476 r = -EFAULT;
2477 }
2478 break;
2479 }
2480 case KVM_S390_SET_CMMA_BITS: {
2481 struct kvm_s390_cmma_log args;
2482
2483 r = -EFAULT;
2484 if (copy_from_user(&args, argp, sizeof(args)))
2485 break;
2486 mutex_lock(&kvm->slots_lock);
2487 r = kvm_s390_set_cmma_bits(kvm, &args);
2488 mutex_unlock(&kvm->slots_lock);
2489 break;
2490 }
2491 case KVM_S390_PV_COMMAND: {
2492 struct kvm_pv_cmd args;
2493
2494 /* protvirt means user sigp */
2495 kvm->arch.user_cpu_state_ctrl = 1;
2496 r = 0;
2497 if (!is_prot_virt_host()) {
2498 r = -EINVAL;
2499 break;
2500 }
2501 if (copy_from_user(&args, argp, sizeof(args))) {
2502 r = -EFAULT;
2503 break;
2504 }
2505 if (args.flags) {
2506 r = -EINVAL;
2507 break;
2508 }
2509 mutex_lock(&kvm->lock);
2510 r = kvm_s390_handle_pv(kvm, &args);
2511 mutex_unlock(&kvm->lock);
2512 if (copy_to_user(argp, &args, sizeof(args))) {
2513 r = -EFAULT;
2514 break;
2515 }
2516 break;
2517 }
2518 default:
2519 r = -ENOTTY;
2520 }
2521
2522 return r;
2523}
2524
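/*
 * Report whether the AP extended addressing (APXA) facility is
 * installed, based on the AP configuration information (QCI) when the
 * AP instructions are available.
 */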
2525static int kvm_s390_apxa_installed(void)
2526{
2527 struct ap_config_info info;
2528
2529 if (ap_instructions_available()) {
2530 if (ap_qci(&info) == 0)
2531 return info.apxa;
2532 }
2533
2534 return 0;
2535}
2536
2537/*
2538 * The format of the crypto control block (CRYCB) is specified in the 3 low
2539 * order bits of the CRYCB designation (CRYCBD) field as follows:
2540 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2541 * AP extended addressing (APXA) facility are installed.
2542 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
 * Format 2: Both the APXA and MSAX3 facilities are installed.
2544 */
2545static void kvm_s390_set_crycb_format(struct kvm *kvm)
2546{
2547 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2548
2549 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2550 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2551
2552 /* Check whether MSAX3 is installed */
2553 if (!test_kvm_facility(kvm, 76))
2554 return;
2555
2556 if (kvm_s390_apxa_installed())
2557 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2558 else
2559 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2560}
2561
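/*
 * Install the given AP matrix masks (apm, aqm, adm) into the guest's
 * CRYCB. The amount copied depends on the CRYCB format. All vcpus are
 * blocked during the update and are asked to recreate their shadow
 * CRYCB (VSIE restart) afterwards.
 */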
2562void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2563 unsigned long *aqm, unsigned long *adm)
2564{
2565 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2566
2567 mutex_lock(&kvm->lock);
2568 kvm_s390_vcpu_block_all(kvm);
2569
2570 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2571 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2572 memcpy(crycb->apcb1.apm, apm, 32);
2573 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2574 apm[0], apm[1], apm[2], apm[3]);
2575 memcpy(crycb->apcb1.aqm, aqm, 32);
2576 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2577 aqm[0], aqm[1], aqm[2], aqm[3]);
2578 memcpy(crycb->apcb1.adm, adm, 32);
2579 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2580 adm[0], adm[1], adm[2], adm[3]);
2581 break;
2582 case CRYCB_FORMAT1:
2583 case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2584 memcpy(crycb->apcb0.apm, apm, 8);
2585 memcpy(crycb->apcb0.aqm, aqm, 2);
2586 memcpy(crycb->apcb0.adm, adm, 2);
2587 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2588 apm[0], *((unsigned short *)aqm),
2589 *((unsigned short *)adm));
2590 break;
2591 default: /* Can not happen */
2592 break;
2593 }
2594
2595 /* recreate the shadow crycb for each vcpu */
2596 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2597 kvm_s390_vcpu_unblock_all(kvm);
2598 mutex_unlock(&kvm->lock);
2599}
2600EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2601
2602void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2603{
2604 mutex_lock(&kvm->lock);
2605 kvm_s390_vcpu_block_all(kvm);
2606
2607 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2608 sizeof(kvm->arch.crypto.crycb->apcb0));
2609 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2610 sizeof(kvm->arch.crypto.crycb->apcb1));
2611
2612 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2613 /* recreate the shadow crycb for each vcpu */
2614 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2615 kvm_s390_vcpu_unblock_all(kvm);
2616 mutex_unlock(&kvm->lock);
2617}
2618EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2619
2620static u64 kvm_s390_get_initial_cpuid(void)
2621{
2622 struct cpuid cpuid;
2623
2624 get_cpu_id(&cpuid);
2625 cpuid.version = 0xff;
2626 return *((u64 *) &cpuid);
2627}
2628
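/*
 * Initialize the per-VM crypto state: select the CRYCB format and, if
 * the MSAX3 facility (76) is available, enable AES/DEA protected-key
 * handling with freshly generated random wrapping key masks.
 */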
2629static void kvm_s390_crypto_init(struct kvm *kvm)
2630{
2631 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2632 kvm_s390_set_crycb_format(kvm);
2633
2634 if (!test_kvm_facility(kvm, 76))
2635 return;
2636
2637 /* Enable AES/DEA protected key functions by default */
2638 kvm->arch.crypto.aes_kw = 1;
2639 kvm->arch.crypto.dea_kw = 1;
2640 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2641 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2642 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2643 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2644}
2645
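/* Free the (basic or extended) system control area and clear the pointer. */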
2646static void sca_dispose(struct kvm *kvm)
2647{
2648 if (kvm->arch.use_esca)
2649 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2650 else
2651 free_page((unsigned long)(kvm->arch.sca));
2652 kvm->arch.sca = NULL;
2653}
2654
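/*
 * Architecture-specific VM creation: allocate the basic SCA, the debug
 * feature area and sie_page2, set up the facility lists and CPU model,
 * the crypto and floating interrupt state, and create the guest
 * address space (gmap) unless this is a ucontrol VM.
 */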
2655int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2656{
2657 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2658 int i, rc;
2659 char debug_name[16];
2660 static unsigned long sca_offset;
2661
2662 rc = -EINVAL;
2663#ifdef CONFIG_KVM_S390_UCONTROL
2664 if (type & ~KVM_VM_S390_UCONTROL)
2665 goto out_err;
2666 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2667 goto out_err;
2668#else
2669 if (type)
2670 goto out_err;
2671#endif
2672
2673 rc = s390_enable_sie();
2674 if (rc)
2675 goto out_err;
2676
2677 rc = -ENOMEM;
2678
2679 if (!sclp.has_64bscao)
2680 alloc_flags |= GFP_DMA;
2681 rwlock_init(&kvm->arch.sca_lock);
2682 /* start with basic SCA */
2683 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2684 if (!kvm->arch.sca)
2685 goto out_err;
2686 mutex_lock(&kvm_lock);
2687 sca_offset += 16;
2688 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2689 sca_offset = 0;
2690 kvm->arch.sca = (struct bsca_block *)
2691 ((char *) kvm->arch.sca + sca_offset);
2692 mutex_unlock(&kvm_lock);
2693
2694 sprintf(debug_name, "kvm-%u", current->pid);
2695
2696 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2697 if (!kvm->arch.dbf)
2698 goto out_err;
2699
2700 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2701 kvm->arch.sie_page2 =
2702 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2703 if (!kvm->arch.sie_page2)
2704 goto out_err;
2705
2706 kvm->arch.sie_page2->kvm = kvm;
2707 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2708
2709 for (i = 0; i < kvm_s390_fac_size(); i++) {
2710 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2711 (kvm_s390_fac_base[i] |
2712 kvm_s390_fac_ext[i]);
2713 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2714 kvm_s390_fac_base[i];
2715 }
2716 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2717
2718 /* we are always in czam mode - even on pre z14 machines */
2719 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2720 set_kvm_facility(kvm->arch.model.fac_list, 138);
2721 /* we emulate STHYI in kvm */
2722 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2723 set_kvm_facility(kvm->arch.model.fac_list, 74);
2724 if (MACHINE_HAS_TLB_GUEST) {
2725 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2726 set_kvm_facility(kvm->arch.model.fac_list, 147);
2727 }
2728
2729 if (css_general_characteristics.aiv && test_facility(65))
2730 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2731
2732 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2733 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2734
2735 kvm_s390_crypto_init(kvm);
2736
2737 mutex_init(&kvm->arch.float_int.ais_lock);
2738 spin_lock_init(&kvm->arch.float_int.lock);
2739 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2740 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2741 init_waitqueue_head(&kvm->arch.ipte_wq);
2742 mutex_init(&kvm->arch.ipte_mutex);
2743
2744 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2745 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2746
2747 if (type & KVM_VM_S390_UCONTROL) {
2748 kvm->arch.gmap = NULL;
2749 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2750 } else {
2751 if (sclp.hamax == U64_MAX)
2752 kvm->arch.mem_limit = TASK_SIZE_MAX;
2753 else
2754 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2755 sclp.hamax + 1);
2756 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2757 if (!kvm->arch.gmap)
2758 goto out_err;
2759 kvm->arch.gmap->private = kvm;
2760 kvm->arch.gmap->pfault_enabled = 0;
2761 }
2762
2763 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2764 kvm->arch.use_skf = sclp.has_skey;
2765 spin_lock_init(&kvm->arch.start_stop_lock);
2766 kvm_s390_vsie_init(kvm);
2767 if (use_gisa)
2768 kvm_s390_gisa_init(kvm);
2769 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2770
2771 return 0;
2772out_err:
2773 free_page((unsigned long)kvm->arch.sie_page2);
2774 debug_unregister(kvm->arch.dbf);
2775 sca_dispose(kvm);
2776 KVM_EVENT(3, "creation of vm failed: %d", rc);
2777 return rc;
2778}
2779
2780void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2781{
2782 u16 rc, rrc;
2783
2784 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2785 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2786 kvm_s390_clear_local_irqs(vcpu);
2787 kvm_clear_async_pf_completion_queue(vcpu);
2788 if (!kvm_is_ucontrol(vcpu->kvm))
2789 sca_del_vcpu(vcpu);
2790
2791 if (kvm_is_ucontrol(vcpu->kvm))
2792 gmap_remove(vcpu->arch.gmap);
2793
2794 if (vcpu->kvm->arch.use_cmma)
2795 kvm_s390_vcpu_unsetup_cmma(vcpu);
2796	/* We cannot hold the vcpu mutex here; we are already dying */
2797 if (kvm_s390_pv_cpu_get_handle(vcpu))
2798 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2799 free_page((unsigned long)(vcpu->arch.sie_block));
2800}
2801
2802static void kvm_free_vcpus(struct kvm *kvm)
2803{
2804 unsigned int i;
2805 struct kvm_vcpu *vcpu;
2806
2807 kvm_for_each_vcpu(i, vcpu, kvm)
2808 kvm_vcpu_destroy(vcpu);
2809
2810 mutex_lock(&kvm->lock);
2811 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2812 kvm->vcpus[i] = NULL;
2813
2814 atomic_set(&kvm->online_vcpus, 0);
2815 mutex_unlock(&kvm->lock);
2816}
2817
2818void kvm_arch_destroy_vm(struct kvm *kvm)
2819{
2820 u16 rc, rrc;
2821
2822 kvm_free_vcpus(kvm);
2823 sca_dispose(kvm);
2824 kvm_s390_gisa_destroy(kvm);
2825 /*
2826 * We are already at the end of life and kvm->lock is not taken.
2827 * This is ok as the file descriptor is closed by now and nobody
2828 * can mess with the pv state. To avoid lockdep_assert_held from
2829 * complaining we do not use kvm_s390_pv_is_protected.
2830 */
2831 if (kvm_s390_pv_get_handle(kvm))
2832 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2833 debug_unregister(kvm->arch.dbf);
2834 free_page((unsigned long)kvm->arch.sie_page2);
2835 if (!kvm_is_ucontrol(kvm))
2836 gmap_remove(kvm->arch.gmap);
2837 kvm_s390_destroy_adapters(kvm);
2838 kvm_s390_clear_float_irqs(kvm);
2839 kvm_s390_vsie_destroy(kvm);
2840 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2841}
2842
2843/* Section: vcpu related */
2844static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2845{
2846 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2847 if (!vcpu->arch.gmap)
2848 return -ENOMEM;
2849 vcpu->arch.gmap->private = vcpu->kvm;
2850
2851 return 0;
2852}
2853
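/* Remove the vcpu's entry from the (basic or extended) SCA. */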
2854static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2855{
2856 if (!kvm_s390_use_sca_entries())
2857 return;
2858 read_lock(&vcpu->kvm->arch.sca_lock);
2859 if (vcpu->kvm->arch.use_esca) {
2860 struct esca_block *sca = vcpu->kvm->arch.sca;
2861
2862 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2863 sca->cpu[vcpu->vcpu_id].sda = 0;
2864 } else {
2865 struct bsca_block *sca = vcpu->kvm->arch.sca;
2866
2867 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2868 sca->cpu[vcpu->vcpu_id].sda = 0;
2869 }
2870 read_unlock(&vcpu->kvm->arch.sca_lock);
2871}
2872
2873static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2874{
2875 if (!kvm_s390_use_sca_entries()) {
2876 struct bsca_block *sca = vcpu->kvm->arch.sca;
2877
2878 /* we still need the basic sca for the ipte control */
2879 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2880 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2881 return;
2882 }
2883 read_lock(&vcpu->kvm->arch.sca_lock);
2884 if (vcpu->kvm->arch.use_esca) {
2885 struct esca_block *sca = vcpu->kvm->arch.sca;
2886
2887 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2888 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2889 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2890 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2891 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2892 } else {
2893 struct bsca_block *sca = vcpu->kvm->arch.sca;
2894
2895 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2896 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2897 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2898 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2899 }
2900 read_unlock(&vcpu->kvm->arch.sca_lock);
2901}
2902
2903/* Basic SCA to Extended SCA data copy routines */
2904static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2905{
2906 d->sda = s->sda;
2907 d->sigp_ctrl.c = s->sigp_ctrl.c;
2908 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2909}
2910
2911static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2912{
2913 int i;
2914
2915 d->ipte_control = s->ipte_control;
2916 d->mcn[0] = s->mcn;
2917 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2918 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2919}
2920
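/*
 * Replace the basic SCA with an extended SCA. All vcpus are blocked
 * while their SIE blocks are re-pointed to the new SCA; the old basic
 * SCA is freed afterwards.
 */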
2921static int sca_switch_to_extended(struct kvm *kvm)
2922{
2923 struct bsca_block *old_sca = kvm->arch.sca;
2924 struct esca_block *new_sca;
2925 struct kvm_vcpu *vcpu;
2926 unsigned int vcpu_idx;
2927 u32 scaol, scaoh;
2928
2929 if (kvm->arch.use_esca)
2930 return 0;
2931
2932 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2933 if (!new_sca)
2934 return -ENOMEM;
2935
2936 scaoh = (u32)((u64)(new_sca) >> 32);
2937 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2938
2939 kvm_s390_vcpu_block_all(kvm);
2940 write_lock(&kvm->arch.sca_lock);
2941
2942 sca_copy_b_to_e(new_sca, old_sca);
2943
2944 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2945 vcpu->arch.sie_block->scaoh = scaoh;
2946 vcpu->arch.sie_block->scaol = scaol;
2947 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2948 }
2949 kvm->arch.sca = new_sca;
2950 kvm->arch.use_esca = 1;
2951
2952 write_unlock(&kvm->arch.sca_lock);
2953 kvm_s390_vcpu_unblock_all(kvm);
2954
2955 free_page((unsigned long)old_sca);
2956
2957 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2958 old_sca, kvm->arch.sca);
2959 return 0;
2960}
2961
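/*
 * Check whether a vcpu with the given id still fits into the SCA.
 * Ids beyond the basic SCA slots require the extended SCA, which is
 * switched to on demand if the hardware supports it.
 */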
2962static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2963{
2964 int rc;
2965
2966 if (!kvm_s390_use_sca_entries()) {
2967 if (id < KVM_MAX_VCPUS)
2968 return true;
2969 return false;
2970 }
2971 if (id < KVM_S390_BSCA_CPU_SLOTS)
2972 return true;
2973 if (!sclp.has_esca || !sclp.has_64bscao)
2974 return false;
2975
2976 mutex_lock(&kvm->lock);
2977 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2978 mutex_unlock(&kvm->lock);
2979
2980 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2981}
2982
2983/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2984static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2985{
2986 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2987 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2988 vcpu->arch.cputm_start = get_tod_clock_fast();
2989 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2990}
2991
2992/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2993static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2994{
2995 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2996 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2997 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2998 vcpu->arch.cputm_start = 0;
2999 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3000}
3001
3002/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3003static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3004{
3005 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3006 vcpu->arch.cputm_enabled = true;
3007 __start_cpu_timer_accounting(vcpu);
3008}
3009
3010/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3011static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3012{
3013 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3014 __stop_cpu_timer_accounting(vcpu);
3015 vcpu->arch.cputm_enabled = false;
3016}
3017
3018static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3019{
3020 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3021 __enable_cpu_timer_accounting(vcpu);
3022 preempt_enable();
3023}
3024
3025static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3026{
3027 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3028 __disable_cpu_timer_accounting(vcpu);
3029 preempt_enable();
3030}
3031
3032/* set the cpu timer - may only be called from the VCPU thread itself */
3033void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3034{
3035 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3036 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3037 if (vcpu->arch.cputm_enabled)
3038 vcpu->arch.cputm_start = get_tod_clock_fast();
3039 vcpu->arch.sie_block->cputm = cputm;
3040 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3041 preempt_enable();
3042}
3043
3044/* update and get the cpu timer - can also be called from other VCPU threads */
3045__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3046{
3047 unsigned int seq;
3048 __u64 value;
3049
3050 if (unlikely(!vcpu->arch.cputm_enabled))
3051 return vcpu->arch.sie_block->cputm;
3052
3053 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3054 do {
3055 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3056 /*
3057 * If the writer would ever execute a read in the critical
3058 * section, e.g. in irq context, we have a deadlock.
3059 */
3060 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3061 value = vcpu->arch.sie_block->cputm;
3062 /* if cputm_start is 0, accounting is being started/stopped */
3063 if (likely(vcpu->arch.cputm_start))
3064 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3065 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3066 preempt_enable();
3067 return value;
3068}
3069
3070void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3071{
3072
3073 gmap_enable(vcpu->arch.enabled_gmap);
3074 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3075 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3076 __start_cpu_timer_accounting(vcpu);
3077 vcpu->cpu = cpu;
3078}
3079
3080void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3081{
3082 vcpu->cpu = -1;
3083 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3084 __stop_cpu_timer_accounting(vcpu);
3085 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3086 vcpu->arch.enabled_gmap = gmap_get_enabled();
3087 gmap_disable(vcpu->arch.enabled_gmap);
3088
3089}
3090
3091void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3092{
3093 mutex_lock(&vcpu->kvm->lock);
3094 preempt_disable();
3095 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3096 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3097 preempt_enable();
3098 mutex_unlock(&vcpu->kvm->lock);
3099 if (!kvm_is_ucontrol(vcpu->kvm)) {
3100 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3101 sca_add_vcpu(vcpu);
3102 }
3103 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3104 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3105 /* make vcpu_load load the right gmap on the first trigger */
3106 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3107}
3108
3109static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3110{
3111 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3112 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3113 return true;
3114 return false;
3115}
3116
3117static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3118{
3119 /* At least one ECC subfunction must be present */
3120 return kvm_has_pckmo_subfunc(kvm, 32) ||
3121 kvm_has_pckmo_subfunc(kvm, 33) ||
3122 kvm_has_pckmo_subfunc(kvm, 34) ||
3123 kvm_has_pckmo_subfunc(kvm, 40) ||
3124 kvm_has_pckmo_subfunc(kvm, 41);
3125
3126}
3127
3128static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3129{
3130 /*
3131 * If the AP instructions are not being interpreted and the MSAX3
3132 * facility is not configured for the guest, there is nothing to set up.
3133 */
3134 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3135 return;
3136
3137 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3138 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3139 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3140 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3141
3142 if (vcpu->kvm->arch.crypto.apie)
3143 vcpu->arch.sie_block->eca |= ECA_APIE;
3144
3145 /* Set up protected key support */
3146 if (vcpu->kvm->arch.crypto.aes_kw) {
3147 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3148 /* ecc is also wrapped with AES key */
3149 if (kvm_has_pckmo_ecc(vcpu->kvm))
3150 vcpu->arch.sie_block->ecd |= ECD_ECC;
3151 }
3152
3153 if (vcpu->kvm->arch.crypto.dea_kw)
3154 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3155}
3156
3157void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3158{
3159 free_page(vcpu->arch.sie_block->cbrlo);
3160 vcpu->arch.sie_block->cbrlo = 0;
3161}
3162
3163int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3164{
3165 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3166 if (!vcpu->arch.sie_block->cbrlo)
3167 return -ENOMEM;
3168 return 0;
3169}
3170
3171static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3172{
3173 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3174
3175 vcpu->arch.sie_block->ibc = model->ibc;
3176 if (test_kvm_facility(vcpu->kvm, 7))
3177 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3178}
3179
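/*
 * One-time setup of the SIE control block for a new vcpu: initial CPU
 * flags, ECB/ECA/ECD bits depending on the available facilities, the
 * CMMA buffer, the clock comparator wakeup timer and the crypto
 * settings. If the VM is already protected, the matching PV CPU is
 * created as well.
 */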
3180static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3181{
3182 int rc = 0;
3183 u16 uvrc, uvrrc;
3184
3185 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3186 CPUSTAT_SM |
3187 CPUSTAT_STOPPED);
3188
3189 if (test_kvm_facility(vcpu->kvm, 78))
3190 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3191 else if (test_kvm_facility(vcpu->kvm, 8))
3192 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3193
3194 kvm_s390_vcpu_setup_model(vcpu);
3195
3196 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3197 if (MACHINE_HAS_ESOP)
3198 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3199 if (test_kvm_facility(vcpu->kvm, 9))
3200 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3201 if (test_kvm_facility(vcpu->kvm, 73))
3202 vcpu->arch.sie_block->ecb |= ECB_TE;
3203
3204 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3205 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3206 if (test_kvm_facility(vcpu->kvm, 130))
3207 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3208 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3209 if (sclp.has_cei)
3210 vcpu->arch.sie_block->eca |= ECA_CEI;
3211 if (sclp.has_ib)
3212 vcpu->arch.sie_block->eca |= ECA_IB;
3213 if (sclp.has_siif)
3214 vcpu->arch.sie_block->eca |= ECA_SII;
3215 if (sclp.has_sigpif)
3216 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3217 if (test_kvm_facility(vcpu->kvm, 129)) {
3218 vcpu->arch.sie_block->eca |= ECA_VX;
3219 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3220 }
3221 if (test_kvm_facility(vcpu->kvm, 139))
3222 vcpu->arch.sie_block->ecd |= ECD_MEF;
3223 if (test_kvm_facility(vcpu->kvm, 156))
3224 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3225 if (vcpu->arch.sie_block->gd) {
3226 vcpu->arch.sie_block->eca |= ECA_AIV;
3227 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3228 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3229 }
3230 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3231 | SDNXC;
3232 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3233
3234 if (sclp.has_kss)
3235 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3236 else
3237 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3238
3239 if (vcpu->kvm->arch.use_cmma) {
3240 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3241 if (rc)
3242 return rc;
3243 }
3244 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3245 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3246
3247 vcpu->arch.sie_block->hpid = HPID_KVM;
3248
3249 kvm_s390_vcpu_crypto_setup(vcpu);
3250
3251 mutex_lock(&vcpu->kvm->lock);
3252 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3253 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3254 if (rc)
3255 kvm_s390_vcpu_unsetup_cmma(vcpu);
3256 }
3257 mutex_unlock(&vcpu->kvm->lock);
3258
3259 return rc;
3260}
3261
3262int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3263{
3264 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3265 return -EINVAL;
3266 return 0;
3267}
3268
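/*
 * Allocate and initialize the SIE control block and the synced
 * register set for a new vcpu; ucontrol VMs additionally get their own
 * per-vcpu gmap.
 */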
3269int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3270{
3271 struct sie_page *sie_page;
3272 int rc;
3273
3274 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3275 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3276 if (!sie_page)
3277 return -ENOMEM;
3278
3279 vcpu->arch.sie_block = &sie_page->sie_block;
3280 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3281
3282 /* the real guest size will always be smaller than msl */
3283 vcpu->arch.sie_block->mso = 0;
3284 vcpu->arch.sie_block->msl = sclp.hamax;
3285
3286 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3287 spin_lock_init(&vcpu->arch.local_int.lock);
3288 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3289 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3290 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3291 seqcount_init(&vcpu->arch.cputm_seqcount);
3292
3293 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3294 kvm_clear_async_pf_completion_queue(vcpu);
3295 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3296 KVM_SYNC_GPRS |
3297 KVM_SYNC_ACRS |
3298 KVM_SYNC_CRS |
3299 KVM_SYNC_ARCH0 |
3300 KVM_SYNC_PFAULT |
3301 KVM_SYNC_DIAG318;
3302 kvm_s390_set_prefix(vcpu, 0);
3303 if (test_kvm_facility(vcpu->kvm, 64))
3304 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3305 if (test_kvm_facility(vcpu->kvm, 82))
3306 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3307 if (test_kvm_facility(vcpu->kvm, 133))
3308 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3309 if (test_kvm_facility(vcpu->kvm, 156))
3310 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3311 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3312 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3313 */
3314 if (MACHINE_HAS_VX)
3315 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3316 else
3317 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3318
3319 if (kvm_is_ucontrol(vcpu->kvm)) {
3320 rc = __kvm_ucontrol_vcpu_init(vcpu);
3321 if (rc)
3322 goto out_free_sie_block;
3323 }
3324
3325 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3326 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3327 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3328
3329 rc = kvm_s390_vcpu_setup(vcpu);
3330 if (rc)
3331 goto out_ucontrol_uninit;
3332 return 0;
3333
3334out_ucontrol_uninit:
3335 if (kvm_is_ucontrol(vcpu->kvm))
3336 gmap_remove(vcpu->arch.gmap);
3337out_free_sie_block:
3338 free_page((unsigned long)(vcpu->arch.sie_block));
3339 return rc;
3340}
3341
3342int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3343{
3344 return kvm_s390_vcpu_has_irq(vcpu, 0);
3345}
3346
3347bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3348{
3349 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3350}
3351
3352void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3353{
3354 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3355 exit_sie(vcpu);
3356}
3357
3358void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3359{
3360 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3361}
3362
3363static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3364{
3365 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3366 exit_sie(vcpu);
3367}
3368
3369bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3370{
3371 return atomic_read(&vcpu->arch.sie_block->prog20) &
3372 (PROG_BLOCK_SIE | PROG_REQUEST);
3373}
3374
3375static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3376{
3377 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3378}
3379
3380/*
3381 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3382 * If the CPU is not running (e.g. waiting as idle) the function will
3383 * return immediately. */
3384void exit_sie(struct kvm_vcpu *vcpu)
3385{
3386 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3387 kvm_s390_vsie_kick(vcpu);
3388 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3389 cpu_relax();
3390}
3391
3392/* Kick a guest cpu out of SIE to process a request synchronously */
3393void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3394{
3395 kvm_make_request(req, vcpu);
3396 kvm_s390_vcpu_request(vcpu);
3397}
3398
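/*
 * gmap invalidation notifier: if the invalidated range overlaps a
 * vcpu's prefix pages (the two pages starting at the prefix), request
 * an MMU reload for that vcpu so the ipte notifier gets re-armed.
 */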
3399static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3400 unsigned long end)
3401{
3402 struct kvm *kvm = gmap->private;
3403 struct kvm_vcpu *vcpu;
3404 unsigned long prefix;
3405 int i;
3406
3407 if (gmap_is_shadow(gmap))
3408 return;
3409 if (start >= 1UL << 31)
3410 /* We are only interested in prefix pages */
3411 return;
3412 kvm_for_each_vcpu(i, vcpu, kvm) {
3413 /* match against both prefix pages */
3414 prefix = kvm_s390_get_prefix(vcpu);
3415 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3416 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3417 start, end);
3418 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3419 }
3420 }
3421}
3422
3423bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3424{
3425 /* do not poll with more than halt_poll_max_steal percent of steal time */
3426 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3427 halt_poll_max_steal) {
3428 vcpu->stat.halt_no_poll_steal++;
3429 return true;
3430 }
3431 return false;
3432}
3433
3434int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3435{
3436 /* kvm common code refers to this, but never calls it */
3437 BUG();
3438 return 0;
3439}
3440
3441static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3442 struct kvm_one_reg *reg)
3443{
3444 int r = -EINVAL;
3445
3446 switch (reg->id) {
3447 case KVM_REG_S390_TODPR:
3448 r = put_user(vcpu->arch.sie_block->todpr,
3449 (u32 __user *)reg->addr);
3450 break;
3451 case KVM_REG_S390_EPOCHDIFF:
3452 r = put_user(vcpu->arch.sie_block->epoch,
3453 (u64 __user *)reg->addr);
3454 break;
3455 case KVM_REG_S390_CPU_TIMER:
3456 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3457 (u64 __user *)reg->addr);
3458 break;
3459 case KVM_REG_S390_CLOCK_COMP:
3460 r = put_user(vcpu->arch.sie_block->ckc,
3461 (u64 __user *)reg->addr);
3462 break;
3463 case KVM_REG_S390_PFTOKEN:
3464 r = put_user(vcpu->arch.pfault_token,
3465 (u64 __user *)reg->addr);
3466 break;
3467 case KVM_REG_S390_PFCOMPARE:
3468 r = put_user(vcpu->arch.pfault_compare,
3469 (u64 __user *)reg->addr);
3470 break;
3471 case KVM_REG_S390_PFSELECT:
3472 r = put_user(vcpu->arch.pfault_select,
3473 (u64 __user *)reg->addr);
3474 break;
3475 case KVM_REG_S390_PP:
3476 r = put_user(vcpu->arch.sie_block->pp,
3477 (u64 __user *)reg->addr);
3478 break;
3479 case KVM_REG_S390_GBEA:
3480 r = put_user(vcpu->arch.sie_block->gbea,
3481 (u64 __user *)reg->addr);
3482 break;
3483 default:
3484 break;
3485 }
3486
3487 return r;
3488}
3489
3490static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3491 struct kvm_one_reg *reg)
3492{
3493 int r = -EINVAL;
3494 __u64 val;
3495
3496 switch (reg->id) {
3497 case KVM_REG_S390_TODPR:
3498 r = get_user(vcpu->arch.sie_block->todpr,
3499 (u32 __user *)reg->addr);
3500 break;
3501 case KVM_REG_S390_EPOCHDIFF:
3502 r = get_user(vcpu->arch.sie_block->epoch,
3503 (u64 __user *)reg->addr);
3504 break;
3505 case KVM_REG_S390_CPU_TIMER:
3506 r = get_user(val, (u64 __user *)reg->addr);
3507 if (!r)
3508 kvm_s390_set_cpu_timer(vcpu, val);
3509 break;
3510 case KVM_REG_S390_CLOCK_COMP:
3511 r = get_user(vcpu->arch.sie_block->ckc,
3512 (u64 __user *)reg->addr);
3513 break;
3514 case KVM_REG_S390_PFTOKEN:
3515 r = get_user(vcpu->arch.pfault_token,
3516 (u64 __user *)reg->addr);
3517 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3518 kvm_clear_async_pf_completion_queue(vcpu);
3519 break;
3520 case KVM_REG_S390_PFCOMPARE:
3521 r = get_user(vcpu->arch.pfault_compare,
3522 (u64 __user *)reg->addr);
3523 break;
3524 case KVM_REG_S390_PFSELECT:
3525 r = get_user(vcpu->arch.pfault_select,
3526 (u64 __user *)reg->addr);
3527 break;
3528 case KVM_REG_S390_PP:
3529 r = get_user(vcpu->arch.sie_block->pp,
3530 (u64 __user *)reg->addr);
3531 break;
3532 case KVM_REG_S390_GBEA:
3533 r = get_user(vcpu->arch.sie_block->gbea,
3534 (u64 __user *)reg->addr);
3535 break;
3536 default:
3537 break;
3538 }
3539
3540 return r;
3541}
3542
3543static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3544{
3545 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3546 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3547 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3548
3549 kvm_clear_async_pf_completion_queue(vcpu);
3550 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3551 kvm_s390_vcpu_stop(vcpu);
3552 kvm_s390_clear_local_irqs(vcpu);
3553}
3554
3555static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3556{
3557 /* Initial reset is a superset of the normal reset */
3558 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3559
3560 /*
3561 * This equals initial cpu reset in pop, but we don't switch to ESA.
3562 * We do not only reset the internal data, but also ...
3563 */
3564 vcpu->arch.sie_block->gpsw.mask = 0;
3565 vcpu->arch.sie_block->gpsw.addr = 0;
3566 kvm_s390_set_prefix(vcpu, 0);
3567 kvm_s390_set_cpu_timer(vcpu, 0);
3568 vcpu->arch.sie_block->ckc = 0;
3569 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3570 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3571 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3572
3573 /* ... the data in sync regs */
3574 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3575 vcpu->run->s.regs.ckc = 0;
3576 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3577 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3578 vcpu->run->psw_addr = 0;
3579 vcpu->run->psw_mask = 0;
3580 vcpu->run->s.regs.todpr = 0;
3581 vcpu->run->s.regs.cputm = 0;
3582 vcpu->run->s.regs.ckc = 0;
3583 vcpu->run->s.regs.pp = 0;
3584 vcpu->run->s.regs.gbea = 1;
3585 vcpu->run->s.regs.fpc = 0;
3586 /*
3587 * Do not reset these registers in the protected case, as some of
3588 * them are overlayed and they are not accessible in this case
3589 * anyway.
3590 */
3591 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3592 vcpu->arch.sie_block->gbea = 1;
3593 vcpu->arch.sie_block->pp = 0;
3594 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3595 vcpu->arch.sie_block->todpr = 0;
3596 }
3597}
3598
3599static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3600{
3601 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3602
3603 /* Clear reset is a superset of the initial reset */
3604 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3605
3606	memset(&regs->gprs, 0, sizeof(regs->gprs));
3607	memset(&regs->vrs, 0, sizeof(regs->vrs));
3608	memset(&regs->acrs, 0, sizeof(regs->acrs));
3609	memset(&regs->gscb, 0, sizeof(regs->gscb));
3610
3611 regs->etoken = 0;
3612 regs->etoken_extension = 0;
3613}
3614
3615int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3616{
3617 vcpu_load(vcpu);
3618	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3619 vcpu_put(vcpu);
3620 return 0;
3621}
3622
3623int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3624{
3625 vcpu_load(vcpu);
3626	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3627 vcpu_put(vcpu);
3628 return 0;
3629}
3630
3631int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3632 struct kvm_sregs *sregs)
3633{
3634 vcpu_load(vcpu);
3635
3636 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3637 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3638
3639 vcpu_put(vcpu);
3640 return 0;
3641}
3642
3643int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3644 struct kvm_sregs *sregs)
3645{
3646 vcpu_load(vcpu);
3647
3648 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3649 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3650
3651 vcpu_put(vcpu);
3652 return 0;
3653}
3654
3655int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3656{
3657 int ret = 0;
3658
3659 vcpu_load(vcpu);
3660
3661 if (test_fp_ctl(fpu->fpc)) {
3662 ret = -EINVAL;
3663 goto out;
3664 }
3665 vcpu->run->s.regs.fpc = fpu->fpc;
3666 if (MACHINE_HAS_VX)
3667 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3668 (freg_t *) fpu->fprs);
3669 else
3670 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3671
3672out:
3673 vcpu_put(vcpu);
3674 return ret;
3675}
3676
3677int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3678{
3679 vcpu_load(vcpu);
3680
3681 /* make sure we have the latest values */
3682 save_fpu_regs();
3683 if (MACHINE_HAS_VX)
3684 convert_vx_to_fp((freg_t *) fpu->fprs,
3685 (__vector128 *) vcpu->run->s.regs.vrs);
3686 else
3687 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3688 fpu->fpc = vcpu->run->s.regs.fpc;
3689
3690 vcpu_put(vcpu);
3691 return 0;
3692}
3693
3694static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3695{
3696 int rc = 0;
3697
3698 if (!is_vcpu_stopped(vcpu))
3699 rc = -EBUSY;
3700 else {
3701 vcpu->run->psw_mask = psw.mask;
3702 vcpu->run->psw_addr = psw.addr;
3703 }
3704 return rc;
3705}
3706
3707int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3708 struct kvm_translation *tr)
3709{
3710 return -EINVAL; /* not implemented yet */
3711}
3712
3713#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3714 KVM_GUESTDBG_USE_HW_BP | \
3715 KVM_GUESTDBG_ENABLE)
3716
3717int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3718 struct kvm_guest_debug *dbg)
3719{
3720 int rc = 0;
3721
3722 vcpu_load(vcpu);
3723
3724 vcpu->guest_debug = 0;
3725 kvm_s390_clear_bp_data(vcpu);
3726
3727 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3728 rc = -EINVAL;
3729 goto out;
3730 }
3731 if (!sclp.has_gpere) {
3732 rc = -EINVAL;
3733 goto out;
3734 }
3735
3736 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3737 vcpu->guest_debug = dbg->control;
3738 /* enforce guest PER */
3739 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3740
3741 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3742 rc = kvm_s390_import_bp_data(vcpu, dbg);
3743 } else {
3744 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3745 vcpu->arch.guestdbg.last_bp = 0;
3746 }
3747
3748 if (rc) {
3749 vcpu->guest_debug = 0;
3750 kvm_s390_clear_bp_data(vcpu);
3751 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3752 }
3753
3754out:
3755 vcpu_put(vcpu);
3756 return rc;
3757}
3758
3759int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3760 struct kvm_mp_state *mp_state)
3761{
3762 int ret;
3763
3764 vcpu_load(vcpu);
3765
3766 /* CHECK_STOP and LOAD are not supported yet */
3767 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3768 KVM_MP_STATE_OPERATING;
3769
3770 vcpu_put(vcpu);
3771 return ret;
3772}
3773
3774int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3775 struct kvm_mp_state *mp_state)
3776{
3777 int rc = 0;
3778
3779 vcpu_load(vcpu);
3780
3781 /* user space knows about this interface - let it control the state */
3782 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3783
3784 switch (mp_state->mp_state) {
3785 case KVM_MP_STATE_STOPPED:
3786 rc = kvm_s390_vcpu_stop(vcpu);
3787 break;
3788 case KVM_MP_STATE_OPERATING:
3789 rc = kvm_s390_vcpu_start(vcpu);
3790 break;
3791 case KVM_MP_STATE_LOAD:
3792 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3793 rc = -ENXIO;
3794 break;
3795 }
3796 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3797 break;
3798 case KVM_MP_STATE_CHECK_STOP:
3799 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
3800 default:
3801 rc = -ENXIO;
3802 }
3803
3804 vcpu_put(vcpu);
3805 return rc;
3806}
3807
3808static bool ibs_enabled(struct kvm_vcpu *vcpu)
3809{
3810 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3811}
3812
3813static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3814{
3815retry:
3816 kvm_s390_vcpu_request_handled(vcpu);
3817 if (!kvm_request_pending(vcpu))
3818 return 0;
3819 /*
3820 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3821 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3822 * This ensures that the ipte instruction for this request has
3823 * already finished. We might race against a second unmapper that
3824 * wants to set the blocking bit. Lets just retry the request loop.
3825	 * wants to set the blocking bit. Let's just retry the request loop.
3826 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3827 int rc;
3828 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3829 kvm_s390_get_prefix(vcpu),
3830 PAGE_SIZE * 2, PROT_WRITE);
3831 if (rc) {
3832 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3833 return rc;
3834 }
3835 goto retry;
3836 }
3837
3838 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3839 vcpu->arch.sie_block->ihcpu = 0xffff;
3840 goto retry;
3841 }
3842
3843 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3844 if (!ibs_enabled(vcpu)) {
3845 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3846 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3847 }
3848 goto retry;
3849 }
3850
3851 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3852 if (ibs_enabled(vcpu)) {
3853 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3854 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3855 }
3856 goto retry;
3857 }
3858
3859 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3860 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3861 goto retry;
3862 }
3863
3864 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3865 /*
3866 * Disable CMM virtualization; we will emulate the ESSA
3867 * instruction manually, in order to provide additional
3868 * functionalities needed for live migration.
3869 */
3870 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3871 goto retry;
3872 }
3873
3874 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3875 /*
3876 * Re-enable CMM virtualization if CMMA is available and
3877 * CMM has been used.
3878 */
3879 if ((vcpu->kvm->arch.use_cmma) &&
3880 (vcpu->kvm->mm->context.uses_cmm))
3881 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3882 goto retry;
3883 }
3884
3885 /* nothing to do, just clear the request */
3886 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3887 /* we left the vsie handler, nothing to do, just clear the request */
3888 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3889
3890 return 0;
3891}
3892
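/*
 * Set the guest TOD clock: store the epoch (and the epoch index when
 * the multiple-epoch facility is available) as an offset from the host
 * TOD and propagate it to all vcpus while they are blocked.
 */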
3893void kvm_s390_set_tod_clock(struct kvm *kvm,
3894 const struct kvm_s390_vm_tod_clock *gtod)
3895{
3896 struct kvm_vcpu *vcpu;
3897 union tod_clock clk;
3898 int i;
3899
3900 mutex_lock(&kvm->lock);
3901 preempt_disable();
3902
3903 store_tod_clock_ext(&clk);
3904
3905 kvm->arch.epoch = gtod->tod - clk.tod;
3906 kvm->arch.epdx = 0;
3907 if (test_kvm_facility(kvm, 139)) {
3908 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3909 if (kvm->arch.epoch > gtod->tod)
3910 kvm->arch.epdx -= 1;
3911 }
3912
3913 kvm_s390_vcpu_block_all(kvm);
3914 kvm_for_each_vcpu(i, vcpu, kvm) {
3915 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3916 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3917 }
3918
3919 kvm_s390_vcpu_unblock_all(kvm);
3920 preempt_enable();
3921 mutex_unlock(&kvm->lock);
3922}
3923
3924/**
3925 * kvm_arch_fault_in_page - fault-in guest page if necessary
3926 * @vcpu: The corresponding virtual cpu
3927 * @gpa: Guest physical address
3928 * @writable: Whether the page should be writable or not
3929 *
3930 * Make sure that a guest page has been faulted-in on the host.
3931 *
3932 * Return: Zero on success, negative error code otherwise.
3933 */
3934long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3935{
3936 return gmap_fault(vcpu->arch.gmap, gpa,
3937 writable ? FAULT_FLAG_WRITE : 0);
3938}
3939
3940static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3941 unsigned long token)
3942{
3943 struct kvm_s390_interrupt inti;
3944 struct kvm_s390_irq irq;
3945
3946 if (start_token) {
3947 irq.u.ext.ext_params2 = token;
3948 irq.type = KVM_S390_INT_PFAULT_INIT;
3949 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3950 } else {
3951 inti.type = KVM_S390_INT_PFAULT_DONE;
3952 inti.parm64 = token;
3953 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3954 }
3955}
3956
3957bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3958 struct kvm_async_pf *work)
3959{
3960 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3961 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3962
3963 return true;
3964}
3965
3966void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3967 struct kvm_async_pf *work)
3968{
3969 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3970 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3971}
3972
3973void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3974 struct kvm_async_pf *work)
3975{
3976 /* s390 will always inject the page directly */
3977}
3978
3979bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3980{
3981 /*
3982 * s390 will always inject the page directly,
3983	 * but we still want check_async_completion to clean up
3984 */
3985 return true;
3986}
3987
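/*
 * Check whether the current host page fault may be handled
 * asynchronously via a pfault token: the token must be valid, the
 * PSW/CR0 pfault conditions must be met, no deliverable interrupt may
 * be pending and pfault handling must be enabled for this gmap.
 */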
3988static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3989{
3990 hva_t hva;
3991 struct kvm_arch_async_pf arch;
3992
3993 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3994 return false;
3995 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3996 vcpu->arch.pfault_compare)
3997 return false;
3998 if (psw_extint_disabled(vcpu))
3999 return false;
4000 if (kvm_s390_vcpu_has_irq(vcpu, 0))
4001 return false;
4002 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4003 return false;
4004 if (!vcpu->arch.gmap->pfault_enabled)
4005 return false;
4006
4007 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4008 hva += current->thread.gmap_addr & ~PAGE_MASK;
4009 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4010 return false;
4011
4012 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4013}
4014
4015static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4016{
4017 int rc, cpuflags;
4018
4019 /*
4020 * On s390 notifications for arriving pages will be delivered directly
4021	 * to the guest but the housekeeping for completed pfaults is
4022 * handled outside the worker.
4023 */
4024 kvm_check_async_pf_completion(vcpu);
4025
4026 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4027 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4028
4029 if (need_resched())
4030 schedule();
4031
4032 if (!kvm_is_ucontrol(vcpu->kvm)) {
4033 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4034 if (rc)
4035 return rc;
4036 }
4037
4038 rc = kvm_s390_handle_requests(vcpu);
4039 if (rc)
4040 return rc;
4041
4042 if (guestdbg_enabled(vcpu)) {
4043 kvm_s390_backup_guest_per_regs(vcpu);
4044 kvm_s390_patch_guest_per_regs(vcpu);
4045 }
4046
4047 clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
4048
4049 vcpu->arch.sie_block->icptcode = 0;
4050 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4051 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4052 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4053
4054 return 0;
4055}
4056
4057static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4058{
4059 struct kvm_s390_pgm_info pgm_info = {
4060 .code = PGM_ADDRESSING,
4061 };
4062 u8 opcode, ilen;
4063 int rc;
4064
4065 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4066 trace_kvm_s390_sie_fault(vcpu);
4067
4068 /*
4069 * We want to inject an addressing exception, which is defined as a
4070 * suppressing or terminating exception. However, since we came here
4071 * by a DAT access exception, the PSW still points to the faulting
4072 * instruction since DAT exceptions are nullifying. So we've got
4073 * to look up the current opcode to get the length of the instruction
4074 * to be able to forward the PSW.
4075 */
4076 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4077 ilen = insn_length(opcode);
4078 if (rc < 0) {
4079 return rc;
4080 } else if (rc) {
4081 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4082 * Forward by arbitrary ilc, injection will take care of
4083 * nullification if necessary.
4084 */
4085 pgm_info = vcpu->arch.pgm;
4086 ilen = 4;
4087 }
4088 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4089 kvm_s390_forward_psw(vcpu, ilen);
4090 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4091}
4092
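/*
 * Post-processing after leaving SIE: sync back registers, reinject a
 * machine check if SIE was left because of one, hand intercepts to the
 * in-kernel handlers or to userspace, and resolve guest page faults
 * either via async pfault or by faulting the page in synchronously.
 */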
4093static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4094{
4095 struct mcck_volatile_info *mcck_info;
4096 struct sie_page *sie_page;
4097
4098 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4099 vcpu->arch.sie_block->icptcode);
4100 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4101
4102 if (guestdbg_enabled(vcpu))
4103 kvm_s390_restore_guest_per_regs(vcpu);
4104
4105 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4106 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4107
4108 if (exit_reason == -EINTR) {
4109 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4110 sie_page = container_of(vcpu->arch.sie_block,
4111 struct sie_page, sie_block);
4112 mcck_info = &sie_page->mcck_info;
4113 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4114 return 0;
4115 }
4116
4117 if (vcpu->arch.sie_block->icptcode > 0) {
4118 int rc = kvm_handle_sie_intercept(vcpu);
4119
4120 if (rc != -EOPNOTSUPP)
4121 return rc;
4122 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4123 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4124 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4125 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4126 return -EREMOTE;
4127 } else if (exit_reason != -EFAULT) {
4128 vcpu->stat.exit_null++;
4129 return 0;
4130 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4131 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4132 vcpu->run->s390_ucontrol.trans_exc_code =
4133 current->thread.gmap_addr;
4134 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4135 return -EREMOTE;
4136 } else if (current->thread.gmap_pfault) {
4137 trace_kvm_s390_major_guest_pfault(vcpu);
4138 current->thread.gmap_pfault = 0;
4139 if (kvm_arch_setup_async_pf(vcpu))
4140 return 0;
4141 vcpu->stat.pfault_sync++;
4142 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4143 }
4144 return vcpu_post_run_fault_in_sie(vcpu);
4145}
4146
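/*
 * Inner run loop: alternate between vcpu_pre_run(), the SIE entry via
 * sie64a() and vcpu_post_run() until a signal is pending, a guest debug
 * exit is requested or an error/userspace exit is indicated. kvm->srcu
 * is only dropped while the guest is actually running.
 */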
4147#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4148static int __vcpu_run(struct kvm_vcpu *vcpu)
4149{
4150 int rc, exit_reason;
4151 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4152
4153	/*
4154	 * We try to hold kvm->srcu during most of vcpu_run (except while
4155	 * running the guest), so that memslots and other data are protected.
4156	 */
4157 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4158
4159 do {
4160 rc = vcpu_pre_run(vcpu);
4161 if (rc)
4162 break;
4163
4164 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4165		/*
4166		 * As PF_VCPU will be used in the fault handler, there must be
4167		 * no uaccess between guest_enter and guest_exit.
4168		 */
4169 local_irq_disable();
4170 guest_enter_irqoff();
4171 __disable_cpu_timer_accounting(vcpu);
4172 local_irq_enable();
4173 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4174 memcpy(sie_page->pv_grregs,
4175 vcpu->run->s.regs.gprs,
4176 sizeof(sie_page->pv_grregs));
4177 }
4178 if (test_cpu_flag(CIF_FPU))
4179 load_fpu_regs();
4180 exit_reason = sie64a(vcpu->arch.sie_block,
4181 vcpu->run->s.regs.gprs);
4182 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4183 memcpy(vcpu->run->s.regs.gprs,
4184 sie_page->pv_grregs,
4185 sizeof(sie_page->pv_grregs));
4186 /*
4187 * We're not allowed to inject interrupts on intercepts
4188 * that leave the guest state in an "in-between" state
4189 * where the next SIE entry will do a continuation.
4190 * Fence interrupts in our "internal" PSW.
4191 */
4192 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4193 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4194 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4195 }
4196 }
4197 local_irq_disable();
4198 __enable_cpu_timer_accounting(vcpu);
4199 guest_exit_irqoff();
4200 local_irq_enable();
4201 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4202
4203 rc = vcpu_post_run(vcpu, exit_reason);
4204 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4205
4206 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4207 return rc;
4208}
4209
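/*
 * Sync register state that only exists for non-protected ("format 2")
 * guests from kvm_run into the SIE control block, including the eager
 * enablement of runtime instrumentation and guarded storage when
 * userspace provides valid control blocks (e.g. after migration).
 */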
4210static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4211{
4212 struct kvm_run *kvm_run = vcpu->run;
4213 struct runtime_instr_cb *riccb;
4214 struct gs_cb *gscb;
4215
4216 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4217 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4218 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4219 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4220 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4221 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4222 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4223 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4224 }
4225 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4226 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4227 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4228 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4229 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4230 kvm_clear_async_pf_completion_queue(vcpu);
4231 }
4232 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4233 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4234 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4235 }
4236 /*
4237 * If userspace sets the riccb (e.g. after migration) to a valid state,
4238 * we should enable RI here instead of doing the lazy enablement.
4239 */
4240 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4241 test_kvm_facility(vcpu->kvm, 64) &&
4242 riccb->v &&
4243 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4244 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4245 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4246 }
4247 /*
4248 * If userspace sets the gscb (e.g. after migration) to non-zero,
4249 * we should enable GS here instead of doing the lazy enablement.
4250 */
4251 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4252 test_kvm_facility(vcpu->kvm, 133) &&
4253 gscb->gssm &&
4254 !vcpu->arch.gs_enabled) {
4255 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4256 vcpu->arch.sie_block->ecb |= ECB_GS;
4257 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4258 vcpu->arch.gs_enabled = 1;
4259 }
4260 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4261 test_kvm_facility(vcpu->kvm, 82)) {
4262 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4263 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4264 }
4265 if (MACHINE_HAS_GS) {
4266 preempt_disable();
4267 __ctl_set_bit(2, 4);
4268 if (current->thread.gs_cb) {
4269 vcpu->arch.host_gscb = current->thread.gs_cb;
4270 save_gs_cb(vcpu->arch.host_gscb);
4271 }
4272 if (vcpu->arch.gs_enabled) {
4273 current->thread.gs_cb = (struct gs_cb *)
4274 &vcpu->run->s.regs.gscb;
4275 restore_gs_cb(current->thread.gs_cb);
4276 }
4277 preempt_enable();
4278 }
4279 /* SIE will load etoken directly from SDNX and therefore kvm_run */
4280}
4281
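/*
 * Transfer the register state that userspace marked dirty in kvm_run
 * into the vcpu before entering SIE: prefix, control registers, timers
 * and the access/floating point/vector registers. For protected guests
 * only the condition code of the PSW is taken over, see below.
 */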
4282static void sync_regs(struct kvm_vcpu *vcpu)
4283{
4284 struct kvm_run *kvm_run = vcpu->run;
4285
4286 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4287 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4288 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4289 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4290 /* some control register changes require a tlb flush */
4291 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4292 }
4293 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4294 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4295 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4296 }
4297 save_access_regs(vcpu->arch.host_acrs);
4298 restore_access_regs(vcpu->run->s.regs.acrs);
4299 /* save host (userspace) fprs/vrs */
4300 save_fpu_regs();
4301 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4302 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4303 if (MACHINE_HAS_VX)
4304 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4305 else
4306 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4307 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4308 if (test_fp_ctl(current->thread.fpu.fpc))
4309 /* User space provided an invalid FPC, let's clear it */
4310 current->thread.fpu.fpc = 0;
4311
4312 /* Sync fmt2 only data */
4313 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4314 sync_regs_fmt2(vcpu);
4315 } else {
4316 /*
4317 * In several places we have to modify our internal view to
4318 * not do things that are disallowed by the ultravisor. For
4319 * example we must not inject interrupts after specific exits
4320 * (e.g. 112 prefix page not secure). We do this by turning
4321 * off the machine check, external and I/O interrupt bits
4322 * of our PSW copy. To avoid getting validity intercepts, we
4323		 * only accept the condition code from userspace.
4324 */
4325 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4326 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4327 PSW_MASK_CC;
4328 }
4329
4330 kvm_run->kvm_dirty_regs = 0;
4331}
4332
4333static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4334{
4335 struct kvm_run *kvm_run = vcpu->run;
4336
4337 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4338 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4339 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4340 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4341 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4342 if (MACHINE_HAS_GS) {
4343 preempt_disable();
4344 __ctl_set_bit(2, 4);
4345 if (vcpu->arch.gs_enabled)
4346 save_gs_cb(current->thread.gs_cb);
4347 current->thread.gs_cb = vcpu->arch.host_gscb;
4348 restore_gs_cb(vcpu->arch.host_gscb);
4349 if (!vcpu->arch.host_gscb)
4350 __ctl_clear_bit(2, 4);
4351 vcpu->arch.host_gscb = NULL;
4352 preempt_enable();
4353 }
4354 /* SIE will save etoken directly into SDNX and therefore kvm_run */
4355}
4356
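/*
 * Counterpart to sync_regs(): copy the current vcpu state back into
 * kvm_run after leaving SIE and restore the host register state that
 * was saved in sync_regs().
 */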
4357static void store_regs(struct kvm_vcpu *vcpu)
4358{
4359 struct kvm_run *kvm_run = vcpu->run;
4360
4361 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4362 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4363 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4364 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4365 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4366 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4367 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4368 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4369 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4370 save_access_regs(vcpu->run->s.regs.acrs);
4371 restore_access_regs(vcpu->arch.host_acrs);
4372 /* Save guest register state */
4373 save_fpu_regs();
4374 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4375 /* Restore will be done lazily at return */
4376 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4377 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4378 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4379 store_regs_fmt2(vcpu);
4380}
4381
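/*
 * Entry point for the KVM_RUN ioctl. Userspace typically drives this in
 * a loop roughly like the following sketch (illustrative only, not taken
 * from this file; mmap_size comes from KVM_GET_VCPU_MMAP_SIZE and
 * handle_exit() is a placeholder):
 *
 *	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *	for (;;) {
 *		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
 *			break;
 *		handle_exit(run->exit_reason);
 *	}
 *
 * An -EREMOTE result from __vcpu_run() means the exit data has already
 * been prepared in kvm_run, so 0 is returned to userspace in that case.
 */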
4382int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4383{
4384 struct kvm_run *kvm_run = vcpu->run;
4385 int rc;
4386
4387 if (kvm_run->immediate_exit)
4388 return -EINTR;
4389
4390 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4391 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4392 return -EINVAL;
4393
4394 vcpu_load(vcpu);
4395
4396 if (guestdbg_exit_pending(vcpu)) {
4397 kvm_s390_prepare_debug_exit(vcpu);
4398 rc = 0;
4399 goto out;
4400 }
4401
4402 kvm_sigset_activate(vcpu);
4403
4404	/*
4405	 * No need to check the return value of vcpu_start: it can only fail
4406	 * for protvirt, and protvirt implies user controlled cpu state.
4407	 */
4408 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4409 kvm_s390_vcpu_start(vcpu);
4410 } else if (is_vcpu_stopped(vcpu)) {
4411 pr_err_ratelimited("can't run stopped vcpu %d\n",
4412 vcpu->vcpu_id);
4413 rc = -EINVAL;
4414 goto out;
4415 }
4416
4417 sync_regs(vcpu);
4418 enable_cpu_timer_accounting(vcpu);
4419
4420 might_fault();
4421 rc = __vcpu_run(vcpu);
4422
4423 if (signal_pending(current) && !rc) {
4424 kvm_run->exit_reason = KVM_EXIT_INTR;
4425 rc = -EINTR;
4426 }
4427
4428 if (guestdbg_exit_pending(vcpu) && !rc) {
4429 kvm_s390_prepare_debug_exit(vcpu);
4430 rc = 0;
4431 }
4432
4433 if (rc == -EREMOTE) {
4434 /* userspace support is needed, kvm_run has been prepared */
4435 rc = 0;
4436 }
4437
4438 disable_cpu_timer_accounting(vcpu);
4439 store_regs(vcpu);
4440
4441 kvm_sigset_deactivate(vcpu);
4442
4443 vcpu->stat.exit_userspace++;
4444out:
4445 vcpu_put(vcpu);
4446 return rc;
4447}
4448
4449/*
4450 * store status at address
4451 * we have two special cases:
4452 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4453 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4454 */
4455int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4456{
4457 unsigned char archmode = 1;
4458 freg_t fprs[NUM_FPRS];
4459 unsigned int px;
4460 u64 clkcomp, cputm;
4461 int rc;
4462
4463 px = kvm_s390_get_prefix(vcpu);
4464 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4465 if (write_guest_abs(vcpu, 163, &archmode, 1))
4466 return -EFAULT;
4467 gpa = 0;
4468 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4469 if (write_guest_real(vcpu, 163, &archmode, 1))
4470 return -EFAULT;
4471 gpa = px;
4472 } else
4473 gpa -= __LC_FPREGS_SAVE_AREA;
4474
4475 /* manually convert vector registers if necessary */
4476 if (MACHINE_HAS_VX) {
4477 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4478 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4479 fprs, 128);
4480 } else {
4481 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4482 vcpu->run->s.regs.fprs, 128);
4483 }
4484 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4485 vcpu->run->s.regs.gprs, 128);
4486 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4487 &vcpu->arch.sie_block->gpsw, 16);
4488 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4489 &px, 4);
4490 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4491 &vcpu->run->s.regs.fpc, 4);
4492 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4493 &vcpu->arch.sie_block->todpr, 4);
4494 cputm = kvm_s390_get_cpu_timer(vcpu);
4495 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4496 &cputm, 8);
4497 clkcomp = vcpu->arch.sie_block->ckc >> 8;
4498 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4499 &clkcomp, 8);
4500 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4501 &vcpu->run->s.regs.acrs, 64);
4502 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4503 &vcpu->arch.sie_block->gcr, 128);
4504 return rc ? -EFAULT : 0;
4505}
4506
4507int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4508{
4509 /*
4510 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4511 * switch in the run ioctl. Let's update our copies before we save
4512 * it into the save area
4513 */
4514 save_fpu_regs();
4515 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4516 save_access_regs(vcpu->run->s.regs.acrs);
4517
4518 return kvm_s390_store_status_unloaded(vcpu, addr);
4519}
4520
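/*
 * Helpers for toggling the IBS facility per vcpu. IBS is requested
 * asynchronously via KVM_REQ_ENABLE_IBS/KVM_REQ_DISABLE_IBS and is only
 * kept enabled while a single vcpu is started, see
 * kvm_s390_vcpu_start()/kvm_s390_vcpu_stop() below.
 */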
4521static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4522{
4523 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4524 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4525}
4526
4527static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4528{
4529 unsigned int i;
4530 struct kvm_vcpu *vcpu;
4531
4532 kvm_for_each_vcpu(i, vcpu, kvm) {
4533 __disable_ibs_on_vcpu(vcpu);
4534 }
4535}
4536
4537static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4538{
4539 if (!sclp.has_ibs)
4540 return;
4541 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4542 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4543}
4544
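/*
 * Move a vcpu from the STOPPED state into the operating state. For
 * protected guests the ultravisor is informed first; the IBS handling
 * below ensures the facility is only active while exactly one vcpu is
 * started.
 */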
4545int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4546{
4547 int i, online_vcpus, r = 0, started_vcpus = 0;
4548
4549 if (!is_vcpu_stopped(vcpu))
4550 return 0;
4551
4552 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4553 /* Only one cpu at a time may enter/leave the STOPPED state. */
4554 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4555 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4556
4557 /* Let's tell the UV that we want to change into the operating state */
4558 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4559 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4560 if (r) {
4561 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4562 return r;
4563 }
4564 }
4565
4566 for (i = 0; i < online_vcpus; i++) {
4567 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4568 started_vcpus++;
4569 }
4570
4571 if (started_vcpus == 0) {
4572 /* we're the only active VCPU -> speed it up */
4573 __enable_ibs_on_vcpu(vcpu);
4574 } else if (started_vcpus == 1) {
4575 /*
4576 * As we are starting a second VCPU, we have to disable
4577 * the IBS facility on all VCPUs to remove potentially
4578 * outstanding ENABLE requests.
4579 */
4580 __disable_ibs_on_all_vcpus(vcpu->kvm);
4581 }
4582
4583 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4584 /*
4585 * The real PSW might have changed due to a RESTART interpreted by the
4586 * ultravisor. We block all interrupts and let the next sie exit
4587 * refresh our view.
4588 */
4589 if (kvm_s390_pv_cpu_is_protected(vcpu))
4590 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4591 /*
4592 * Another VCPU might have used IBS while we were offline.
4593 * Let's play safe and flush the VCPU at startup.
4594 */
4595 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4596 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4597 return 0;
4598}
4599
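/*
 * Move a vcpu into the STOPPED state. Pending stop requests are cleared,
 * and if exactly one started vcpu remains afterwards, IBS is re-enabled
 * for it.
 */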
4600int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4601{
4602 int i, online_vcpus, r = 0, started_vcpus = 0;
4603 struct kvm_vcpu *started_vcpu = NULL;
4604
4605 if (is_vcpu_stopped(vcpu))
4606 return 0;
4607
4608 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4609 /* Only one cpu at a time may enter/leave the STOPPED state. */
4610 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4611 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4612
4613 /* Let's tell the UV that we want to change into the stopped state */
4614 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4615 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4616 if (r) {
4617 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4618 return r;
4619 }
4620 }
4621
4622	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4623 kvm_s390_clear_stop_irq(vcpu);
4624
4625 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4626 __disable_ibs_on_vcpu(vcpu);
4627
4628 for (i = 0; i < online_vcpus; i++) {
4629 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4630 started_vcpus++;
4631 started_vcpu = vcpu->kvm->vcpus[i];
4632 }
4633 }
4634
4635 if (started_vcpus == 1) {
4636 /*
4637 * As we only have one VCPU left, we want to enable the
4638 * IBS facility for that VCPU to speed it up.
4639 */
4640 __enable_ibs_on_vcpu(started_vcpu);
4641 }
4642
4643 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4644 return 0;
4645}
4646
4647static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4648 struct kvm_enable_cap *cap)
4649{
4650 int r;
4651
4652 if (cap->flags)
4653 return -EINVAL;
4654
4655 switch (cap->cap) {
4656 case KVM_CAP_S390_CSS_SUPPORT:
4657 if (!vcpu->kvm->arch.css_support) {
4658 vcpu->kvm->arch.css_support = 1;
4659 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4660 trace_kvm_s390_enable_css(vcpu->kvm);
4661 }
4662 r = 0;
4663 break;
4664 default:
4665 r = -EINVAL;
4666 break;
4667 }
4668 return r;
4669}
4670
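/*
 * KVM_S390_MEM_OP on the secure instruction data area (sida) of a
 * protected vcpu. Only plain reads and writes within the sida bounds are
 * allowed; any flags are rejected.
 */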
4671static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4672 struct kvm_s390_mem_op *mop)
4673{
4674 void __user *uaddr = (void __user *)mop->buf;
4675 int r = 0;
4676
4677 if (mop->flags || !mop->size)
4678 return -EINVAL;
4679 if (mop->size + mop->sida_offset < mop->size)
4680 return -EINVAL;
4681 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4682 return -E2BIG;
4683
4684 switch (mop->op) {
4685 case KVM_S390_MEMOP_SIDA_READ:
4686 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4687 mop->sida_offset), mop->size))
4688 r = -EFAULT;
4689
4690 break;
4691 case KVM_S390_MEMOP_SIDA_WRITE:
4692 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4693 mop->sida_offset), uaddr, mop->size))
4694 r = -EFAULT;
4695 break;
4696 }
4697 return r;
4698}
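/*
 * KVM_S390_MEM_OP on logical guest addresses. Unless only an access check
 * (KVM_S390_MEMOP_F_CHECK_ONLY) is requested, the data is bounced through
 * a temporary kernel buffer; protected vcpus are rejected here and have to
 * use the sida variant above. A userspace read via this interface looks
 * roughly like the following sketch (illustrative only):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr	= guest_addr,
 *		.size	= len,
 *		.op	= KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf	= (__u64)(unsigned long)buffer,
 *		.ar	= 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */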
4699static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4700 struct kvm_s390_mem_op *mop)
4701{
4702 void __user *uaddr = (void __user *)mop->buf;
4703 void *tmpbuf = NULL;
4704 int r = 0;
4705 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4706 | KVM_S390_MEMOP_F_CHECK_ONLY;
4707
4708 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4709 return -EINVAL;
4710
4711 if (mop->size > MEM_OP_MAX_SIZE)
4712 return -E2BIG;
4713
4714 if (kvm_s390_pv_cpu_is_protected(vcpu))
4715 return -EINVAL;
4716
4717 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4718 tmpbuf = vmalloc(mop->size);
4719 if (!tmpbuf)
4720 return -ENOMEM;
4721 }
4722
4723 switch (mop->op) {
4724 case KVM_S390_MEMOP_LOGICAL_READ:
4725 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4726 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4727 mop->size, GACC_FETCH);
4728 break;
4729 }
4730 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4731 if (r == 0) {
4732 if (copy_to_user(uaddr, tmpbuf, mop->size))
4733 r = -EFAULT;
4734 }
4735 break;
4736 case KVM_S390_MEMOP_LOGICAL_WRITE:
4737 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4738 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4739 mop->size, GACC_STORE);
4740 break;
4741 }
4742 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4743 r = -EFAULT;
4744 break;
4745 }
4746 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4747 break;
4748 }
4749
4750 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4751 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4752
4753 vfree(tmpbuf);
4754 return r;
4755}
4756
4757static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4758 struct kvm_s390_mem_op *mop)
4759{
4760 int r, srcu_idx;
4761
4762 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4763
4764 switch (mop->op) {
4765 case KVM_S390_MEMOP_LOGICAL_READ:
4766 case KVM_S390_MEMOP_LOGICAL_WRITE:
4767 r = kvm_s390_guest_mem_op(vcpu, mop);
4768 break;
4769 case KVM_S390_MEMOP_SIDA_READ:
4770 case KVM_S390_MEMOP_SIDA_WRITE:
4771 /* we are locked against sida going away by the vcpu->mutex */
4772 r = kvm_s390_guest_sida_op(vcpu, mop);
4773 break;
4774 default:
4775 r = -EINVAL;
4776 }
4777
4778 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4779 return r;
4780}
4781
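/*
 * Ioctls handled without vcpu_load(): interrupt injection via
 * KVM_S390_IRQ and the older KVM_S390_INTERRUPT interface.
 */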
4782long kvm_arch_vcpu_async_ioctl(struct file *filp,
4783 unsigned int ioctl, unsigned long arg)
4784{
4785 struct kvm_vcpu *vcpu = filp->private_data;
4786 void __user *argp = (void __user *)arg;
4787
4788 switch (ioctl) {
4789 case KVM_S390_IRQ: {
4790 struct kvm_s390_irq s390irq;
4791
4792 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4793 return -EFAULT;
4794 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4795 }
4796 case KVM_S390_INTERRUPT: {
4797 struct kvm_s390_interrupt s390int;
4798 struct kvm_s390_irq s390irq = {};
4799
4800 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4801 return -EFAULT;
4802 if (s390int_to_s390irq(&s390int, &s390irq))
4803 return -EINVAL;
4804 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4805 }
4806 }
4807 return -ENOIOCTLCMD;
4808}
4809
4810long kvm_arch_vcpu_ioctl(struct file *filp,
4811 unsigned int ioctl, unsigned long arg)
4812{
4813 struct kvm_vcpu *vcpu = filp->private_data;
4814 void __user *argp = (void __user *)arg;
4815 int idx;
4816 long r;
4817 u16 rc, rrc;
4818
4819 vcpu_load(vcpu);
4820
4821 switch (ioctl) {
4822 case KVM_S390_STORE_STATUS:
4823 idx = srcu_read_lock(&vcpu->kvm->srcu);
4824 r = kvm_s390_store_status_unloaded(vcpu, arg);
4825 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4826 break;
4827 case KVM_S390_SET_INITIAL_PSW: {
4828 psw_t psw;
4829
4830 r = -EFAULT;
4831 if (copy_from_user(&psw, argp, sizeof(psw)))
4832 break;
4833 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4834 break;
4835 }
4836 case KVM_S390_CLEAR_RESET:
4837 r = 0;
4838 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4839 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4840 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4841 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4842 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4843 rc, rrc);
4844 }
4845 break;
4846 case KVM_S390_INITIAL_RESET:
4847 r = 0;
4848 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4849 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4850 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4851 UVC_CMD_CPU_RESET_INITIAL,
4852 &rc, &rrc);
4853 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4854 rc, rrc);
4855 }
4856 break;
4857 case KVM_S390_NORMAL_RESET:
4858 r = 0;
4859 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4860 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4861 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4862 UVC_CMD_CPU_RESET, &rc, &rrc);
4863 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4864 rc, rrc);
4865 }
4866 break;
4867 case KVM_SET_ONE_REG:
4868 case KVM_GET_ONE_REG: {
4869 struct kvm_one_reg reg;
4870 r = -EINVAL;
4871 if (kvm_s390_pv_cpu_is_protected(vcpu))
4872 break;
4873 r = -EFAULT;
4874		if (copy_from_user(&reg, argp, sizeof(reg)))
4875 break;
4876 if (ioctl == KVM_SET_ONE_REG)
4877			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4878 else
4879			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4880 break;
4881 }
4882#ifdef CONFIG_KVM_S390_UCONTROL
4883 case KVM_S390_UCAS_MAP: {
4884 struct kvm_s390_ucas_mapping ucasmap;
4885
4886 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4887 r = -EFAULT;
4888 break;
4889 }
4890
4891 if (!kvm_is_ucontrol(vcpu->kvm)) {
4892 r = -EINVAL;
4893 break;
4894 }
4895
4896 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4897 ucasmap.vcpu_addr, ucasmap.length);
4898 break;
4899 }
4900 case KVM_S390_UCAS_UNMAP: {
4901 struct kvm_s390_ucas_mapping ucasmap;
4902
4903 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4904 r = -EFAULT;
4905 break;
4906 }
4907
4908 if (!kvm_is_ucontrol(vcpu->kvm)) {
4909 r = -EINVAL;
4910 break;
4911 }
4912
4913 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4914 ucasmap.length);
4915 break;
4916 }
4917#endif
4918 case KVM_S390_VCPU_FAULT: {
4919 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4920 break;
4921 }
4922 case KVM_ENABLE_CAP:
4923 {
4924 struct kvm_enable_cap cap;
4925 r = -EFAULT;
4926 if (copy_from_user(&cap, argp, sizeof(cap)))
4927 break;
4928 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4929 break;
4930 }
4931 case KVM_S390_MEM_OP: {
4932 struct kvm_s390_mem_op mem_op;
4933
4934 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4935 r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4936 else
4937 r = -EFAULT;
4938 break;
4939 }
4940 case KVM_S390_SET_IRQ_STATE: {
4941 struct kvm_s390_irq_state irq_state;
4942
4943 r = -EFAULT;
4944 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4945 break;
4946 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4947 irq_state.len == 0 ||
4948 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4949 r = -EINVAL;
4950 break;
4951 }
4952 /* do not use irq_state.flags, it will break old QEMUs */
4953 r = kvm_s390_set_irq_state(vcpu,
4954 (void __user *) irq_state.buf,
4955 irq_state.len);
4956 break;
4957 }
4958 case KVM_S390_GET_IRQ_STATE: {
4959 struct kvm_s390_irq_state irq_state;
4960
4961 r = -EFAULT;
4962 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4963 break;
4964 if (irq_state.len == 0) {
4965 r = -EINVAL;
4966 break;
4967 }
4968 /* do not use irq_state.flags, it will break old QEMUs */
4969 r = kvm_s390_get_irq_state(vcpu,
4970 (__u8 __user *) irq_state.buf,
4971 irq_state.len);
4972 break;
4973 }
4974 default:
4975 r = -ENOTTY;
4976 }
4977
4978 vcpu_put(vcpu);
4979 return r;
4980}
4981
4982vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4983{
4984#ifdef CONFIG_KVM_S390_UCONTROL
4985 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4986 && (kvm_is_ucontrol(vcpu->kvm))) {
4987 vmf->page = virt_to_page(vcpu->arch.sie_block);
4988 get_page(vmf->page);
4989 return 0;
4990 }
4991#endif
4992 return VM_FAULT_SIGBUS;
4993}
4994
4995/* Section: memory related */
4996int kvm_arch_prepare_memory_region(struct kvm *kvm,
4997 struct kvm_memory_slot *memslot,
4998 const struct kvm_userspace_memory_region *mem,
4999 enum kvm_mr_change change)
5000{
5001	/* A few sanity checks. Memory slots have to start and end on a segment
5002	   boundary (1MB). The memory in userland may be fragmented into various
5003	   different vmas. It is okay to mmap() and munmap() memory in this slot
5004	   at any time after this call. */
5005
5006 if (mem->userspace_addr & 0xffffful)
5007 return -EINVAL;
5008
5009 if (mem->memory_size & 0xffffful)
5010 return -EINVAL;
5011
5012 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5013 return -EINVAL;
5014
5015 /* When we are protected, we should not change the memory slots */
5016 if (kvm_s390_pv_get_handle(kvm))
5017 return -EINVAL;
5018 return 0;
5019}
5020
5021void kvm_arch_commit_memory_region(struct kvm *kvm,
5022 const struct kvm_userspace_memory_region *mem,
5023 struct kvm_memory_slot *old,
5024 const struct kvm_memory_slot *new,
5025 enum kvm_mr_change change)
5026{
5027 int rc = 0;
5028
5029 switch (change) {
5030 case KVM_MR_DELETE:
5031 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5032 old->npages * PAGE_SIZE);
5033 break;
5034 case KVM_MR_MOVE:
5035 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5036 old->npages * PAGE_SIZE);
5037 if (rc)
5038 break;
5039 fallthrough;
5040 case KVM_MR_CREATE:
5041 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5042 mem->guest_phys_addr, mem->memory_size);
5043 break;
5044 case KVM_MR_FLAGS_ONLY:
5045 break;
5046 default:
5047 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5048 }
5049 if (rc)
5050 pr_warn("failed to commit memory region\n");
5051 return;
5052}
5053
5054static inline unsigned long nonhyp_mask(int i)
5055{
5056 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5057
5058 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5059}
5060
5061void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5062{
5063 vcpu->valid_wakeup = false;
5064}
5065
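/*
 * Module init: refuse to load if SIE is not available (no SIEF2) or if
 * nested virtualization is combined with huge page backing, then merge
 * the host facility list into kvm_s390_fac_base and register with the
 * common KVM code.
 */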
5066static int __init kvm_s390_init(void)
5067{
5068 int i;
5069
5070 if (!sclp.has_sief2) {
5071 pr_info("SIE is not available\n");
5072 return -ENODEV;
5073 }
5074
5075 if (nested && hpage) {
5076 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5077 return -EINVAL;
5078 }
5079
5080 for (i = 0; i < 16; i++)
5081 kvm_s390_fac_base[i] |=
5082 stfle_fac_list[i] & nonhyp_mask(i);
5083
5084 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5085}
5086
5087static void __exit kvm_s390_exit(void)
5088{
5089 kvm_exit();
5090}
5091
5092module_init(kvm_s390_init);
5093module_exit(kvm_s390_exit);
5094
5095/*
5096 * Enable autoloading of the kvm module.
5097 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5098 * since x86 takes a different approach.
5099 */
5100#include <linux/miscdevice.h>
5101MODULE_ALIAS_MISCDEV(KVM_MINOR);
5102MODULE_ALIAS("devname:kvm");