1/*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17#include <linux/compiler.h>
18#include <linux/err.h>
19#include <linux/fs.h>
20#include <linux/hrtimer.h>
21#include <linux/init.h>
22#include <linux/kvm.h>
23#include <linux/kvm_host.h>
24#include <linux/mman.h>
25#include <linux/module.h>
26#include <linux/random.h>
27#include <linux/slab.h>
28#include <linux/timer.h>
29#include <linux/vmalloc.h>
30#include <linux/bitmap.h>
31#include <asm/asm-offsets.h>
32#include <asm/lowcore.h>
33#include <asm/stp.h>
34#include <asm/pgtable.h>
35#include <asm/gmap.h>
36#include <asm/nmi.h>
37#include <asm/switch_to.h>
38#include <asm/isc.h>
39#include <asm/sclp.h>
40#include <asm/cpacf.h>
41#include <asm/timex.h>
42#include "kvm-s390.h"
43#include "gaccess.h"
44
45#define KMSG_COMPONENT "kvm-s390"
46#undef pr_fmt
47#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
48
49#define CREATE_TRACE_POINTS
50#include "trace.h"
51#include "trace-s390.h"
52
53#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
54#define LOCAL_IRQS 32
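/*
 * Worst-case buffer size for the KVM_S390_{GET,SET}_IRQ_STATE vcpu ioctls:
 * roughly one pending emergency signal per possible sending VCPU plus the
 * remaining local interrupt types.
 */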
55#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
56 (KVM_MAX_VCPUS + LOCAL_IRQS))
57
58#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
59
60struct kvm_stats_debugfs_item debugfs_entries[] = {
61 { "userspace_handled", VCPU_STAT(exit_userspace) },
62 { "exit_null", VCPU_STAT(exit_null) },
63 { "exit_validity", VCPU_STAT(exit_validity) },
64 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
65 { "exit_external_request", VCPU_STAT(exit_external_request) },
66 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
67 { "exit_instruction", VCPU_STAT(exit_instruction) },
68 { "exit_pei", VCPU_STAT(exit_pei) },
69 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
70 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
71 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
72 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
73 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
74 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
75 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
76 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
77 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
78 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
79 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
80 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
81 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
82 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
83 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
84 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
85 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
86 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
87 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
88 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
89 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
90 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
91 { "instruction_spx", VCPU_STAT(instruction_spx) },
92 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
93 { "instruction_stap", VCPU_STAT(instruction_stap) },
94 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
95 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
96 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
97 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
98 { "instruction_essa", VCPU_STAT(instruction_essa) },
99 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
100 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
101 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
102 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
103 { "instruction_sie", VCPU_STAT(instruction_sie) },
104 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
105 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
106 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
107 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
108 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
109 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
110 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
111 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
112 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
113 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
114 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
115 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
116 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
117 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
118 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
119 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
120 { "diagnose_10", VCPU_STAT(diagnose_10) },
121 { "diagnose_44", VCPU_STAT(diagnose_44) },
122 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
123 { "diagnose_258", VCPU_STAT(diagnose_258) },
124 { "diagnose_308", VCPU_STAT(diagnose_308) },
125 { "diagnose_500", VCPU_STAT(diagnose_500) },
126 { NULL }
127};
128
129/* allow nested virtualization in KVM (if enabled by user space) */
130static int nested;
131module_param(nested, int, S_IRUGO);
132MODULE_PARM_DESC(nested, "Nested virtualization support");
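/*
 * The parameter is read-only at runtime (S_IRUGO), so nested SIE support
 * has to be requested when loading the module, e.g.:
 *
 *	modprobe kvm nested=1
 */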
133
134/* upper facilities limit for kvm */
135unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
136
137unsigned long kvm_s390_fac_list_mask_size(void)
138{
139 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
140 return ARRAY_SIZE(kvm_s390_fac_list_mask);
141}
142
143/* available cpu features supported by kvm */
144static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
145/* available subfunctions indicated via query / "test bit" */
146static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
147
148static struct gmap_notifier gmap_notifier;
149static struct gmap_notifier vsie_gmap_notifier;
150debug_info_t *kvm_s390_dbf;
151
152/* Section: not file related */
153int kvm_arch_hardware_enable(void)
154{
155 /* every s390 is virtualization enabled ;-) */
156 return 0;
157}
158
159static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
160 unsigned long end);
161
162/*
163 * This callback is executed during stop_machine(). All CPUs are therefore
164 * temporarily stopped. In order not to change guest behavior, we have to
165 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
166 * so a CPU won't be stopped while calculating with the epoch.
167 */
168static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
169 void *v)
170{
171 struct kvm *kvm;
172 struct kvm_vcpu *vcpu;
173 int i;
174 unsigned long long *delta = v;
175
176 list_for_each_entry(kvm, &vm_list, vm_list) {
177 kvm->arch.epoch -= *delta;
178 kvm_for_each_vcpu(i, vcpu, kvm) {
179 vcpu->arch.sie_block->epoch -= *delta;
180 if (vcpu->arch.cputm_enabled)
181 vcpu->arch.cputm_start += *delta;
182 if (vcpu->arch.vsie_block)
183 vcpu->arch.vsie_block->epoch -= *delta;
184 }
185 }
186 return NOTIFY_OK;
187}
188
189static struct notifier_block kvm_clock_notifier = {
190 .notifier_call = kvm_clock_sync,
191};
192
193int kvm_arch_hardware_setup(void)
194{
195 gmap_notifier.notifier_call = kvm_gmap_notifier;
196 gmap_register_pte_notifier(&gmap_notifier);
197 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
198 gmap_register_pte_notifier(&vsie_gmap_notifier);
199 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
200 &kvm_clock_notifier);
201 return 0;
202}
203
204void kvm_arch_hardware_unsetup(void)
205{
206 gmap_unregister_pte_notifier(&gmap_notifier);
207 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
208 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
209 &kvm_clock_notifier);
210}
211
212static void allow_cpu_feat(unsigned long nr)
213{
214 set_bit_inv(nr, kvm_s390_available_cpu_feat);
215}
216
217static inline int plo_test_bit(unsigned char nr)
218{
219 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
220 int cc = 3; /* subfunction not available */
221
222 asm volatile(
223 /* Parameter registers are ignored for "test bit" */
224 " plo 0,0,0,0(0)\n"
225 " ipm %0\n"
226 " srl %0,28\n"
227 : "=d" (cc)
228 : "d" (r0)
229 : "cc");
230 return cc == 0;
231}
232
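/*
 * Probe which subfunctions (PLO, PTFF, CPACF queries) and which SIE
 * interpretation facilities the host provides, so they can be offered to
 * guests via the cpu model interface. The vSIE related CPU features below
 * are only advertised when the "nested" module parameter is set.
 */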
233static void kvm_s390_cpu_feat_init(void)
234{
235 int i;
236
237 for (i = 0; i < 256; ++i) {
238 if (plo_test_bit(i))
239 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
240 }
241
242 if (test_facility(28)) /* TOD-clock steering */
243 ptff(kvm_s390_available_subfunc.ptff,
244 sizeof(kvm_s390_available_subfunc.ptff),
245 PTFF_QAF);
246
247 if (test_facility(17)) { /* MSA */
248 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
249 kvm_s390_available_subfunc.kmac);
250 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
251 kvm_s390_available_subfunc.kmc);
252 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
253 kvm_s390_available_subfunc.km);
254 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
255 kvm_s390_available_subfunc.kimd);
256 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
257 kvm_s390_available_subfunc.klmd);
258 }
259 if (test_facility(76)) /* MSA3 */
260 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
261 kvm_s390_available_subfunc.pckmo);
262 if (test_facility(77)) { /* MSA4 */
263 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
264 kvm_s390_available_subfunc.kmctr);
265 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
266 kvm_s390_available_subfunc.kmf);
267 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
268 kvm_s390_available_subfunc.kmo);
269 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
270 kvm_s390_available_subfunc.pcc);
271 }
272 if (test_facility(57)) /* MSA5 */
273 __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
274 kvm_s390_available_subfunc.ppno);
275
276 if (MACHINE_HAS_ESOP)
277 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
278 /*
279 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
280 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
281 */
282 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
283 !test_facility(3) || !nested)
284 return;
285 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
286 if (sclp.has_64bscao)
287 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
288 if (sclp.has_siif)
289 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
290 if (sclp.has_gpere)
291 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
292 if (sclp.has_gsls)
293 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
294 if (sclp.has_ib)
295 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
296 if (sclp.has_cei)
297 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
298 if (sclp.has_ibs)
299 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
300 /*
301 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
302 * all skey handling functions read/set the skey from the PGSTE
303 * instead of the real storage key.
304 *
305 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
306 * pages to be detected as preserved although they are resident.
307 *
308 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
309 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
310 *
311 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
312 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
313 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
314 *
315 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
316 * cannot easily shadow the SCA because of the ipte lock.
317 */
318}
319
320int kvm_arch_init(void *opaque)
321{
322 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
323 if (!kvm_s390_dbf)
324 return -ENOMEM;
325
326 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
327 debug_unregister(kvm_s390_dbf);
328 return -ENOMEM;
329 }
330
331 kvm_s390_cpu_feat_init();
332
333 /* Register floating interrupt controller interface. */
334 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
335}
336
337void kvm_arch_exit(void)
338{
339 debug_unregister(kvm_s390_dbf);
340}
341
342/* Section: device related */
343long kvm_arch_dev_ioctl(struct file *filp,
344 unsigned int ioctl, unsigned long arg)
345{
346 if (ioctl == KVM_S390_ENABLE_SIE)
347 return s390_enable_sie();
348 return -EINVAL;
349}
350
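/*
 * Handles KVM_CHECK_EXTENSION for both the system and the VM file
 * descriptor. A minimal userspace sketch (hypothetical vm_fd):
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	// -> MEM_OP_MAX_SIZE for kernels with this handler
 */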
351int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
352{
353 int r;
354
355 switch (ext) {
356 case KVM_CAP_S390_PSW:
357 case KVM_CAP_S390_GMAP:
358 case KVM_CAP_SYNC_MMU:
359#ifdef CONFIG_KVM_S390_UCONTROL
360 case KVM_CAP_S390_UCONTROL:
361#endif
362 case KVM_CAP_ASYNC_PF:
363 case KVM_CAP_SYNC_REGS:
364 case KVM_CAP_ONE_REG:
365 case KVM_CAP_ENABLE_CAP:
366 case KVM_CAP_S390_CSS_SUPPORT:
367 case KVM_CAP_IOEVENTFD:
368 case KVM_CAP_DEVICE_CTRL:
369 case KVM_CAP_ENABLE_CAP_VM:
370 case KVM_CAP_S390_IRQCHIP:
371 case KVM_CAP_VM_ATTRIBUTES:
372 case KVM_CAP_MP_STATE:
373 case KVM_CAP_S390_INJECT_IRQ:
374 case KVM_CAP_S390_USER_SIGP:
375 case KVM_CAP_S390_USER_STSI:
376 case KVM_CAP_S390_SKEYS:
377 case KVM_CAP_S390_IRQ_STATE:
378 case KVM_CAP_S390_USER_INSTR0:
379 r = 1;
380 break;
381 case KVM_CAP_S390_MEM_OP:
382 r = MEM_OP_MAX_SIZE;
383 break;
384 case KVM_CAP_NR_VCPUS:
385 case KVM_CAP_MAX_VCPUS:
386 r = KVM_S390_BSCA_CPU_SLOTS;
387 if (!kvm_s390_use_sca_entries())
388 r = KVM_MAX_VCPUS;
389 else if (sclp.has_esca && sclp.has_64bscao)
390 r = KVM_S390_ESCA_CPU_SLOTS;
391 break;
392 case KVM_CAP_NR_MEMSLOTS:
393 r = KVM_USER_MEM_SLOTS;
394 break;
395 case KVM_CAP_S390_COW:
396 r = MACHINE_HAS_ESOP;
397 break;
398 case KVM_CAP_S390_VECTOR_REGISTERS:
399 r = MACHINE_HAS_VX;
400 break;
401 case KVM_CAP_S390_RI:
402 r = test_facility(64);
403 break;
404 default:
405 r = 0;
406 }
407 return r;
408}
409
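/*
 * Walk every page of the memslot and transfer the dirty state recorded in
 * the host/gmap page tables into the KVM dirty bitmap of that slot.
 */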
410static void kvm_s390_sync_dirty_log(struct kvm *kvm,
411 struct kvm_memory_slot *memslot)
412{
413 gfn_t cur_gfn, last_gfn;
414 unsigned long address;
415 struct gmap *gmap = kvm->arch.gmap;
416
417 /* Loop over all guest pages */
418 last_gfn = memslot->base_gfn + memslot->npages;
419 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
420 address = gfn_to_hva_memslot(memslot, cur_gfn);
421
422 if (test_and_clear_guest_dirty(gmap->mm, address))
423 mark_page_dirty(kvm, cur_gfn);
424 if (fatal_signal_pending(current))
425 return;
426 cond_resched();
427 }
428}
429
430/* Section: vm related */
431static void sca_del_vcpu(struct kvm_vcpu *vcpu);
432
433/*
434 * Get (and clear) the dirty memory log for a memory slot.
435 */
436int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
437 struct kvm_dirty_log *log)
438{
439 int r;
440 unsigned long n;
441 struct kvm_memslots *slots;
442 struct kvm_memory_slot *memslot;
443 int is_dirty = 0;
444
445 if (kvm_is_ucontrol(kvm))
446 return -EINVAL;
447
448 mutex_lock(&kvm->slots_lock);
449
450 r = -EINVAL;
451 if (log->slot >= KVM_USER_MEM_SLOTS)
452 goto out;
453
454 slots = kvm_memslots(kvm);
455 memslot = id_to_memslot(slots, log->slot);
456 r = -ENOENT;
457 if (!memslot->dirty_bitmap)
458 goto out;
459
460 kvm_s390_sync_dirty_log(kvm, memslot);
461 r = kvm_get_dirty_log(kvm, log, &is_dirty);
462 if (r)
463 goto out;
464
465 /* Clear the dirty log */
466 if (is_dirty) {
467 n = kvm_dirty_bitmap_bytes(memslot);
468 memset(memslot->dirty_bitmap, 0, n);
469 }
470 r = 0;
471out:
472 mutex_unlock(&kvm->slots_lock);
473 return r;
474}
475
476static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
477{
478 unsigned int i;
479 struct kvm_vcpu *vcpu;
480
481 kvm_for_each_vcpu(i, vcpu, kvm) {
482 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
483 }
484}
485
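/*
 * VM capabilities are switched on with KVM_ENABLE_CAP on the VM fd, e.g.
 * (sketch, hypothetical vm_fd):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */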
486static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
487{
488 int r;
489
490 if (cap->flags)
491 return -EINVAL;
492
493 switch (cap->cap) {
494 case KVM_CAP_S390_IRQCHIP:
495 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
496 kvm->arch.use_irqchip = 1;
497 r = 0;
498 break;
499 case KVM_CAP_S390_USER_SIGP:
500 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
501 kvm->arch.user_sigp = 1;
502 r = 0;
503 break;
504 case KVM_CAP_S390_VECTOR_REGISTERS:
505 mutex_lock(&kvm->lock);
506 if (kvm->created_vcpus) {
507 r = -EBUSY;
508 } else if (MACHINE_HAS_VX) {
509 set_kvm_facility(kvm->arch.model.fac_mask, 129);
510 set_kvm_facility(kvm->arch.model.fac_list, 129);
511 r = 0;
512 } else
513 r = -EINVAL;
514 mutex_unlock(&kvm->lock);
515 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
516 r ? "(not available)" : "(success)");
517 break;
518 case KVM_CAP_S390_RI:
519 r = -EINVAL;
520 mutex_lock(&kvm->lock);
521 if (kvm->created_vcpus) {
522 r = -EBUSY;
523 } else if (test_facility(64)) {
524 set_kvm_facility(kvm->arch.model.fac_mask, 64);
525 set_kvm_facility(kvm->arch.model.fac_list, 64);
526 r = 0;
527 }
528 mutex_unlock(&kvm->lock);
529 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
530 r ? "(not available)" : "(success)");
531 break;
532 case KVM_CAP_S390_USER_STSI:
533 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
534 kvm->arch.user_stsi = 1;
535 r = 0;
536 break;
537 case KVM_CAP_S390_USER_INSTR0:
538 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
539 kvm->arch.user_instr0 = 1;
540 icpt_operexc_on_all_vcpus(kvm);
541 r = 0;
542 break;
543 default:
544 r = -EINVAL;
545 break;
546 }
547 return r;
548}
549
550static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
551{
552 int ret;
553
554 switch (attr->attr) {
555 case KVM_S390_VM_MEM_LIMIT_SIZE:
556 ret = 0;
557 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
558 kvm->arch.mem_limit);
559 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
560 ret = -EFAULT;
561 break;
562 default:
563 ret = -ENXIO;
564 break;
565 }
566 return ret;
567}
568
569static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
570{
571 int ret;
572 unsigned int idx;
573 switch (attr->attr) {
574 case KVM_S390_VM_MEM_ENABLE_CMMA:
575 ret = -ENXIO;
576 if (!sclp.has_cmma)
577 break;
578
579 ret = -EBUSY;
580 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
581 mutex_lock(&kvm->lock);
582 if (!kvm->created_vcpus) {
583 kvm->arch.use_cmma = 1;
584 ret = 0;
585 }
586 mutex_unlock(&kvm->lock);
587 break;
588 case KVM_S390_VM_MEM_CLR_CMMA:
589 ret = -ENXIO;
590 if (!sclp.has_cmma)
591 break;
592 ret = -EINVAL;
593 if (!kvm->arch.use_cmma)
594 break;
595
596 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
597 mutex_lock(&kvm->lock);
598 idx = srcu_read_lock(&kvm->srcu);
599 s390_reset_cmma(kvm->arch.gmap->mm);
600 srcu_read_unlock(&kvm->srcu, idx);
601 mutex_unlock(&kvm->lock);
602 ret = 0;
603 break;
604 case KVM_S390_VM_MEM_LIMIT_SIZE: {
605 unsigned long new_limit;
606
607 if (kvm_is_ucontrol(kvm))
608 return -EINVAL;
609
610 if (get_user(new_limit, (u64 __user *)attr->addr))
611 return -EFAULT;
612
613 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
614 new_limit > kvm->arch.mem_limit)
615 return -E2BIG;
616
617 if (!new_limit)
618 return -EINVAL;
619
620 /* gmap_create takes last usable address */
621 if (new_limit != KVM_S390_NO_MEM_LIMIT)
622 new_limit -= 1;
623
624 ret = -EBUSY;
625 mutex_lock(&kvm->lock);
626 if (!kvm->created_vcpus) {
627 /* gmap_create will round the limit up */
628 struct gmap *new = gmap_create(current->mm, new_limit);
629
630 if (!new) {
631 ret = -ENOMEM;
632 } else {
633 gmap_remove(kvm->arch.gmap);
634 new->private = kvm;
635 kvm->arch.gmap = new;
636 ret = 0;
637 }
638 }
639 mutex_unlock(&kvm->lock);
640 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
641 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
642 (void *) kvm->arch.gmap->asce);
643 break;
644 }
645 default:
646 ret = -ENXIO;
647 break;
648 }
649 return ret;
650}
651
652static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
653
654static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
655{
656 struct kvm_vcpu *vcpu;
657 int i;
658
659 if (!test_kvm_facility(kvm, 76))
660 return -EINVAL;
661
662 mutex_lock(&kvm->lock);
663 switch (attr->attr) {
664 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
665 get_random_bytes(
666 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
667 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
668 kvm->arch.crypto.aes_kw = 1;
669 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
670 break;
671 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
672 get_random_bytes(
673 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
674 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
675 kvm->arch.crypto.dea_kw = 1;
676 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
677 break;
678 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
679 kvm->arch.crypto.aes_kw = 0;
680 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
681 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
682 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
683 break;
684 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
685 kvm->arch.crypto.dea_kw = 0;
686 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
687 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
688 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
689 break;
690 default:
691 mutex_unlock(&kvm->lock);
692 return -ENXIO;
693 }
694
695 kvm_for_each_vcpu(i, vcpu, kvm) {
696 kvm_s390_vcpu_crypto_setup(vcpu);
697 exit_sie(vcpu);
698 }
699 mutex_unlock(&kvm->lock);
700 return 0;
701}
702
703static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
704{
705 u8 gtod_high;
706
707 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
708 sizeof(gtod_high)))
709 return -EFAULT;
710
711 if (gtod_high != 0)
712 return -EINVAL;
713 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
714
715 return 0;
716}
717
718static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
719{
720 u64 gtod;
721
722 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
723 return -EFAULT;
724
725 kvm_s390_set_tod_clock(kvm, gtod);
726 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
727 return 0;
728}
729
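/*
 * The guest TOD clock is configured via the VM device attribute interface,
 * e.g. (sketch, hypothetical vm_fd and tod variable):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)&tod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */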
730static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
731{
732 int ret;
733
734 if (attr->flags)
735 return -EINVAL;
736
737 switch (attr->attr) {
738 case KVM_S390_VM_TOD_HIGH:
739 ret = kvm_s390_set_tod_high(kvm, attr);
740 break;
741 case KVM_S390_VM_TOD_LOW:
742 ret = kvm_s390_set_tod_low(kvm, attr);
743 break;
744 default:
745 ret = -ENXIO;
746 break;
747 }
748 return ret;
749}
750
751static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
752{
753 u8 gtod_high = 0;
754
755 if (copy_to_user((void __user *)attr->addr, &gtod_high,
756 sizeof(gtod_high)))
757 return -EFAULT;
758 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
759
760 return 0;
761}
762
763static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
764{
765 u64 gtod;
766
767 gtod = kvm_s390_get_tod_clock_fast(kvm);
768 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
769 return -EFAULT;
770 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
771
772 return 0;
773}
774
775static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
776{
777 int ret;
778
779 if (attr->flags)
780 return -EINVAL;
781
782 switch (attr->attr) {
783 case KVM_S390_VM_TOD_HIGH:
784 ret = kvm_s390_get_tod_high(kvm, attr);
785 break;
786 case KVM_S390_VM_TOD_LOW:
787 ret = kvm_s390_get_tod_low(kvm, attr);
788 break;
789 default:
790 ret = -ENXIO;
791 break;
792 }
793 return ret;
794}
795
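/*
 * The IBC value requested by userspace is clamped to the range the machine
 * supports: no lower than the lowest IBC and no higher than the unblocked
 * IBC reported by SCLP.
 */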
796static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
797{
798 struct kvm_s390_vm_cpu_processor *proc;
799 u16 lowest_ibc, unblocked_ibc;
800 int ret = 0;
801
802 mutex_lock(&kvm->lock);
803 if (kvm->created_vcpus) {
804 ret = -EBUSY;
805 goto out;
806 }
807 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
808 if (!proc) {
809 ret = -ENOMEM;
810 goto out;
811 }
812 if (!copy_from_user(proc, (void __user *)attr->addr,
813 sizeof(*proc))) {
814 kvm->arch.model.cpuid = proc->cpuid;
815 lowest_ibc = sclp.ibc >> 16 & 0xfff;
816 unblocked_ibc = sclp.ibc & 0xfff;
817 if (lowest_ibc && proc->ibc) {
818 if (proc->ibc > unblocked_ibc)
819 kvm->arch.model.ibc = unblocked_ibc;
820 else if (proc->ibc < lowest_ibc)
821 kvm->arch.model.ibc = lowest_ibc;
822 else
823 kvm->arch.model.ibc = proc->ibc;
824 }
825 memcpy(kvm->arch.model.fac_list, proc->fac_list,
826 S390_ARCH_FAC_LIST_SIZE_BYTE);
827 } else
828 ret = -EFAULT;
829 kfree(proc);
830out:
831 mutex_unlock(&kvm->lock);
832 return ret;
833}
834
835static int kvm_s390_set_processor_feat(struct kvm *kvm,
836 struct kvm_device_attr *attr)
837{
838 struct kvm_s390_vm_cpu_feat data;
839 int ret = -EBUSY;
840
841 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
842 return -EFAULT;
843 if (!bitmap_subset((unsigned long *) data.feat,
844 kvm_s390_available_cpu_feat,
845 KVM_S390_VM_CPU_FEAT_NR_BITS))
846 return -EINVAL;
847
848 mutex_lock(&kvm->lock);
849 if (!atomic_read(&kvm->online_vcpus)) {
850 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
851 KVM_S390_VM_CPU_FEAT_NR_BITS);
852 ret = 0;
853 }
854 mutex_unlock(&kvm->lock);
855 return ret;
856}
857
858static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
859 struct kvm_device_attr *attr)
860{
861 /*
862 * Once supported by kernel + hw, we have to store the subfunctions
863 * in kvm->arch and remember that user space configured them.
864 */
865 return -ENXIO;
866}
867
868static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
869{
870 int ret = -ENXIO;
871
872 switch (attr->attr) {
873 case KVM_S390_VM_CPU_PROCESSOR:
874 ret = kvm_s390_set_processor(kvm, attr);
875 break;
876 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
877 ret = kvm_s390_set_processor_feat(kvm, attr);
878 break;
879 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
880 ret = kvm_s390_set_processor_subfunc(kvm, attr);
881 break;
882 }
883 return ret;
884}
885
886static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
887{
888 struct kvm_s390_vm_cpu_processor *proc;
889 int ret = 0;
890
891 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
892 if (!proc) {
893 ret = -ENOMEM;
894 goto out;
895 }
896 proc->cpuid = kvm->arch.model.cpuid;
897 proc->ibc = kvm->arch.model.ibc;
898 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
899 S390_ARCH_FAC_LIST_SIZE_BYTE);
900 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
901 ret = -EFAULT;
902 kfree(proc);
903out:
904 return ret;
905}
906
907static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
908{
909 struct kvm_s390_vm_cpu_machine *mach;
910 int ret = 0;
911
912 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
913 if (!mach) {
914 ret = -ENOMEM;
915 goto out;
916 }
917 get_cpu_id((struct cpuid *) &mach->cpuid);
918 mach->ibc = sclp.ibc;
919 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
920 S390_ARCH_FAC_LIST_SIZE_BYTE);
921 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
922 sizeof(S390_lowcore.stfle_fac_list));
923 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
924 ret = -EFAULT;
925 kfree(mach);
926out:
927 return ret;
928}
929
930static int kvm_s390_get_processor_feat(struct kvm *kvm,
931 struct kvm_device_attr *attr)
932{
933 struct kvm_s390_vm_cpu_feat data;
934
935 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
936 KVM_S390_VM_CPU_FEAT_NR_BITS);
937 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
938 return -EFAULT;
939 return 0;
940}
941
942static int kvm_s390_get_machine_feat(struct kvm *kvm,
943 struct kvm_device_attr *attr)
944{
945 struct kvm_s390_vm_cpu_feat data;
946
947 bitmap_copy((unsigned long *) data.feat,
948 kvm_s390_available_cpu_feat,
949 KVM_S390_VM_CPU_FEAT_NR_BITS);
950 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
951 return -EFAULT;
952 return 0;
953}
954
955static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
956 struct kvm_device_attr *attr)
957{
958 /*
959 * Once we can actually configure subfunctions (kernel + hw support),
960 * we have to check if they were already set by user space, if so copy
961 * them from kvm->arch.
962 */
963 return -ENXIO;
964}
965
966static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
967 struct kvm_device_attr *attr)
968{
969 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
970 sizeof(struct kvm_s390_vm_cpu_subfunc)))
971 return -EFAULT;
972 return 0;
973}
974static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
975{
976 int ret = -ENXIO;
977
978 switch (attr->attr) {
979 case KVM_S390_VM_CPU_PROCESSOR:
980 ret = kvm_s390_get_processor(kvm, attr);
981 break;
982 case KVM_S390_VM_CPU_MACHINE:
983 ret = kvm_s390_get_machine(kvm, attr);
984 break;
985 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
986 ret = kvm_s390_get_processor_feat(kvm, attr);
987 break;
988 case KVM_S390_VM_CPU_MACHINE_FEAT:
989 ret = kvm_s390_get_machine_feat(kvm, attr);
990 break;
991 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
992 ret = kvm_s390_get_processor_subfunc(kvm, attr);
993 break;
994 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
995 ret = kvm_s390_get_machine_subfunc(kvm, attr);
996 break;
997 }
998 return ret;
999}
1000
1001static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1002{
1003 int ret;
1004
1005 switch (attr->group) {
1006 case KVM_S390_VM_MEM_CTRL:
1007 ret = kvm_s390_set_mem_control(kvm, attr);
1008 break;
1009 case KVM_S390_VM_TOD:
1010 ret = kvm_s390_set_tod(kvm, attr);
1011 break;
1012 case KVM_S390_VM_CPU_MODEL:
1013 ret = kvm_s390_set_cpu_model(kvm, attr);
1014 break;
1015 case KVM_S390_VM_CRYPTO:
1016 ret = kvm_s390_vm_set_crypto(kvm, attr);
1017 break;
1018 default:
1019 ret = -ENXIO;
1020 break;
1021 }
1022
1023 return ret;
1024}
1025
1026static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1027{
1028 int ret;
1029
1030 switch (attr->group) {
1031 case KVM_S390_VM_MEM_CTRL:
1032 ret = kvm_s390_get_mem_control(kvm, attr);
1033 break;
1034 case KVM_S390_VM_TOD:
1035 ret = kvm_s390_get_tod(kvm, attr);
1036 break;
1037 case KVM_S390_VM_CPU_MODEL:
1038 ret = kvm_s390_get_cpu_model(kvm, attr);
1039 break;
1040 default:
1041 ret = -ENXIO;
1042 break;
1043 }
1044
1045 return ret;
1046}
1047
1048static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1049{
1050 int ret;
1051
1052 switch (attr->group) {
1053 case KVM_S390_VM_MEM_CTRL:
1054 switch (attr->attr) {
1055 case KVM_S390_VM_MEM_ENABLE_CMMA:
1056 case KVM_S390_VM_MEM_CLR_CMMA:
1057 ret = sclp.has_cmma ? 0 : -ENXIO;
1058 break;
1059 case KVM_S390_VM_MEM_LIMIT_SIZE:
1060 ret = 0;
1061 break;
1062 default:
1063 ret = -ENXIO;
1064 break;
1065 }
1066 break;
1067 case KVM_S390_VM_TOD:
1068 switch (attr->attr) {
1069 case KVM_S390_VM_TOD_LOW:
1070 case KVM_S390_VM_TOD_HIGH:
1071 ret = 0;
1072 break;
1073 default:
1074 ret = -ENXIO;
1075 break;
1076 }
1077 break;
1078 case KVM_S390_VM_CPU_MODEL:
1079 switch (attr->attr) {
1080 case KVM_S390_VM_CPU_PROCESSOR:
1081 case KVM_S390_VM_CPU_MACHINE:
1082 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1083 case KVM_S390_VM_CPU_MACHINE_FEAT:
1084 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1085 ret = 0;
1086 break;
1087 /* configuring subfunctions is not supported yet */
1088 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1089 default:
1090 ret = -ENXIO;
1091 break;
1092 }
1093 break;
1094 case KVM_S390_VM_CRYPTO:
1095 switch (attr->attr) {
1096 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1097 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1098 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1099 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1100 ret = 0;
1101 break;
1102 default:
1103 ret = -ENXIO;
1104 break;
1105 }
1106 break;
1107 default:
1108 ret = -ENXIO;
1109 break;
1110 }
1111
1112 return ret;
1113}
1114
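/*
 * KVM_S390_GET_SKEYS copies one storage key per guest page into a user
 * buffer; KVM_S390_GET_SKEYS_NONE is returned as long as the guest has
 * never made use of storage keys (skey handling not yet enabled).
 */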
1115static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1116{
1117 uint8_t *keys;
1118 uint64_t hva;
1119 int i, r = 0;
1120
1121 if (args->flags != 0)
1122 return -EINVAL;
1123
1124 /* Is this guest using storage keys? */
1125 if (!mm_use_skey(current->mm))
1126 return KVM_S390_GET_SKEYS_NONE;
1127
1128 /* Enforce sane limit on memory allocation */
1129 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1130 return -EINVAL;
1131
1132 keys = kmalloc_array(args->count, sizeof(uint8_t),
1133 GFP_KERNEL | __GFP_NOWARN);
1134 if (!keys)
1135 keys = vmalloc(sizeof(uint8_t) * args->count);
1136 if (!keys)
1137 return -ENOMEM;
1138
1139 down_read(&current->mm->mmap_sem);
1140 for (i = 0; i < args->count; i++) {
1141 hva = gfn_to_hva(kvm, args->start_gfn + i);
1142 if (kvm_is_error_hva(hva)) {
1143 r = -EFAULT;
1144 break;
1145 }
1146
1147 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1148 if (r)
1149 break;
1150 }
1151 up_read(&current->mm->mmap_sem);
1152
1153 if (!r) {
1154 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1155 sizeof(uint8_t) * args->count);
1156 if (r)
1157 r = -EFAULT;
1158 }
1159
1160 kvfree(keys);
1161 return r;
1162}
1163
1164static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1165{
1166 uint8_t *keys;
1167 uint64_t hva;
1168 int i, r = 0;
1169
1170 if (args->flags != 0)
1171 return -EINVAL;
1172
1173 /* Enforce sane limit on memory allocation */
1174 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1175 return -EINVAL;
1176
1177 keys = kmalloc_array(args->count, sizeof(uint8_t),
1178 GFP_KERNEL | __GFP_NOWARN);
1179 if (!keys)
1180 keys = vmalloc(sizeof(uint8_t) * args->count);
1181 if (!keys)
1182 return -ENOMEM;
1183
1184 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1185 sizeof(uint8_t) * args->count);
1186 if (r) {
1187 r = -EFAULT;
1188 goto out;
1189 }
1190
1191 /* Enable storage key handling for the guest */
1192 r = s390_enable_skey();
1193 if (r)
1194 goto out;
1195
1196 down_read(&current->mm->mmap_sem);
1197 for (i = 0; i < args->count; i++) {
1198 hva = gfn_to_hva(kvm, args->start_gfn + i);
1199 if (kvm_is_error_hva(hva)) {
1200 r = -EFAULT;
1201 break;
1202 }
1203
1204 /* Lowest order bit is reserved */
1205 if (keys[i] & 0x01) {
1206 r = -EINVAL;
1207 break;
1208 }
1209
1210 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1211 if (r)
1212 break;
1213 }
1214 up_read(&current->mm->mmap_sem);
1215out:
1216 kvfree(keys);
1217 return r;
1218}
1219
1220long kvm_arch_vm_ioctl(struct file *filp,
1221 unsigned int ioctl, unsigned long arg)
1222{
1223 struct kvm *kvm = filp->private_data;
1224 void __user *argp = (void __user *)arg;
1225 struct kvm_device_attr attr;
1226 int r;
1227
1228 switch (ioctl) {
1229 case KVM_S390_INTERRUPT: {
1230 struct kvm_s390_interrupt s390int;
1231
1232 r = -EFAULT;
1233 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1234 break;
1235 r = kvm_s390_inject_vm(kvm, &s390int);
1236 break;
1237 }
1238 case KVM_ENABLE_CAP: {
1239 struct kvm_enable_cap cap;
1240 r = -EFAULT;
1241 if (copy_from_user(&cap, argp, sizeof(cap)))
1242 break;
1243 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1244 break;
1245 }
1246 case KVM_CREATE_IRQCHIP: {
1247 struct kvm_irq_routing_entry routing;
1248
1249 r = -EINVAL;
1250 if (kvm->arch.use_irqchip) {
1251 /* Set up dummy routing. */
1252 memset(&routing, 0, sizeof(routing));
1253 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1254 }
1255 break;
1256 }
1257 case KVM_SET_DEVICE_ATTR: {
1258 r = -EFAULT;
1259 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1260 break;
1261 r = kvm_s390_vm_set_attr(kvm, &attr);
1262 break;
1263 }
1264 case KVM_GET_DEVICE_ATTR: {
1265 r = -EFAULT;
1266 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1267 break;
1268 r = kvm_s390_vm_get_attr(kvm, &attr);
1269 break;
1270 }
1271 case KVM_HAS_DEVICE_ATTR: {
1272 r = -EFAULT;
1273 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1274 break;
1275 r = kvm_s390_vm_has_attr(kvm, &attr);
1276 break;
1277 }
1278 case KVM_S390_GET_SKEYS: {
1279 struct kvm_s390_skeys args;
1280
1281 r = -EFAULT;
1282 if (copy_from_user(&args, argp,
1283 sizeof(struct kvm_s390_skeys)))
1284 break;
1285 r = kvm_s390_get_skeys(kvm, &args);
1286 break;
1287 }
1288 case KVM_S390_SET_SKEYS: {
1289 struct kvm_s390_skeys args;
1290
1291 r = -EFAULT;
1292 if (copy_from_user(&args, argp,
1293 sizeof(struct kvm_s390_skeys)))
1294 break;
1295 r = kvm_s390_set_skeys(kvm, &args);
1296 break;
1297 }
1298 default:
1299 r = -ENOTTY;
1300 }
1301
1302 return r;
1303}
1304
1305static int kvm_s390_query_ap_config(u8 *config)
1306{
1307 u32 fcn_code = 0x04000000UL;
1308 u32 cc = 0;
1309
1310 memset(config, 0, 128);
1311 asm volatile(
1312 "lgr 0,%1\n"
1313 "lgr 2,%2\n"
1314 ".long 0xb2af0000\n" /* PQAP(QCI) */
1315 "0: ipm %0\n"
1316 "srl %0,28\n"
1317 "1:\n"
1318 EX_TABLE(0b, 1b)
1319 : "+r" (cc)
1320 : "r" (fcn_code), "r" (config)
1321 : "cc", "0", "2", "memory"
1322 );
1323
1324 return cc;
1325}
1326
1327static int kvm_s390_apxa_installed(void)
1328{
1329 u8 config[128];
1330 int cc;
1331
1332 if (test_facility(12)) {
1333 cc = kvm_s390_query_ap_config(config);
1334
1335 if (cc)
1336 pr_err("PQAP(QCI) failed with cc=%d", cc);
1337 else
1338 return config[0] & 0x40;
1339 }
1340
1341 return 0;
1342}
1343
1344static void kvm_s390_set_crycb_format(struct kvm *kvm)
1345{
1346 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1347
1348 if (kvm_s390_apxa_installed())
1349 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1350 else
1351 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1352}
1353
1354static u64 kvm_s390_get_initial_cpuid(void)
1355{
1356 struct cpuid cpuid;
1357
1358 get_cpu_id(&cpuid);
1359 cpuid.version = 0xff;
1360 return *((u64 *) &cpuid);
1361}
1362
1363static void kvm_s390_crypto_init(struct kvm *kvm)
1364{
1365 if (!test_kvm_facility(kvm, 76))
1366 return;
1367
1368 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1369 kvm_s390_set_crycb_format(kvm);
1370
1371 /* Enable AES/DEA protected key functions by default */
1372 kvm->arch.crypto.aes_kw = 1;
1373 kvm->arch.crypto.dea_kw = 1;
1374 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1375 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1376 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1377 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1378}
1379
1380static void sca_dispose(struct kvm *kvm)
1381{
1382 if (kvm->arch.use_esca)
1383 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1384 else
1385 free_page((unsigned long)(kvm->arch.sca));
1386 kvm->arch.sca = NULL;
1387}
1388
1389int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1390{
1391 gfp_t alloc_flags = GFP_KERNEL;
1392 int i, rc;
1393 char debug_name[16];
1394 static unsigned long sca_offset;
1395
1396 rc = -EINVAL;
1397#ifdef CONFIG_KVM_S390_UCONTROL
1398 if (type & ~KVM_VM_S390_UCONTROL)
1399 goto out_err;
1400 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1401 goto out_err;
1402#else
1403 if (type)
1404 goto out_err;
1405#endif
1406
1407 rc = s390_enable_sie();
1408 if (rc)
1409 goto out_err;
1410
1411 rc = -ENOMEM;
1412
1413 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1414
1415 kvm->arch.use_esca = 0; /* start with basic SCA */
1416 if (!sclp.has_64bscao)
1417 alloc_flags |= GFP_DMA;
1418 rwlock_init(&kvm->arch.sca_lock);
1419 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1420 if (!kvm->arch.sca)
1421 goto out_err;
1422 spin_lock(&kvm_lock);
1423 sca_offset += 16;
1424 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1425 sca_offset = 0;
1426 kvm->arch.sca = (struct bsca_block *)
1427 ((char *) kvm->arch.sca + sca_offset);
1428 spin_unlock(&kvm_lock);
1429
1430 sprintf(debug_name, "kvm-%u", current->pid);
1431
1432 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1433 if (!kvm->arch.dbf)
1434 goto out_err;
1435
1436 kvm->arch.sie_page2 =
1437 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1438 if (!kvm->arch.sie_page2)
1439 goto out_err;
1440
1441 /* Populate the facility mask initially. */
1442 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1443 sizeof(S390_lowcore.stfle_fac_list));
1444 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1445 if (i < kvm_s390_fac_list_mask_size())
1446 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1447 else
1448 kvm->arch.model.fac_mask[i] = 0UL;
1449 }
1450
1451 /* Populate the facility list initially. */
1452 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1453 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1454 S390_ARCH_FAC_LIST_SIZE_BYTE);
1455
1456 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1457 set_kvm_facility(kvm->arch.model.fac_list, 74);
1458
1459 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1460 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1461
1462 kvm_s390_crypto_init(kvm);
1463
1464 spin_lock_init(&kvm->arch.float_int.lock);
1465 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1466 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1467 init_waitqueue_head(&kvm->arch.ipte_wq);
1468 mutex_init(&kvm->arch.ipte_mutex);
1469
1470 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1471 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1472
1473 if (type & KVM_VM_S390_UCONTROL) {
1474 kvm->arch.gmap = NULL;
1475 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1476 } else {
1477 if (sclp.hamax == U64_MAX)
1478 kvm->arch.mem_limit = TASK_MAX_SIZE;
1479 else
1480 kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1481 sclp.hamax + 1);
1482 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1483 if (!kvm->arch.gmap)
1484 goto out_err;
1485 kvm->arch.gmap->private = kvm;
1486 kvm->arch.gmap->pfault_enabled = 0;
1487 }
1488
1489 kvm->arch.css_support = 0;
1490 kvm->arch.use_irqchip = 0;
1491 kvm->arch.epoch = 0;
1492
1493 spin_lock_init(&kvm->arch.start_stop_lock);
1494 kvm_s390_vsie_init(kvm);
1495 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1496
1497 return 0;
1498out_err:
1499 free_page((unsigned long)kvm->arch.sie_page2);
1500 debug_unregister(kvm->arch.dbf);
1501 sca_dispose(kvm);
1502 KVM_EVENT(3, "creation of vm failed: %d", rc);
1503 return rc;
1504}
1505
1506bool kvm_arch_has_vcpu_debugfs(void)
1507{
1508 return false;
1509}
1510
1511int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1512{
1513 return 0;
1514}
1515
1516void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1517{
1518 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1519 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1520 kvm_s390_clear_local_irqs(vcpu);
1521 kvm_clear_async_pf_completion_queue(vcpu);
1522 if (!kvm_is_ucontrol(vcpu->kvm))
1523 sca_del_vcpu(vcpu);
1524
1525 if (kvm_is_ucontrol(vcpu->kvm))
1526 gmap_remove(vcpu->arch.gmap);
1527
1528 if (vcpu->kvm->arch.use_cmma)
1529 kvm_s390_vcpu_unsetup_cmma(vcpu);
1530 free_page((unsigned long)(vcpu->arch.sie_block));
1531
1532 kvm_vcpu_uninit(vcpu);
1533 kmem_cache_free(kvm_vcpu_cache, vcpu);
1534}
1535
1536static void kvm_free_vcpus(struct kvm *kvm)
1537{
1538 unsigned int i;
1539 struct kvm_vcpu *vcpu;
1540
1541 kvm_for_each_vcpu(i, vcpu, kvm)
1542 kvm_arch_vcpu_destroy(vcpu);
1543
1544 mutex_lock(&kvm->lock);
1545 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1546 kvm->vcpus[i] = NULL;
1547
1548 atomic_set(&kvm->online_vcpus, 0);
1549 mutex_unlock(&kvm->lock);
1550}
1551
1552void kvm_arch_destroy_vm(struct kvm *kvm)
1553{
1554 kvm_free_vcpus(kvm);
1555 sca_dispose(kvm);
1556 debug_unregister(kvm->arch.dbf);
1557 free_page((unsigned long)kvm->arch.sie_page2);
1558 if (!kvm_is_ucontrol(kvm))
1559 gmap_remove(kvm->arch.gmap);
1560 kvm_s390_destroy_adapters(kvm);
1561 kvm_s390_clear_float_irqs(kvm);
1562 kvm_s390_vsie_destroy(kvm);
1563 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1564}
1565
1566/* Section: vcpu related */
1567static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1568{
1569 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1570 if (!vcpu->arch.gmap)
1571 return -ENOMEM;
1572 vcpu->arch.gmap->private = vcpu->kvm;
1573
1574 return 0;
1575}
1576
1577static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1578{
1579 if (!kvm_s390_use_sca_entries())
1580 return;
1581 read_lock(&vcpu->kvm->arch.sca_lock);
1582 if (vcpu->kvm->arch.use_esca) {
1583 struct esca_block *sca = vcpu->kvm->arch.sca;
1584
1585 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1586 sca->cpu[vcpu->vcpu_id].sda = 0;
1587 } else {
1588 struct bsca_block *sca = vcpu->kvm->arch.sca;
1589
1590 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1591 sca->cpu[vcpu->vcpu_id].sda = 0;
1592 }
1593 read_unlock(&vcpu->kvm->arch.sca_lock);
1594}
1595
1596static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1597{
1598 if (!kvm_s390_use_sca_entries()) {
1599 struct bsca_block *sca = vcpu->kvm->arch.sca;
1600
1601 /* we still need the basic sca for the ipte control */
1602 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1603 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1604 }
1605 read_lock(&vcpu->kvm->arch.sca_lock);
1606 if (vcpu->kvm->arch.use_esca) {
1607 struct esca_block *sca = vcpu->kvm->arch.sca;
1608
1609 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1610 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1611 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1612 vcpu->arch.sie_block->ecb2 |= 0x04U;
1613 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1614 } else {
1615 struct bsca_block *sca = vcpu->kvm->arch.sca;
1616
1617 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1618 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1619 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1620 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1621 }
1622 read_unlock(&vcpu->kvm->arch.sca_lock);
1623}
1624
1625/* Basic SCA to Extended SCA data copy routines */
1626static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1627{
1628 d->sda = s->sda;
1629 d->sigp_ctrl.c = s->sigp_ctrl.c;
1630 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1631}
1632
1633static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1634{
1635 int i;
1636
1637 d->ipte_control = s->ipte_control;
1638 d->mcn[0] = s->mcn;
1639 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1640 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1641}
1642
1643static int sca_switch_to_extended(struct kvm *kvm)
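/*
 * Convert a VM from the basic SCA to the extended SCA, which provides more
 * VCPU slots (248 instead of 64). All VCPUs are blocked while their SCA
 * origin and the ESCA bit in ECB2 are rewritten; afterwards the old basic
 * SCA is freed.
 */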
1644{
1645 struct bsca_block *old_sca = kvm->arch.sca;
1646 struct esca_block *new_sca;
1647 struct kvm_vcpu *vcpu;
1648 unsigned int vcpu_idx;
1649 u32 scaol, scaoh;
1650
1651 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1652 if (!new_sca)
1653 return -ENOMEM;
1654
1655 scaoh = (u32)((u64)(new_sca) >> 32);
1656 scaol = (u32)(u64)(new_sca) & ~0x3fU;
1657
1658 kvm_s390_vcpu_block_all(kvm);
1659 write_lock(&kvm->arch.sca_lock);
1660
1661 sca_copy_b_to_e(new_sca, old_sca);
1662
1663 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1664 vcpu->arch.sie_block->scaoh = scaoh;
1665 vcpu->arch.sie_block->scaol = scaol;
1666 vcpu->arch.sie_block->ecb2 |= 0x04U;
1667 }
1668 kvm->arch.sca = new_sca;
1669 kvm->arch.use_esca = 1;
1670
1671 write_unlock(&kvm->arch.sca_lock);
1672 kvm_s390_vcpu_unblock_all(kvm);
1673
1674 free_page((unsigned long)old_sca);
1675
1676 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1677 old_sca, kvm->arch.sca);
1678 return 0;
1679}
1680
1681static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1682{
1683 int rc;
1684
1685 if (!kvm_s390_use_sca_entries()) {
1686 if (id < KVM_MAX_VCPUS)
1687 return true;
1688 return false;
1689 }
1690 if (id < KVM_S390_BSCA_CPU_SLOTS)
1691 return true;
1692 if (!sclp.has_esca || !sclp.has_64bscao)
1693 return false;
1694
1695 mutex_lock(&kvm->lock);
1696 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1697 mutex_unlock(&kvm->lock);
1698
1699 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1700}
1701
1702int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1703{
1704 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1705 kvm_clear_async_pf_completion_queue(vcpu);
1706 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1707 KVM_SYNC_GPRS |
1708 KVM_SYNC_ACRS |
1709 KVM_SYNC_CRS |
1710 KVM_SYNC_ARCH0 |
1711 KVM_SYNC_PFAULT;
1712 kvm_s390_set_prefix(vcpu, 0);
1713 if (test_kvm_facility(vcpu->kvm, 64))
1714 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1715 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1716 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1717 */
1718 if (MACHINE_HAS_VX)
1719 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1720 else
1721 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1722
1723 if (kvm_is_ucontrol(vcpu->kvm))
1724 return __kvm_ucontrol_vcpu_init(vcpu);
1725
1726 return 0;
1727}
1728
1729/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1730static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1731{
1732 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1733 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1734 vcpu->arch.cputm_start = get_tod_clock_fast();
1735 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1736}
1737
1738/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1739static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1740{
1741 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1742 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1743 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1744 vcpu->arch.cputm_start = 0;
1745 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1746}
1747
1748/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1749static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1750{
1751 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1752 vcpu->arch.cputm_enabled = true;
1753 __start_cpu_timer_accounting(vcpu);
1754}
1755
1756/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1757static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1758{
1759 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1760 __stop_cpu_timer_accounting(vcpu);
1761 vcpu->arch.cputm_enabled = false;
1762}
1763
1764static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1765{
1766 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1767 __enable_cpu_timer_accounting(vcpu);
1768 preempt_enable();
1769}
1770
1771static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1772{
1773 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1774 __disable_cpu_timer_accounting(vcpu);
1775 preempt_enable();
1776}
1777
1778/* set the cpu timer - may only be called from the VCPU thread itself */
1779void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1780{
1781 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1782 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1783 if (vcpu->arch.cputm_enabled)
1784 vcpu->arch.cputm_start = get_tod_clock_fast();
1785 vcpu->arch.sie_block->cputm = cputm;
1786 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1787 preempt_enable();
1788}
1789
1790/* update and get the cpu timer - can also be called from other VCPU threads */
1791__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1792{
1793 unsigned int seq;
1794 __u64 value;
1795
1796 if (unlikely(!vcpu->arch.cputm_enabled))
1797 return vcpu->arch.sie_block->cputm;
1798
1799 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1800 do {
1801 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1802 /*
1803 * If the writer would ever execute a read in the critical
1804 * section, e.g. in irq context, we have a deadlock.
1805 */
1806 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1807 value = vcpu->arch.sie_block->cputm;
1808 /* if cputm_start is 0, accounting is being started/stopped */
1809 if (likely(vcpu->arch.cputm_start))
1810 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1811 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1812 preempt_enable();
1813 return value;
1814}
1815
1816void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1817{
1818
1819 gmap_enable(vcpu->arch.enabled_gmap);
1820 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1821 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1822 __start_cpu_timer_accounting(vcpu);
1823 vcpu->cpu = cpu;
1824}
1825
1826void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1827{
1828 vcpu->cpu = -1;
1829 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1830 __stop_cpu_timer_accounting(vcpu);
1831 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1832 vcpu->arch.enabled_gmap = gmap_get_enabled();
1833 gmap_disable(vcpu->arch.enabled_gmap);
1834
1835}
1836
1837static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1838{
1839 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1840 vcpu->arch.sie_block->gpsw.mask = 0UL;
1841 vcpu->arch.sie_block->gpsw.addr = 0UL;
1842 kvm_s390_set_prefix(vcpu, 0);
1843 kvm_s390_set_cpu_timer(vcpu, 0);
1844 vcpu->arch.sie_block->ckc = 0UL;
1845 vcpu->arch.sie_block->todpr = 0;
1846 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1847 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1848 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1849 /* make sure the new fpc will be lazily loaded */
1850 save_fpu_regs();
1851 current->thread.fpu.fpc = 0;
1852 vcpu->arch.sie_block->gbea = 1;
1853 vcpu->arch.sie_block->pp = 0;
1854 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1855 kvm_clear_async_pf_completion_queue(vcpu);
1856 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1857 kvm_s390_vcpu_stop(vcpu);
1858 kvm_s390_clear_local_irqs(vcpu);
1859}
1860
1861void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1862{
1863 mutex_lock(&vcpu->kvm->lock);
1864 preempt_disable();
1865 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1866 preempt_enable();
1867 mutex_unlock(&vcpu->kvm->lock);
1868 if (!kvm_is_ucontrol(vcpu->kvm)) {
1869 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1870 sca_add_vcpu(vcpu);
1871 }
1872 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1873 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1874 /* make vcpu_load load the right gmap on the first trigger */
1875 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1876}
1877
1878static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1879{
1880 if (!test_kvm_facility(vcpu->kvm, 76))
1881 return;
1882
1883 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1884
1885 if (vcpu->kvm->arch.crypto.aes_kw)
1886 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1887 if (vcpu->kvm->arch.crypto.dea_kw)
1888 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1889
1890 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1891}
1892
1893void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1894{
1895 free_page(vcpu->arch.sie_block->cbrlo);
1896 vcpu->arch.sie_block->cbrlo = 0;
1897}
1898
1899int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
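/*
 * ECB2 bit 0x80 enables CMMA (ESSA) interpretation by the SIE, with the
 * cbrlo page below collecting the ESSA results; PFMF interpretation
 * (bit 0x08) is kept switched off while CMMA is in use.
 */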
1900{
1901 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1902 if (!vcpu->arch.sie_block->cbrlo)
1903 return -ENOMEM;
1904
1905 vcpu->arch.sie_block->ecb2 |= 0x80;
1906 vcpu->arch.sie_block->ecb2 &= ~0x08;
1907 return 0;
1908}
1909
1910static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1911{
1912 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1913
1914 vcpu->arch.sie_block->ibc = model->ibc;
1915 if (test_kvm_facility(vcpu->kvm, 7))
1916 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1917}
1918
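/*
 * The numeric constants below are SIE control block bits: ecb 0x02 host
 * protection interruption, 0x04 SRS interpretation, 0x10 transactional
 * execution; ecb2 0x08 PFMF interpretation; eca 0x80000000 CEI,
 * 0x40000000 IB, 0x10000000 SIGP interpretation, 0x00020000 vector
 * support (together with ecd 0x20000000 host register management),
 * 0x1 SIIF.
 */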
1919int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1920{
1921 int rc = 0;
1922
1923 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1924 CPUSTAT_SM |
1925 CPUSTAT_STOPPED);
1926
1927 if (test_kvm_facility(vcpu->kvm, 78))
1928 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1929 else if (test_kvm_facility(vcpu->kvm, 8))
1930 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1931
1932 kvm_s390_vcpu_setup_model(vcpu);
1933
1934 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1935 if (MACHINE_HAS_ESOP)
1936 vcpu->arch.sie_block->ecb |= 0x02;
1937 if (test_kvm_facility(vcpu->kvm, 9))
1938 vcpu->arch.sie_block->ecb |= 0x04;
1939 if (test_kvm_facility(vcpu->kvm, 73))
1940 vcpu->arch.sie_block->ecb |= 0x10;
1941
1942 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1943 vcpu->arch.sie_block->ecb2 |= 0x08;
1944 vcpu->arch.sie_block->eca = 0x1002000U;
1945 if (sclp.has_cei)
1946 vcpu->arch.sie_block->eca |= 0x80000000U;
1947 if (sclp.has_ib)
1948 vcpu->arch.sie_block->eca |= 0x40000000U;
1949 if (sclp.has_siif)
1950 vcpu->arch.sie_block->eca |= 1;
1951 if (sclp.has_sigpif)
1952 vcpu->arch.sie_block->eca |= 0x10000000U;
1953 if (test_kvm_facility(vcpu->kvm, 129)) {
1954 vcpu->arch.sie_block->eca |= 0x00020000;
1955 vcpu->arch.sie_block->ecd |= 0x20000000;
1956 }
1957 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1958 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1959
1960 if (vcpu->kvm->arch.use_cmma) {
1961 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1962 if (rc)
1963 return rc;
1964 }
1965 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1966 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1967
1968 kvm_s390_vcpu_crypto_setup(vcpu);
1969
1970 return rc;
1971}
1972
1973struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1974 unsigned int id)
1975{
1976 struct kvm_vcpu *vcpu;
1977 struct sie_page *sie_page;
1978 int rc = -EINVAL;
1979
1980 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1981 goto out;
1982
1983 rc = -ENOMEM;
1984
1985 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1986 if (!vcpu)
1987 goto out;
1988
1989 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1990 if (!sie_page)
1991 goto out_free_cpu;
1992
1993 vcpu->arch.sie_block = &sie_page->sie_block;
1994 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1995
1996 /* the real guest size will always be smaller than msl */
1997 vcpu->arch.sie_block->mso = 0;
1998 vcpu->arch.sie_block->msl = sclp.hamax;
1999
2000 vcpu->arch.sie_block->icpua = id;
2001 spin_lock_init(&vcpu->arch.local_int.lock);
2002 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2003 vcpu->arch.local_int.wq = &vcpu->wq;
2004 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2005 seqcount_init(&vcpu->arch.cputm_seqcount);
2006
2007 rc = kvm_vcpu_init(vcpu, kvm, id);
2008 if (rc)
2009 goto out_free_sie_block;
2010 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2011 vcpu->arch.sie_block);
2012 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2013
2014 return vcpu;
2015out_free_sie_block:
2016 free_page((unsigned long)(vcpu->arch.sie_block));
2017out_free_cpu:
2018 kmem_cache_free(kvm_vcpu_cache, vcpu);
2019out:
2020 return ERR_PTR(rc);
2021}
2022
2023int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2024{
2025 return kvm_s390_vcpu_has_irq(vcpu, 0);
2026}
2027
2028void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2029{
2030 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2031 exit_sie(vcpu);
2032}
2033
2034void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2035{
2036 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2037}
2038
2039static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2040{
2041 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2042 exit_sie(vcpu);
2043}
2044
2045static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2046{
2047 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2048}
2049
2050/*
2051 * Kick a guest cpu out of SIE and wait until SIE is not running.
2052 * If the CPU is not running (e.g. waiting as idle) the function will
2053 * return immediately. */
2054void exit_sie(struct kvm_vcpu *vcpu)
2055{
2056 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2057 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2058 cpu_relax();
2059}
2060
2061/* Kick a guest cpu out of SIE to process a request synchronously */
2062void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2063{
2064 kvm_make_request(req, vcpu);
2065 kvm_s390_vcpu_request(vcpu);
2066}
2067
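/*
 * gmap invalidation callback: if the invalidated range overlaps a vcpu's
 * prefix area (two pages starting at the prefix), request an MMU reload so
 * the ipte notifier gets re-armed for that vcpu. With the prefix area being
 * [prefix, prefix + 2 * PAGE_SIZE - 1], the overlap check is
 *	prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1
 */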
2068static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2069 unsigned long end)
2070{
2071 struct kvm *kvm = gmap->private;
2072 struct kvm_vcpu *vcpu;
2073 unsigned long prefix;
2074 int i;
2075
2076 if (gmap_is_shadow(gmap))
2077 return;
2078 if (start >= 1UL << 31)
2079 /* We are only interested in prefix pages */
2080 return;
2081 kvm_for_each_vcpu(i, vcpu, kvm) {
2082 /* match against both prefix pages */
2083 prefix = kvm_s390_get_prefix(vcpu);
2084 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2085 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2086 start, end);
2087 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2088 }
2089 }
2090}
2091
2092int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2093{
2094 /* kvm common code refers to this, but never calls it */
2095 BUG();
2096 return 0;
2097}
2098
2099static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2100 struct kvm_one_reg *reg)
2101{
2102 int r = -EINVAL;
2103
2104 switch (reg->id) {
2105 case KVM_REG_S390_TODPR:
2106 r = put_user(vcpu->arch.sie_block->todpr,
2107 (u32 __user *)reg->addr);
2108 break;
2109 case KVM_REG_S390_EPOCHDIFF:
2110 r = put_user(vcpu->arch.sie_block->epoch,
2111 (u64 __user *)reg->addr);
2112 break;
2113 case KVM_REG_S390_CPU_TIMER:
2114 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2115 (u64 __user *)reg->addr);
2116 break;
2117 case KVM_REG_S390_CLOCK_COMP:
2118 r = put_user(vcpu->arch.sie_block->ckc,
2119 (u64 __user *)reg->addr);
2120 break;
2121 case KVM_REG_S390_PFTOKEN:
2122 r = put_user(vcpu->arch.pfault_token,
2123 (u64 __user *)reg->addr);
2124 break;
2125 case KVM_REG_S390_PFCOMPARE:
2126 r = put_user(vcpu->arch.pfault_compare,
2127 (u64 __user *)reg->addr);
2128 break;
2129 case KVM_REG_S390_PFSELECT:
2130 r = put_user(vcpu->arch.pfault_select,
2131 (u64 __user *)reg->addr);
2132 break;
2133 case KVM_REG_S390_PP:
2134 r = put_user(vcpu->arch.sie_block->pp,
2135 (u64 __user *)reg->addr);
2136 break;
2137 case KVM_REG_S390_GBEA:
2138 r = put_user(vcpu->arch.sie_block->gbea,
2139 (u64 __user *)reg->addr);
2140 break;
2141 default:
2142 break;
2143 }
2144
2145 return r;
2146}
2147
2148static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2149 struct kvm_one_reg *reg)
2150{
2151 int r = -EINVAL;
2152 __u64 val;
2153
2154 switch (reg->id) {
2155 case KVM_REG_S390_TODPR:
2156 r = get_user(vcpu->arch.sie_block->todpr,
2157 (u32 __user *)reg->addr);
2158 break;
2159 case KVM_REG_S390_EPOCHDIFF:
2160 r = get_user(vcpu->arch.sie_block->epoch,
2161 (u64 __user *)reg->addr);
2162 break;
2163 case KVM_REG_S390_CPU_TIMER:
2164 r = get_user(val, (u64 __user *)reg->addr);
2165 if (!r)
2166 kvm_s390_set_cpu_timer(vcpu, val);
2167 break;
2168 case KVM_REG_S390_CLOCK_COMP:
2169 r = get_user(vcpu->arch.sie_block->ckc,
2170 (u64 __user *)reg->addr);
2171 break;
2172 case KVM_REG_S390_PFTOKEN:
2173 r = get_user(vcpu->arch.pfault_token,
2174 (u64 __user *)reg->addr);
2175 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2176 kvm_clear_async_pf_completion_queue(vcpu);
2177 break;
2178 case KVM_REG_S390_PFCOMPARE:
2179 r = get_user(vcpu->arch.pfault_compare,
2180 (u64 __user *)reg->addr);
2181 break;
2182 case KVM_REG_S390_PFSELECT:
2183 r = get_user(vcpu->arch.pfault_select,
2184 (u64 __user *)reg->addr);
2185 break;
2186 case KVM_REG_S390_PP:
2187 r = get_user(vcpu->arch.sie_block->pp,
2188 (u64 __user *)reg->addr);
2189 break;
2190 case KVM_REG_S390_GBEA:
2191 r = get_user(vcpu->arch.sie_block->gbea,
2192 (u64 __user *)reg->addr);
2193 break;
2194 default:
2195 break;
2196 }
2197
2198 return r;
2199}
2200
2201static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2202{
2203 kvm_s390_vcpu_initial_reset(vcpu);
2204 return 0;
2205}
2206
2207int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2208{
2209	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2210 return 0;
2211}
2212
2213int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2214{
2215	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2216 return 0;
2217}
2218
2219int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2220 struct kvm_sregs *sregs)
2221{
2222 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2223 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2224 return 0;
2225}
2226
2227int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2228 struct kvm_sregs *sregs)
2229{
2230 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2231 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2232 return 0;
2233}
2234
2235int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2236{
2237 if (test_fp_ctl(fpu->fpc))
2238 return -EINVAL;
2239 vcpu->run->s.regs.fpc = fpu->fpc;
2240 if (MACHINE_HAS_VX)
2241 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2242 (freg_t *) fpu->fprs);
2243 else
2244 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2245 return 0;
2246}
2247
2248int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2249{
2250 /* make sure we have the latest values */
2251 save_fpu_regs();
2252 if (MACHINE_HAS_VX)
2253 convert_vx_to_fp((freg_t *) fpu->fprs,
2254 (__vector128 *) vcpu->run->s.regs.vrs);
2255 else
2256 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2257 fpu->fpc = vcpu->run->s.regs.fpc;
2258 return 0;
2259}
2260
2261static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2262{
2263 int rc = 0;
2264
2265 if (!is_vcpu_stopped(vcpu))
2266 rc = -EBUSY;
2267 else {
2268 vcpu->run->psw_mask = psw.mask;
2269 vcpu->run->psw_addr = psw.addr;
2270 }
2271 return rc;
2272}
2273
2274int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2275 struct kvm_translation *tr)
2276{
2277 return -EINVAL; /* not implemented yet */
2278}
2279
2280#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2281 KVM_GUESTDBG_USE_HW_BP | \
2282 KVM_GUESTDBG_ENABLE)
2283
2284int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2285 struct kvm_guest_debug *dbg)
2286{
2287 int rc = 0;
2288
2289 vcpu->guest_debug = 0;
2290 kvm_s390_clear_bp_data(vcpu);
2291
2292 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2293 return -EINVAL;
2294 if (!sclp.has_gpere)
2295 return -EINVAL;
2296
2297 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2298 vcpu->guest_debug = dbg->control;
2299 /* enforce guest PER */
2300 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2301
2302 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2303 rc = kvm_s390_import_bp_data(vcpu, dbg);
2304 } else {
2305 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2306 vcpu->arch.guestdbg.last_bp = 0;
2307 }
2308
2309 if (rc) {
2310 vcpu->guest_debug = 0;
2311 kvm_s390_clear_bp_data(vcpu);
2312 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2313 }
2314
2315 return rc;
2316}
2317
2318int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2319 struct kvm_mp_state *mp_state)
2320{
2321 /* CHECK_STOP and LOAD are not supported yet */
2322 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2323 KVM_MP_STATE_OPERATING;
2324}
2325
2326int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2327 struct kvm_mp_state *mp_state)
2328{
2329 int rc = 0;
2330
2331 /* user space knows about this interface - let it control the state */
2332 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2333
2334 switch (mp_state->mp_state) {
2335 case KVM_MP_STATE_STOPPED:
2336 kvm_s390_vcpu_stop(vcpu);
2337 break;
2338 case KVM_MP_STATE_OPERATING:
2339 kvm_s390_vcpu_start(vcpu);
2340 break;
2341 case KVM_MP_STATE_LOAD:
2342 case KVM_MP_STATE_CHECK_STOP:
2343 /* fall through - CHECK_STOP and LOAD are not supported yet */
2344 default:
2345 rc = -ENXIO;
2346 }
2347
2348 return rc;
2349}
2350
2351static bool ibs_enabled(struct kvm_vcpu *vcpu)
2352{
2353 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2354}
2355
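/*
 * Process pending vcpu requests before (re-)entering SIE:
 *   MMU_RELOAD   - re-protect the two prefix pages to re-arm the notifier
 *   TLB_FLUSH    - invalidate the cached host CPU id (ihcpu)
 *   ENABLE_IBS / DISABLE_IBS - toggle CPUSTAT_IBS
 *   ICPT_OPEREXC - intercept operation exceptions
 * The loop restarts after each handled request until none are left.
 */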
2356static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2357{
2358retry:
2359 kvm_s390_vcpu_request_handled(vcpu);
2360 if (!vcpu->requests)
2361 return 0;
2362 /*
2363 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2364 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2365 * This ensures that the ipte instruction for this request has
2366 * already finished. We might race against a second unmapper that
2367	 * wants to set the blocking bit. Let's just retry the request loop.
2368 */
2369 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2370 int rc;
2371 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2372 kvm_s390_get_prefix(vcpu),
2373 PAGE_SIZE * 2, PROT_WRITE);
2374 if (rc) {
2375 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2376 return rc;
2377 }
2378 goto retry;
2379 }
2380
2381 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2382 vcpu->arch.sie_block->ihcpu = 0xffff;
2383 goto retry;
2384 }
2385
2386 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2387 if (!ibs_enabled(vcpu)) {
2388 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2389 atomic_or(CPUSTAT_IBS,
2390 &vcpu->arch.sie_block->cpuflags);
2391 }
2392 goto retry;
2393 }
2394
2395 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2396 if (ibs_enabled(vcpu)) {
2397 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2398 atomic_andnot(CPUSTAT_IBS,
2399 &vcpu->arch.sie_block->cpuflags);
2400 }
2401 goto retry;
2402 }
2403
2404 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2405 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2406 goto retry;
2407 }
2408
2409 /* nothing to do, just clear the request */
2410 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2411
2412 return 0;
2413}
2414
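/*
 * The guest TOD is kept as an epoch delta relative to the host TOD:
 *	epoch = guest_tod - get_tod_clock()
 * so a later read of the guest clock is get_tod_clock() + epoch. All vcpus
 * are blocked out of SIE while the new epoch is distributed.
 */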
2415void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2416{
2417 struct kvm_vcpu *vcpu;
2418 int i;
2419
2420 mutex_lock(&kvm->lock);
2421 preempt_disable();
2422 kvm->arch.epoch = tod - get_tod_clock();
2423 kvm_s390_vcpu_block_all(kvm);
2424 kvm_for_each_vcpu(i, vcpu, kvm)
2425 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2426 kvm_s390_vcpu_unblock_all(kvm);
2427 preempt_enable();
2428 mutex_unlock(&kvm->lock);
2429}
2430
2431/**
2432 * kvm_arch_fault_in_page - fault-in guest page if necessary
2433 * @vcpu: The corresponding virtual cpu
2434 * @gpa: Guest physical address
2435 * @writable: Whether the page should be writable or not
2436 *
2437 * Make sure that a guest page has been faulted-in on the host.
2438 *
2439 * Return: Zero on success, negative error code otherwise.
2440 */
2441long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2442{
2443 return gmap_fault(vcpu->arch.gmap, gpa,
2444 writable ? FAULT_FLAG_WRITE : 0);
2445}
2446
2447static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2448 unsigned long token)
2449{
2450 struct kvm_s390_interrupt inti;
2451 struct kvm_s390_irq irq;
2452
2453 if (start_token) {
2454 irq.u.ext.ext_params2 = token;
2455 irq.type = KVM_S390_INT_PFAULT_INIT;
2456 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2457 } else {
2458 inti.type = KVM_S390_INT_PFAULT_DONE;
2459 inti.parm64 = token;
2460 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2461 }
2462}
2463
2464void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2465 struct kvm_async_pf *work)
2466{
2467 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2468 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2469}
2470
2471void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2472 struct kvm_async_pf *work)
2473{
2474 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2475 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2476}
2477
2478void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2479 struct kvm_async_pf *work)
2480{
2481 /* s390 will always inject the page directly */
2482}
2483
2484bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2485{
2486 /*
2487 * s390 will always inject the page directly,
2488	 * but we still want check_async_completion to clean up
2489 */
2490 return true;
2491}
2492
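/*
 * Only set up an async page fault if the guest has enabled
 * program-controlled pfault handling: a valid token, a PSW that matches the
 * pfault compare/select mask, external interrupts enabled, no other
 * interrupt pending, the 0x200 subclass bit set in CR0 (assumed to be the
 * service-signal subclass used by pfault) and pfault enabled on the gmap.
 * The 8-byte token is read from guest real storage at pfault_token.
 * Otherwise the caller falls back to synchronous fault-in.
 */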
2493static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2494{
2495 hva_t hva;
2496 struct kvm_arch_async_pf arch;
2497 int rc;
2498
2499 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2500 return 0;
2501 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2502 vcpu->arch.pfault_compare)
2503 return 0;
2504 if (psw_extint_disabled(vcpu))
2505 return 0;
2506 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2507 return 0;
2508 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2509 return 0;
2510 if (!vcpu->arch.gmap->pfault_enabled)
2511 return 0;
2512
2513 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2514 hva += current->thread.gmap_addr & ~PAGE_MASK;
2515 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2516 return 0;
2517
2518 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2519 return rc;
2520}
2521
2522static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2523{
2524 int rc, cpuflags;
2525
2526 /*
2527 * On s390 notifications for arriving pages will be delivered directly
2528	 * to the guest but the housekeeping for completed pfaults is
2529 * handled outside the worker.
2530 */
2531 kvm_check_async_pf_completion(vcpu);
2532
2533 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2534 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2535
2536 if (need_resched())
2537 schedule();
2538
2539 if (test_cpu_flag(CIF_MCCK_PENDING))
2540 s390_handle_mcck();
2541
2542 if (!kvm_is_ucontrol(vcpu->kvm)) {
2543 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2544 if (rc)
2545 return rc;
2546 }
2547
2548 rc = kvm_s390_handle_requests(vcpu);
2549 if (rc)
2550 return rc;
2551
2552 if (guestdbg_enabled(vcpu)) {
2553 kvm_s390_backup_guest_per_regs(vcpu);
2554 kvm_s390_patch_guest_per_regs(vcpu);
2555 }
2556
2557 vcpu->arch.sie_block->icptcode = 0;
2558 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2559 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2560 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2561
2562 return 0;
2563}
2564
2565static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2566{
2567 struct kvm_s390_pgm_info pgm_info = {
2568 .code = PGM_ADDRESSING,
2569 };
2570 u8 opcode, ilen;
2571 int rc;
2572
2573 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2574 trace_kvm_s390_sie_fault(vcpu);
2575
2576 /*
2577 * We want to inject an addressing exception, which is defined as a
2578 * suppressing or terminating exception. However, since we came here
2579 * by a DAT access exception, the PSW still points to the faulting
2580 * instruction since DAT exceptions are nullifying. So we've got
2581 * to look up the current opcode to get the length of the instruction
2582 * to be able to forward the PSW.
2583 */
2584 rc = read_guest_instr(vcpu, &opcode, 1);
2585 ilen = insn_length(opcode);
2586 if (rc < 0) {
2587 return rc;
2588 } else if (rc) {
2589 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2590 * Forward by arbitrary ilc, injection will take care of
2591 * nullification if necessary.
2592 */
2593 pgm_info = vcpu->arch.pgm;
2594 ilen = 4;
2595 }
2596 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2597 kvm_s390_forward_psw(vcpu, ilen);
2598 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2599}
2600
2601static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2602{
2603 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2604 vcpu->arch.sie_block->icptcode);
2605 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2606
2607 if (guestdbg_enabled(vcpu))
2608 kvm_s390_restore_guest_per_regs(vcpu);
2609
2610 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2611 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2612
2613 if (vcpu->arch.sie_block->icptcode > 0) {
2614 int rc = kvm_handle_sie_intercept(vcpu);
2615
2616 if (rc != -EOPNOTSUPP)
2617 return rc;
2618 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2619 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2620 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2621 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2622 return -EREMOTE;
2623 } else if (exit_reason != -EFAULT) {
2624 vcpu->stat.exit_null++;
2625 return 0;
2626 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2627 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2628 vcpu->run->s390_ucontrol.trans_exc_code =
2629 current->thread.gmap_addr;
2630 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2631 return -EREMOTE;
2632 } else if (current->thread.gmap_pfault) {
2633 trace_kvm_s390_major_guest_pfault(vcpu);
2634 current->thread.gmap_pfault = 0;
2635 if (kvm_arch_setup_async_pf(vcpu))
2636 return 0;
2637 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2638 }
2639 return vcpu_post_run_fault_in_sie(vcpu);
2640}
2641
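/*
 * The inner run loop: do pre-run housekeeping, enter SIE via sie64a() with
 * interrupts disabled around the guest_enter/guest_exit accounting, keep the
 * software cpu-timer accounting switched off while inside SIE, then let
 * vcpu_post_run() translate the exit. Loop until a signal or guest-debug
 * exit is pending or a non-zero rc asks for a return to userspace.
 */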
2642static int __vcpu_run(struct kvm_vcpu *vcpu)
2643{
2644 int rc, exit_reason;
2645
2646 /*
2647	 * We try to hold kvm->srcu during most of vcpu_run (except when
2648	 * running the guest), so that memslots (and other stuff) are protected
2649 */
2650 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2651
2652 do {
2653 rc = vcpu_pre_run(vcpu);
2654 if (rc)
2655 break;
2656
2657 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2658 /*
2659		 * As PF_VCPU will be used in the fault handler, there must be
2660		 * no uaccess between guest_enter and guest_exit.
2661 */
2662 local_irq_disable();
2663 guest_enter_irqoff();
2664 __disable_cpu_timer_accounting(vcpu);
2665 local_irq_enable();
2666 exit_reason = sie64a(vcpu->arch.sie_block,
2667 vcpu->run->s.regs.gprs);
2668 local_irq_disable();
2669 __enable_cpu_timer_accounting(vcpu);
2670 guest_exit_irqoff();
2671 local_irq_enable();
2672 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2673
2674 rc = vcpu_post_run(vcpu, exit_reason);
2675 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2676
2677 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2678 return rc;
2679}
2680
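/*
 * Copy the state userspace handed us in kvm_run into the vcpu / SIE block.
 * The PSW is always taken over; the other fields are only taken over when
 * the corresponding KVM_SYNC_* bit is set in kvm_dirty_regs. Host access
 * and floating point registers are saved and the guest ones made current
 * for the lazy FPU/VX switch; an invalid guest FPC is replaced by 0.
 */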
2681static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2682{
2683 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2684 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2685 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2686 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2687 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2688 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2689 /* some control register changes require a tlb flush */
2690 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2691 }
2692 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2693 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2694 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2695 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2696 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2697 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2698 }
2699 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2700 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2701 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2702 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2703 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2704 kvm_clear_async_pf_completion_queue(vcpu);
2705 }
2706 /*
2707 * If userspace sets the riccb (e.g. after migration) to a valid state,
2708 * we should enable RI here instead of doing the lazy enablement.
2709 */
2710 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2711 test_kvm_facility(vcpu->kvm, 64)) {
2712 struct runtime_instr_cb *riccb =
2713 (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2714
2715 if (riccb->valid)
2716 vcpu->arch.sie_block->ecb3 |= 0x01;
2717 }
2718 save_access_regs(vcpu->arch.host_acrs);
2719 restore_access_regs(vcpu->run->s.regs.acrs);
2720 /* save host (userspace) fprs/vrs */
2721 save_fpu_regs();
2722 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2723 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2724 if (MACHINE_HAS_VX)
2725 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2726 else
2727 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2728 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2729 if (test_fp_ctl(current->thread.fpu.fpc))
2730 /* User space provided an invalid FPC, let's clear it */
2731 current->thread.fpu.fpc = 0;
2732
2733 kvm_run->kvm_dirty_regs = 0;
2734}
2735
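/*
 * Counterpart of sync_regs(): write the current guest state back into
 * kvm_run for userspace and restore the host access and floating point
 * register context.
 */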
2736static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2737{
2738 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2739 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2740 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2741 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2742 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2743 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2744 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2745 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2746 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2747 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2748 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2749 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2750 save_access_regs(vcpu->run->s.regs.acrs);
2751 restore_access_regs(vcpu->arch.host_acrs);
2752 /* Save guest register state */
2753 save_fpu_regs();
2754 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2755 /* Restore will be done lazily at return */
2756 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2757 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2758
2759}
2760
2761int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2762{
2763 int rc;
2764 sigset_t sigsaved;
2765
2766 if (guestdbg_exit_pending(vcpu)) {
2767 kvm_s390_prepare_debug_exit(vcpu);
2768 return 0;
2769 }
2770
2771 if (vcpu->sigset_active)
2772 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2773
2774 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2775 kvm_s390_vcpu_start(vcpu);
2776 } else if (is_vcpu_stopped(vcpu)) {
2777 pr_err_ratelimited("can't run stopped vcpu %d\n",
2778 vcpu->vcpu_id);
2779 return -EINVAL;
2780 }
2781
2782 sync_regs(vcpu, kvm_run);
2783 enable_cpu_timer_accounting(vcpu);
2784
2785 might_fault();
2786 rc = __vcpu_run(vcpu);
2787
2788 if (signal_pending(current) && !rc) {
2789 kvm_run->exit_reason = KVM_EXIT_INTR;
2790 rc = -EINTR;
2791 }
2792
2793 if (guestdbg_exit_pending(vcpu) && !rc) {
2794 kvm_s390_prepare_debug_exit(vcpu);
2795 rc = 0;
2796 }
2797
2798 if (rc == -EREMOTE) {
2799 /* userspace support is needed, kvm_run has been prepared */
2800 rc = 0;
2801 }
2802
2803 disable_cpu_timer_accounting(vcpu);
2804 store_regs(vcpu, kvm_run);
2805
2806 if (vcpu->sigset_active)
2807 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2808
2809 vcpu->stat.exit_userspace++;
2810 return rc;
2811}
2812
2813/*
2814 * store status at address
2815 * we have two special cases:
2816 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2817 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2818 */
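/*
 * For a regular address, gpa is rebased by __LC_FPREGS_SAVE_AREA so the
 * lowcore save-area offsets can be reused directly below. The clock
 * comparator is stored shifted right by 8 bits, which is assumed to match
 * the architected store-status layout.
 */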
2819int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2820{
2821 unsigned char archmode = 1;
2822 freg_t fprs[NUM_FPRS];
2823 unsigned int px;
2824 u64 clkcomp, cputm;
2825 int rc;
2826
2827 px = kvm_s390_get_prefix(vcpu);
2828 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2829 if (write_guest_abs(vcpu, 163, &archmode, 1))
2830 return -EFAULT;
2831 gpa = 0;
2832 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2833 if (write_guest_real(vcpu, 163, &archmode, 1))
2834 return -EFAULT;
2835 gpa = px;
2836 } else
2837 gpa -= __LC_FPREGS_SAVE_AREA;
2838
2839 /* manually convert vector registers if necessary */
2840 if (MACHINE_HAS_VX) {
2841 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2842 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2843 fprs, 128);
2844 } else {
2845 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2846 vcpu->run->s.regs.fprs, 128);
2847 }
2848 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2849 vcpu->run->s.regs.gprs, 128);
2850 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2851 &vcpu->arch.sie_block->gpsw, 16);
2852 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2853 &px, 4);
2854 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2855 &vcpu->run->s.regs.fpc, 4);
2856 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2857 &vcpu->arch.sie_block->todpr, 4);
2858 cputm = kvm_s390_get_cpu_timer(vcpu);
2859 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2860 &cputm, 8);
2861 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2862 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2863 &clkcomp, 8);
2864 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2865 &vcpu->run->s.regs.acrs, 64);
2866 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2867 &vcpu->arch.sie_block->gcr, 128);
2868 return rc ? -EFAULT : 0;
2869}
2870
2871int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2872{
2873 /*
2874 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2875 * switch in the run ioctl. Let's update our copies before we save
2876 * it into the save area
2877	 * them into the save area
2878 save_fpu_regs();
2879 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2880 save_access_regs(vcpu->run->s.regs.acrs);
2881
2882 return kvm_s390_store_status_unloaded(vcpu, addr);
2883}
2884
2885static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2886{
2887 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2888 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2889}
2890
2891static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2892{
2893 unsigned int i;
2894 struct kvm_vcpu *vcpu;
2895
2896 kvm_for_each_vcpu(i, vcpu, kvm) {
2897 __disable_ibs_on_vcpu(vcpu);
2898 }
2899}
2900
2901static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2902{
2903 if (!sclp.has_ibs)
2904 return;
2905 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2906 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2907}
2908
2909void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2910{
2911 int i, online_vcpus, started_vcpus = 0;
2912
2913 if (!is_vcpu_stopped(vcpu))
2914 return;
2915
2916 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2917 /* Only one cpu at a time may enter/leave the STOPPED state. */
2918 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2919 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2920
2921 for (i = 0; i < online_vcpus; i++) {
2922 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2923 started_vcpus++;
2924 }
2925
2926 if (started_vcpus == 0) {
2927 /* we're the only active VCPU -> speed it up */
2928 __enable_ibs_on_vcpu(vcpu);
2929 } else if (started_vcpus == 1) {
2930 /*
2931 * As we are starting a second VCPU, we have to disable
2932 * the IBS facility on all VCPUs to remove potentially
2933		 * outstanding ENABLE requests.
2934 */
2935 __disable_ibs_on_all_vcpus(vcpu->kvm);
2936 }
2937
2938 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2939 /*
2940 * Another VCPU might have used IBS while we were offline.
2941 * Let's play safe and flush the VCPU at startup.
2942 */
2943 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2944 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2945 return;
2946}
2947
2948void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2949{
2950 int i, online_vcpus, started_vcpus = 0;
2951 struct kvm_vcpu *started_vcpu = NULL;
2952
2953 if (is_vcpu_stopped(vcpu))
2954 return;
2955
2956 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2957 /* Only one cpu at a time may enter/leave the STOPPED state. */
2958 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2959 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2960
2961	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2962 kvm_s390_clear_stop_irq(vcpu);
2963
2964 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2965 __disable_ibs_on_vcpu(vcpu);
2966
2967 for (i = 0; i < online_vcpus; i++) {
2968 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2969 started_vcpus++;
2970 started_vcpu = vcpu->kvm->vcpus[i];
2971 }
2972 }
2973
2974 if (started_vcpus == 1) {
2975 /*
2976 * As we only have one VCPU left, we want to enable the
2977 * IBS facility for that VCPU to speed it up.
2978 */
2979 __enable_ibs_on_vcpu(started_vcpu);
2980 }
2981
2982 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2983 return;
2984}
2985
2986static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2987 struct kvm_enable_cap *cap)
2988{
2989 int r;
2990
2991 if (cap->flags)
2992 return -EINVAL;
2993
2994 switch (cap->cap) {
2995 case KVM_CAP_S390_CSS_SUPPORT:
2996 if (!vcpu->kvm->arch.css_support) {
2997 vcpu->kvm->arch.css_support = 1;
2998 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2999 trace_kvm_s390_enable_css(vcpu->kvm);
3000 }
3001 r = 0;
3002 break;
3003 default:
3004 r = -EINVAL;
3005 break;
3006 }
3007 return r;
3008}
3009
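/*
 * Back end for the KVM_S390_MEM_OP vcpu ioctl: reads or writes up to
 * MEM_OP_MAX_SIZE bytes of guest logical memory through a vmalloc'ed bounce
 * buffer, or, with KVM_S390_MEMOP_F_CHECK_ONLY, merely checks the guest
 * address range. With KVM_S390_MEMOP_F_INJECT_EXCEPTION a positive access
 * error is turned into a program interrupt for the guest.
 */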
3010static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3011 struct kvm_s390_mem_op *mop)
3012{
3013 void __user *uaddr = (void __user *)mop->buf;
3014 void *tmpbuf = NULL;
3015 int r, srcu_idx;
3016 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3017 | KVM_S390_MEMOP_F_CHECK_ONLY;
3018
3019 if (mop->flags & ~supported_flags)
3020 return -EINVAL;
3021
3022 if (mop->size > MEM_OP_MAX_SIZE)
3023 return -E2BIG;
3024
3025 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3026 tmpbuf = vmalloc(mop->size);
3027 if (!tmpbuf)
3028 return -ENOMEM;
3029 }
3030
3031 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3032
3033 switch (mop->op) {
3034 case KVM_S390_MEMOP_LOGICAL_READ:
3035 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3036 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3037 mop->size, GACC_FETCH);
3038 break;
3039 }
3040 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3041 if (r == 0) {
3042 if (copy_to_user(uaddr, tmpbuf, mop->size))
3043 r = -EFAULT;
3044 }
3045 break;
3046 case KVM_S390_MEMOP_LOGICAL_WRITE:
3047 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3048 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3049 mop->size, GACC_STORE);
3050 break;
3051 }
3052 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3053 r = -EFAULT;
3054 break;
3055 }
3056 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3057 break;
3058 default:
3059 r = -EINVAL;
3060 }
3061
3062 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3063
3064 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3065 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3066
3067 vfree(tmpbuf);
3068 return r;
3069}
3070
3071long kvm_arch_vcpu_ioctl(struct file *filp,
3072 unsigned int ioctl, unsigned long arg)
3073{
3074 struct kvm_vcpu *vcpu = filp->private_data;
3075 void __user *argp = (void __user *)arg;
3076 int idx;
3077 long r;
3078
3079 switch (ioctl) {
3080 case KVM_S390_IRQ: {
3081 struct kvm_s390_irq s390irq;
3082
3083 r = -EFAULT;
3084 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3085 break;
3086 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3087 break;
3088 }
3089 case KVM_S390_INTERRUPT: {
3090 struct kvm_s390_interrupt s390int;
3091 struct kvm_s390_irq s390irq;
3092
3093 r = -EFAULT;
3094 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3095 break;
3096 if (s390int_to_s390irq(&s390int, &s390irq))
3097 return -EINVAL;
3098 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3099 break;
3100 }
3101 case KVM_S390_STORE_STATUS:
3102 idx = srcu_read_lock(&vcpu->kvm->srcu);
3103 r = kvm_s390_vcpu_store_status(vcpu, arg);
3104 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3105 break;
3106 case KVM_S390_SET_INITIAL_PSW: {
3107 psw_t psw;
3108
3109 r = -EFAULT;
3110 if (copy_from_user(&psw, argp, sizeof(psw)))
3111 break;
3112 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3113 break;
3114 }
3115 case KVM_S390_INITIAL_RESET:
3116 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3117 break;
3118 case KVM_SET_ONE_REG:
3119 case KVM_GET_ONE_REG: {
3120 struct kvm_one_reg reg;
3121 r = -EFAULT;
3122		if (copy_from_user(&reg, argp, sizeof(reg)))
3123			break;
3124		if (ioctl == KVM_SET_ONE_REG)
3125			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3126		else
3127			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3128 break;
3129 }
3130#ifdef CONFIG_KVM_S390_UCONTROL
3131 case KVM_S390_UCAS_MAP: {
3132 struct kvm_s390_ucas_mapping ucasmap;
3133
3134 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3135 r = -EFAULT;
3136 break;
3137 }
3138
3139 if (!kvm_is_ucontrol(vcpu->kvm)) {
3140 r = -EINVAL;
3141 break;
3142 }
3143
3144 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3145 ucasmap.vcpu_addr, ucasmap.length);
3146 break;
3147 }
3148 case KVM_S390_UCAS_UNMAP: {
3149 struct kvm_s390_ucas_mapping ucasmap;
3150
3151 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3152 r = -EFAULT;
3153 break;
3154 }
3155
3156 if (!kvm_is_ucontrol(vcpu->kvm)) {
3157 r = -EINVAL;
3158 break;
3159 }
3160
3161 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3162 ucasmap.length);
3163 break;
3164 }
3165#endif
3166 case KVM_S390_VCPU_FAULT: {
3167 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3168 break;
3169 }
3170 case KVM_ENABLE_CAP:
3171 {
3172 struct kvm_enable_cap cap;
3173 r = -EFAULT;
3174 if (copy_from_user(&cap, argp, sizeof(cap)))
3175 break;
3176 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3177 break;
3178 }
3179 case KVM_S390_MEM_OP: {
3180 struct kvm_s390_mem_op mem_op;
3181
3182 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3183 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3184 else
3185 r = -EFAULT;
3186 break;
3187 }
3188 case KVM_S390_SET_IRQ_STATE: {
3189 struct kvm_s390_irq_state irq_state;
3190
3191 r = -EFAULT;
3192 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3193 break;
3194 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3195 irq_state.len == 0 ||
3196 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3197 r = -EINVAL;
3198 break;
3199 }
3200 r = kvm_s390_set_irq_state(vcpu,
3201 (void __user *) irq_state.buf,
3202 irq_state.len);
3203 break;
3204 }
3205 case KVM_S390_GET_IRQ_STATE: {
3206 struct kvm_s390_irq_state irq_state;
3207
3208 r = -EFAULT;
3209 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3210 break;
3211 if (irq_state.len == 0) {
3212 r = -EINVAL;
3213 break;
3214 }
3215 r = kvm_s390_get_irq_state(vcpu,
3216 (__u8 __user *) irq_state.buf,
3217 irq_state.len);
3218 break;
3219 }
3220 default:
3221 r = -ENOTTY;
3222 }
3223 return r;
3224}
3225
3226int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3227{
3228#ifdef CONFIG_KVM_S390_UCONTROL
3229 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3230 && (kvm_is_ucontrol(vcpu->kvm))) {
3231 vmf->page = virt_to_page(vcpu->arch.sie_block);
3232 get_page(vmf->page);
3233 return 0;
3234 }
3235#endif
3236 return VM_FAULT_SIGBUS;
3237}
3238
3239int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3240 unsigned long npages)
3241{
3242 return 0;
3243}
3244
3245/* Section: memory related */
3246int kvm_arch_prepare_memory_region(struct kvm *kvm,
3247 struct kvm_memory_slot *memslot,
3248 const struct kvm_userspace_memory_region *mem,
3249 enum kvm_mr_change change)
3250{
3251	/* A few sanity checks. Memory slots have to start and end at a segment
3252	   boundary (1 MB). The memory in userland may be fragmented into various
3253	   different vmas. It is okay to mmap() and munmap() stuff in this slot
3254	   after doing this call at any time */
3255
3256 if (mem->userspace_addr & 0xffffful)
3257 return -EINVAL;
3258
3259 if (mem->memory_size & 0xffffful)
3260 return -EINVAL;
3261
3262 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3263 return -EINVAL;
3264
3265 return 0;
3266}
3267
3268void kvm_arch_commit_memory_region(struct kvm *kvm,
3269 const struct kvm_userspace_memory_region *mem,
3270 const struct kvm_memory_slot *old,
3271 const struct kvm_memory_slot *new,
3272 enum kvm_mr_change change)
3273{
3274 int rc;
3275
3276 /* If the basics of the memslot do not change, we do not want
3277 * to update the gmap. Every update causes several unnecessary
3278 * segment translation exceptions. This is usually handled just
3279 * fine by the normal fault handler + gmap, but it will also
3280 * cause faults on the prefix page of running guest CPUs.
3281 */
3282 if (old->userspace_addr == mem->userspace_addr &&
3283 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3284 old->npages * PAGE_SIZE == mem->memory_size)
3285 return;
3286
3287 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3288 mem->guest_phys_addr, mem->memory_size);
3289 if (rc)
3290 pr_warn("failed to commit memory region\n");
3291 return;
3292}
3293
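/*
 * Extract the i-th 2-bit field (counting from the most significant end,
 * assuming sclp.hmfai is 32 bits wide) and turn it into a mask for the
 * non-hypervisor-managed facility bits, 16 facilities per step:
 *	nonhyp_fai = 0 -> 0x0000ffffffffffff (48 facility bits)
 *	nonhyp_fai = 1 -> 0x00000000ffffffff
 *	nonhyp_fai = 2 -> 0x000000000000ffff
 *	nonhyp_fai = 3 -> 0
 */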
3294static inline unsigned long nonhyp_mask(int i)
3295{
3296 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3297
3298 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3299}
3300
3301void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3302{
3303 vcpu->valid_wakeup = false;
3304}
3305
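/*
 * Module init: refuse to load when SIE (sclp.has_sief2) is not available,
 * then extend kvm_s390_fac_list_mask by the host STFLE facilities that are
 * not hypervisor managed according to nonhyp_mask().
 */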
3306static int __init kvm_s390_init(void)
3307{
3308 int i;
3309
3310 if (!sclp.has_sief2) {
3311 pr_info("SIE not available\n");
3312 return -ENODEV;
3313 }
3314
3315 for (i = 0; i < 16; i++)
3316 kvm_s390_fac_list_mask[i] |=
3317 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3318
3319 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3320}
3321
3322static void __exit kvm_s390_exit(void)
3323{
3324 kvm_exit();
3325}
3326
3327module_init(kvm_s390_init);
3328module_exit(kvm_s390_exit);
3329
3330/*
3331 * Enable autoloading of the kvm module.
3332 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3333 * since x86 takes a different approach.
3334 */
3335#include <linux/miscdevice.h>
3336MODULE_ALIAS_MISCDEV(KVM_MINOR);
3337MODULE_ALIAS("devname:kvm");
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * hosting IBM Z kernel virtual machines (s390x)
4 *
5 * Copyright IBM Corp. 2008, 2020
6 *
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Christian Ehrhardt <ehrhardt@de.ibm.com>
10 * Jason J. Herne <jjherne@us.ibm.com>
11 */
12
13#define KMSG_COMPONENT "kvm-s390"
14#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
15
16#include <linux/compiler.h>
17#include <linux/err.h>
18#include <linux/fs.h>
19#include <linux/hrtimer.h>
20#include <linux/init.h>
21#include <linux/kvm.h>
22#include <linux/kvm_host.h>
23#include <linux/mman.h>
24#include <linux/module.h>
25#include <linux/moduleparam.h>
26#include <linux/random.h>
27#include <linux/slab.h>
28#include <linux/timer.h>
29#include <linux/vmalloc.h>
30#include <linux/bitmap.h>
31#include <linux/sched/signal.h>
32#include <linux/string.h>
33#include <linux/pgtable.h>
34#include <linux/mmu_notifier.h>
35
36#include <asm/access-regs.h>
37#include <asm/asm-offsets.h>
38#include <asm/lowcore.h>
39#include <asm/stp.h>
40#include <asm/gmap.h>
41#include <asm/nmi.h>
42#include <asm/isc.h>
43#include <asm/sclp.h>
44#include <asm/cpacf.h>
45#include <asm/timex.h>
46#include <asm/asm.h>
47#include <asm/fpu.h>
48#include <asm/ap.h>
49#include <asm/uv.h>
50#include "kvm-s390.h"
51#include "gaccess.h"
52#include "pci.h"
53
54#define CREATE_TRACE_POINTS
55#include "trace.h"
56#include "trace-s390.h"
57
58#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
59#define LOCAL_IRQS 32
60#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
61 (KVM_MAX_VCPUS + LOCAL_IRQS))
62
63const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
64 KVM_GENERIC_VM_STATS(),
65 STATS_DESC_COUNTER(VM, inject_io),
66 STATS_DESC_COUNTER(VM, inject_float_mchk),
67 STATS_DESC_COUNTER(VM, inject_pfault_done),
68 STATS_DESC_COUNTER(VM, inject_service_signal),
69 STATS_DESC_COUNTER(VM, inject_virtio),
70 STATS_DESC_COUNTER(VM, aen_forward),
71 STATS_DESC_COUNTER(VM, gmap_shadow_reuse),
72 STATS_DESC_COUNTER(VM, gmap_shadow_create),
73 STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry),
74 STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry),
75 STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry),
76 STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry),
77 STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry),
78};
79
80const struct kvm_stats_header kvm_vm_stats_header = {
81 .name_size = KVM_STATS_NAME_SIZE,
82 .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
83 .id_offset = sizeof(struct kvm_stats_header),
84 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
85 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
86 sizeof(kvm_vm_stats_desc),
87};
88
89const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
90 KVM_GENERIC_VCPU_STATS(),
91 STATS_DESC_COUNTER(VCPU, exit_userspace),
92 STATS_DESC_COUNTER(VCPU, exit_null),
93 STATS_DESC_COUNTER(VCPU, exit_external_request),
94 STATS_DESC_COUNTER(VCPU, exit_io_request),
95 STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
96 STATS_DESC_COUNTER(VCPU, exit_stop_request),
97 STATS_DESC_COUNTER(VCPU, exit_validity),
98 STATS_DESC_COUNTER(VCPU, exit_instruction),
99 STATS_DESC_COUNTER(VCPU, exit_pei),
100 STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
101 STATS_DESC_COUNTER(VCPU, instruction_lctl),
102 STATS_DESC_COUNTER(VCPU, instruction_lctlg),
103 STATS_DESC_COUNTER(VCPU, instruction_stctl),
104 STATS_DESC_COUNTER(VCPU, instruction_stctg),
105 STATS_DESC_COUNTER(VCPU, exit_program_interruption),
106 STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
107 STATS_DESC_COUNTER(VCPU, exit_operation_exception),
108 STATS_DESC_COUNTER(VCPU, deliver_ckc),
109 STATS_DESC_COUNTER(VCPU, deliver_cputm),
110 STATS_DESC_COUNTER(VCPU, deliver_external_call),
111 STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
112 STATS_DESC_COUNTER(VCPU, deliver_service_signal),
113 STATS_DESC_COUNTER(VCPU, deliver_virtio),
114 STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
115 STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
116 STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
117 STATS_DESC_COUNTER(VCPU, deliver_program),
118 STATS_DESC_COUNTER(VCPU, deliver_io),
119 STATS_DESC_COUNTER(VCPU, deliver_machine_check),
120 STATS_DESC_COUNTER(VCPU, exit_wait_state),
121 STATS_DESC_COUNTER(VCPU, inject_ckc),
122 STATS_DESC_COUNTER(VCPU, inject_cputm),
123 STATS_DESC_COUNTER(VCPU, inject_external_call),
124 STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
125 STATS_DESC_COUNTER(VCPU, inject_mchk),
126 STATS_DESC_COUNTER(VCPU, inject_pfault_init),
127 STATS_DESC_COUNTER(VCPU, inject_program),
128 STATS_DESC_COUNTER(VCPU, inject_restart),
129 STATS_DESC_COUNTER(VCPU, inject_set_prefix),
130 STATS_DESC_COUNTER(VCPU, inject_stop_signal),
131 STATS_DESC_COUNTER(VCPU, instruction_epsw),
132 STATS_DESC_COUNTER(VCPU, instruction_gs),
133 STATS_DESC_COUNTER(VCPU, instruction_io_other),
134 STATS_DESC_COUNTER(VCPU, instruction_lpsw),
135 STATS_DESC_COUNTER(VCPU, instruction_lpswe),
136 STATS_DESC_COUNTER(VCPU, instruction_lpswey),
137 STATS_DESC_COUNTER(VCPU, instruction_pfmf),
138 STATS_DESC_COUNTER(VCPU, instruction_ptff),
139 STATS_DESC_COUNTER(VCPU, instruction_sck),
140 STATS_DESC_COUNTER(VCPU, instruction_sckpf),
141 STATS_DESC_COUNTER(VCPU, instruction_stidp),
142 STATS_DESC_COUNTER(VCPU, instruction_spx),
143 STATS_DESC_COUNTER(VCPU, instruction_stpx),
144 STATS_DESC_COUNTER(VCPU, instruction_stap),
145 STATS_DESC_COUNTER(VCPU, instruction_iske),
146 STATS_DESC_COUNTER(VCPU, instruction_ri),
147 STATS_DESC_COUNTER(VCPU, instruction_rrbe),
148 STATS_DESC_COUNTER(VCPU, instruction_sske),
149 STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
150 STATS_DESC_COUNTER(VCPU, instruction_stsi),
151 STATS_DESC_COUNTER(VCPU, instruction_stfl),
152 STATS_DESC_COUNTER(VCPU, instruction_tb),
153 STATS_DESC_COUNTER(VCPU, instruction_tpi),
154 STATS_DESC_COUNTER(VCPU, instruction_tprot),
155 STATS_DESC_COUNTER(VCPU, instruction_tsch),
156 STATS_DESC_COUNTER(VCPU, instruction_sie),
157 STATS_DESC_COUNTER(VCPU, instruction_essa),
158 STATS_DESC_COUNTER(VCPU, instruction_sthyi),
159 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
160 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
161 STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
162 STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
163 STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
164 STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
165 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
166 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
167 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
168 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
169 STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
170 STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
171 STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
172 STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
173 STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
174 STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
175 STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
176 STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
177 STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
178 STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
179 STATS_DESC_COUNTER(VCPU, diag_9c_forward),
180 STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
181 STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
182 STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
183 STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
184 STATS_DESC_COUNTER(VCPU, pfault_sync)
185};
186
187const struct kvm_stats_header kvm_vcpu_stats_header = {
188 .name_size = KVM_STATS_NAME_SIZE,
189 .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
190 .id_offset = sizeof(struct kvm_stats_header),
191 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
192 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
193 sizeof(kvm_vcpu_stats_desc),
194};
195
196/* allow nested virtualization in KVM (if enabled by user space) */
197static int nested;
198module_param(nested, int, S_IRUGO);
199MODULE_PARM_DESC(nested, "Nested virtualization support");
200
201/* allow 1m huge page guest backing, if !nested */
202static int hpage;
203module_param(hpage, int, 0444);
204MODULE_PARM_DESC(hpage, "1m huge page backing support");
205
206/* maximum percentage of steal time for polling. >100 is treated like 100 */
207static u8 halt_poll_max_steal = 10;
208module_param(halt_poll_max_steal, byte, 0644);
209MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
210
211/* if set to true, the GISA will be initialized and used if available */
212static bool use_gisa = true;
213module_param(use_gisa, bool, 0644);
214MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
215
216/* maximum diag9c forwarding per second */
217unsigned int diag9c_forwarding_hz;
218module_param(diag9c_forwarding_hz, uint, 0644);
219MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
220
221/*
222 * allow asynchronous deinit for protected guests; enable by default since
223 * the feature is opt-in anyway
224 */
225static int async_destroy = 1;
226module_param(async_destroy, int, 0444);
227MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");
228
229/*
230 * For now we handle at most 16 double words as this is what the s390 base
231 * kernel handles and stores in the prefix page. If we ever need to go beyond
232 * this, the code needs to change, but the external uapi can stay.
233 */
234#define SIZE_INTERNAL 16
235
236/*
237 * Base feature mask that defines default mask for facilities. Consists of the
238 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
239 */
240static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
241/*
242 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
243 * and defines the facilities that can be enabled via a cpu model.
244 */
245static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
246
247static unsigned long kvm_s390_fac_size(void)
248{
249 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
250 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
251 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
252 sizeof(stfle_fac_list));
253
254 return SIZE_INTERNAL;
255}
256
257/* available cpu features supported by kvm */
258static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
259/* available subfunctions indicated via query / "test bit" */
260static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
261
262static struct gmap_notifier gmap_notifier;
263static struct gmap_notifier vsie_gmap_notifier;
264debug_info_t *kvm_s390_dbf;
265debug_info_t *kvm_s390_dbf_uv;
266
267/* Section: not file related */
268/* forward declarations */
269static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
270 unsigned long end);
271static int sca_switch_to_extended(struct kvm *kvm);
272
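/*
 * Apply a host TOD steering delta to one SIE control block: the epoch is
 * adjusted by -delta, and when the multiple-epoch facility is in use
 * (ECD_MEF) the epoch index (epdx) is corrected for sign extension and
 * carry so that the combined epoch/epoch-index value stays consistent.
 */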
273static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
274{
275 u8 delta_idx = 0;
276
277 /*
278	 * The TOD jumps by delta; we have to compensate for this by adding
279 * -delta to the epoch.
280 */
281 delta = -delta;
282
283 /* sign-extension - we're adding to signed values below */
284 if ((s64)delta < 0)
285 delta_idx = -1;
286
287 scb->epoch += delta;
288 if (scb->ecd & ECD_MEF) {
289 scb->epdx += delta_idx;
290 if (scb->epoch < delta)
291 scb->epdx += 1;
292 }
293}
294
295/*
296 * This callback is executed during stop_machine(). All CPUs are therefore
297 * temporarily stopped. In order not to change guest behavior, we have to
298 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
299 * so a CPU won't be stopped while calculating with the epoch.
300 */
301static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
302 void *v)
303{
304 struct kvm *kvm;
305 struct kvm_vcpu *vcpu;
306 unsigned long i;
307 unsigned long long *delta = v;
308
309 list_for_each_entry(kvm, &vm_list, vm_list) {
310 kvm_for_each_vcpu(i, vcpu, kvm) {
311 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
312 if (i == 0) {
313 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
314 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
315 }
316 if (vcpu->arch.cputm_enabled)
317 vcpu->arch.cputm_start += *delta;
318 if (vcpu->arch.vsie_block)
319 kvm_clock_sync_scb(vcpu->arch.vsie_block,
320 *delta);
321 }
322 }
323 return NOTIFY_OK;
324}
325
326static struct notifier_block kvm_clock_notifier = {
327 .notifier_call = kvm_clock_sync,
328};
329
330static void allow_cpu_feat(unsigned long nr)
331{
332 set_bit_inv(nr, kvm_s390_available_cpu_feat);
333}
334
335static inline int plo_test_bit(unsigned char nr)
336{
337 unsigned long function = (unsigned long)nr | 0x100;
338 int cc;
339
340 asm volatile(
341 " lgr 0,%[function]\n"
342 /* Parameter registers are ignored for "test bit" */
343 " plo 0,0,0,0(0)\n"
344 CC_IPM(cc)
345 : CC_OUT(cc, cc)
346 : [function] "d" (function)
347 : CC_CLOBBER_LIST("0"));
348 return CC_TRANSFORM(cc) == 0;
349}
350
351static __always_inline void pfcr_query(u8 (*query)[16])
352{
353 asm volatile(
354 " lghi 0,0\n"
355 " .insn rsy,0xeb0000000016,0,0,%[query]\n"
356 : [query] "=QS" (*query)
357 :
358 : "cc", "0");
359}
360
361static __always_inline void __sortl_query(u8 (*query)[32])
362{
363 asm volatile(
364 " lghi 0,0\n"
365 " la 1,%[query]\n"
366 /* Parameter registers are ignored */
367 " .insn rre,0xb9380000,2,4\n"
368 : [query] "=R" (*query)
369 :
370 : "cc", "0", "1");
371}
372
373static __always_inline void __dfltcc_query(u8 (*query)[32])
374{
375 asm volatile(
376 " lghi 0,0\n"
377 " la 1,%[query]\n"
378 /* Parameter registers are ignored */
379 " .insn rrf,0xb9390000,2,4,6,0\n"
380 : [query] "=R" (*query)
381 :
382 : "cc", "0", "1");
383}
384
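/*
 * Probe which optional subfunctions and SIE features the host provides:
 * PLO test bits, PTFF, the CPACF query functions for the various MSA
 * levels, SORTL, DFLTCC and PFCR results are collected in
 * kvm_s390_available_subfunc, and the KVM_S390_VM_CPU_FEAT_* bits are
 * derived from SCLP/STFLE. The SIE-related features are only advertised
 * when nested=1 and the prerequisites listed below are present.
 */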
385static void __init kvm_s390_cpu_feat_init(void)
386{
387 int i;
388
389 for (i = 0; i < 256; ++i) {
390 if (plo_test_bit(i))
391 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
392 }
393
394 if (test_facility(28)) /* TOD-clock steering */
395 ptff(kvm_s390_available_subfunc.ptff,
396 sizeof(kvm_s390_available_subfunc.ptff),
397 PTFF_QAF);
398
399 if (test_facility(17)) { /* MSA */
400 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
401 kvm_s390_available_subfunc.kmac);
402 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
403 kvm_s390_available_subfunc.kmc);
404 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
405 kvm_s390_available_subfunc.km);
406 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
407 kvm_s390_available_subfunc.kimd);
408 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
409 kvm_s390_available_subfunc.klmd);
410 }
411 if (test_facility(76)) /* MSA3 */
412 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
413 kvm_s390_available_subfunc.pckmo);
414 if (test_facility(77)) { /* MSA4 */
415 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
416 kvm_s390_available_subfunc.kmctr);
417 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
418 kvm_s390_available_subfunc.kmf);
419 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
420 kvm_s390_available_subfunc.kmo);
421 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
422 kvm_s390_available_subfunc.pcc);
423 }
424 if (test_facility(57)) /* MSA5 */
425 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
426 kvm_s390_available_subfunc.ppno);
427
428 if (test_facility(146)) /* MSA8 */
429 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
430 kvm_s390_available_subfunc.kma);
431
432 if (test_facility(155)) /* MSA9 */
433 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
434 kvm_s390_available_subfunc.kdsa);
435
436 if (test_facility(150)) /* SORTL */
437 __sortl_query(&kvm_s390_available_subfunc.sortl);
438
439 if (test_facility(151)) /* DFLTCC */
440 __dfltcc_query(&kvm_s390_available_subfunc.dfltcc);
441
442 if (test_facility(201)) /* PFCR */
443 pfcr_query(&kvm_s390_available_subfunc.pfcr);
444
445 if (MACHINE_HAS_ESOP)
446 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
447 /*
448 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
449 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
450 */
451 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
452 !test_facility(3) || !nested)
453 return;
454 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
455 if (sclp.has_64bscao)
456 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
457 if (sclp.has_siif)
458 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
459 if (sclp.has_gpere)
460 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
461 if (sclp.has_gsls)
462 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
463 if (sclp.has_ib)
464 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
465 if (sclp.has_cei)
466 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
467 if (sclp.has_ibs)
468 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
469 if (sclp.has_kss)
470 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
471 /*
472 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
473 * all skey handling functions read/set the skey from the PGSTE
474 * instead of the real storage key.
475 *
 476 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
 477 * pages to be detected as preserved although they are resident.
478 *
479 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
480 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
481 *
482 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
483 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
484 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
485 *
486 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
487 * cannot easily shadow the SCA because of the ipte lock.
488 */
489}
490
491static int __init __kvm_s390_init(void)
492{
493 int rc = -ENOMEM;
494
495 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
496 if (!kvm_s390_dbf)
497 return -ENOMEM;
498
499 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
500 if (!kvm_s390_dbf_uv)
501 goto err_kvm_uv;
502
503 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
504 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
505 goto err_debug_view;
506
507 kvm_s390_cpu_feat_init();
508
509 /* Register floating interrupt controller interface. */
510 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
511 if (rc) {
512 pr_err("A FLIC registration call failed with rc=%d\n", rc);
513 goto err_flic;
514 }
515
516 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
517 rc = kvm_s390_pci_init();
518 if (rc) {
519 pr_err("Unable to allocate AIFT for PCI\n");
520 goto err_pci;
521 }
522 }
523
524 rc = kvm_s390_gib_init(GAL_ISC);
525 if (rc)
526 goto err_gib;
527
528 gmap_notifier.notifier_call = kvm_gmap_notifier;
529 gmap_register_pte_notifier(&gmap_notifier);
530 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
531 gmap_register_pte_notifier(&vsie_gmap_notifier);
532 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
533 &kvm_clock_notifier);
534
535 return 0;
536
537err_gib:
538 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
539 kvm_s390_pci_exit();
540err_pci:
541err_flic:
542err_debug_view:
543 debug_unregister(kvm_s390_dbf_uv);
544err_kvm_uv:
545 debug_unregister(kvm_s390_dbf);
546 return rc;
547}
548
549static void __kvm_s390_exit(void)
550{
551 gmap_unregister_pte_notifier(&gmap_notifier);
552 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
553 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
554 &kvm_clock_notifier);
555
556 kvm_s390_gib_destroy();
557 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
558 kvm_s390_pci_exit();
559 debug_unregister(kvm_s390_dbf);
560 debug_unregister(kvm_s390_dbf_uv);
561}
562
563/* Section: device related */
564long kvm_arch_dev_ioctl(struct file *filp,
565 unsigned int ioctl, unsigned long arg)
566{
567 if (ioctl == KVM_S390_ENABLE_SIE)
568 return s390_enable_sie();
569 return -EINVAL;
570}
571
572int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
573{
574 int r;
575
576 switch (ext) {
577 case KVM_CAP_S390_PSW:
578 case KVM_CAP_S390_GMAP:
579 case KVM_CAP_SYNC_MMU:
580#ifdef CONFIG_KVM_S390_UCONTROL
581 case KVM_CAP_S390_UCONTROL:
582#endif
583 case KVM_CAP_ASYNC_PF:
584 case KVM_CAP_SYNC_REGS:
585 case KVM_CAP_ONE_REG:
586 case KVM_CAP_ENABLE_CAP:
587 case KVM_CAP_S390_CSS_SUPPORT:
588 case KVM_CAP_IOEVENTFD:
589 case KVM_CAP_S390_IRQCHIP:
590 case KVM_CAP_VM_ATTRIBUTES:
591 case KVM_CAP_MP_STATE:
592 case KVM_CAP_IMMEDIATE_EXIT:
593 case KVM_CAP_S390_INJECT_IRQ:
594 case KVM_CAP_S390_USER_SIGP:
595 case KVM_CAP_S390_USER_STSI:
596 case KVM_CAP_S390_SKEYS:
597 case KVM_CAP_S390_IRQ_STATE:
598 case KVM_CAP_S390_USER_INSTR0:
599 case KVM_CAP_S390_CMMA_MIGRATION:
600 case KVM_CAP_S390_AIS:
601 case KVM_CAP_S390_AIS_MIGRATION:
602 case KVM_CAP_S390_VCPU_RESETS:
603 case KVM_CAP_SET_GUEST_DEBUG:
604 case KVM_CAP_S390_DIAG318:
605 case KVM_CAP_IRQFD_RESAMPLE:
606 r = 1;
607 break;
608 case KVM_CAP_SET_GUEST_DEBUG2:
609 r = KVM_GUESTDBG_VALID_MASK;
610 break;
611 case KVM_CAP_S390_HPAGE_1M:
612 r = 0;
613 if (hpage && !(kvm && kvm_is_ucontrol(kvm)))
614 r = 1;
615 break;
616 case KVM_CAP_S390_MEM_OP:
617 r = MEM_OP_MAX_SIZE;
618 break;
619 case KVM_CAP_S390_MEM_OP_EXTENSION:
620 /*
621 * Flag bits indicating which extensions are supported.
622 * If r > 0, the base extension must also be supported/indicated,
623 * in order to maintain backwards compatibility.
624 */
625 r = KVM_S390_MEMOP_EXTENSION_CAP_BASE |
626 KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG;
627 break;
628 case KVM_CAP_NR_VCPUS:
629 case KVM_CAP_MAX_VCPUS:
630 case KVM_CAP_MAX_VCPU_ID:
631 r = KVM_S390_BSCA_CPU_SLOTS;
632 if (!kvm_s390_use_sca_entries())
633 r = KVM_MAX_VCPUS;
634 else if (sclp.has_esca && sclp.has_64bscao)
635 r = KVM_S390_ESCA_CPU_SLOTS;
636 if (ext == KVM_CAP_NR_VCPUS)
637 r = min_t(unsigned int, num_online_cpus(), r);
638 break;
639 case KVM_CAP_S390_COW:
640 r = MACHINE_HAS_ESOP;
641 break;
642 case KVM_CAP_S390_VECTOR_REGISTERS:
643 r = test_facility(129);
644 break;
645 case KVM_CAP_S390_RI:
646 r = test_facility(64);
647 break;
648 case KVM_CAP_S390_GS:
649 r = test_facility(133);
650 break;
651 case KVM_CAP_S390_BPB:
652 r = test_facility(82);
653 break;
654 case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
655 r = async_destroy && is_prot_virt_host();
656 break;
657 case KVM_CAP_S390_PROTECTED:
658 r = is_prot_virt_host();
659 break;
660 case KVM_CAP_S390_PROTECTED_DUMP: {
661 u64 pv_cmds_dump[] = {
662 BIT_UVC_CMD_DUMP_INIT,
663 BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
664 BIT_UVC_CMD_DUMP_CPU,
665 BIT_UVC_CMD_DUMP_COMPLETE,
666 };
667 int i;
668
669 r = is_prot_virt_host();
670
671 for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
672 if (!test_bit_inv(pv_cmds_dump[i],
673 (unsigned long *)&uv_info.inst_calls_list)) {
674 r = 0;
675 break;
676 }
677 }
678 break;
679 }
680 case KVM_CAP_S390_ZPCI_OP:
681 r = kvm_s390_pci_interp_allowed();
682 break;
683 case KVM_CAP_S390_CPU_TOPOLOGY:
684 r = test_facility(11);
685 break;
686 default:
687 r = 0;
688 }
689 return r;
690}
691
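/*
 * Transfer the dirty state tracked in the gmap/PGSTEs into KVM's dirty
 * bitmap for this memslot, one segment (_PAGE_ENTRIES pages) at a time.
 * Userspace later retrieves the result via KVM_GET_DIRTY_LOG.
 */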
692void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
693{
694 int i;
695 gfn_t cur_gfn, last_gfn;
696 unsigned long gaddr, vmaddr;
697 struct gmap *gmap = kvm->arch.gmap;
698 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
699
700 /* Loop over all guest segments */
701 cur_gfn = memslot->base_gfn;
702 last_gfn = memslot->base_gfn + memslot->npages;
703 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
704 gaddr = gfn_to_gpa(cur_gfn);
705 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
706 if (kvm_is_error_hva(vmaddr))
707 continue;
708
709 bitmap_zero(bitmap, _PAGE_ENTRIES);
710 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
711 for (i = 0; i < _PAGE_ENTRIES; i++) {
712 if (test_bit(i, bitmap))
713 mark_page_dirty(kvm, cur_gfn + i);
714 }
715
716 if (fatal_signal_pending(current))
717 return;
718 cond_resched();
719 }
720}
721
722/* Section: vm related */
723static void sca_del_vcpu(struct kvm_vcpu *vcpu);
724
725/*
726 * Get (and clear) the dirty memory log for a memory slot.
727 */
728int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
729 struct kvm_dirty_log *log)
730{
731 int r;
732 unsigned long n;
733 struct kvm_memory_slot *memslot;
734 int is_dirty;
735
736 if (kvm_is_ucontrol(kvm))
737 return -EINVAL;
738
739 mutex_lock(&kvm->slots_lock);
740
741 r = -EINVAL;
742 if (log->slot >= KVM_USER_MEM_SLOTS)
743 goto out;
744
745 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
746 if (r)
747 goto out;
748
749 /* Clear the dirty log */
750 if (is_dirty) {
751 n = kvm_dirty_bitmap_bytes(memslot);
752 memset(memslot->dirty_bitmap, 0, n);
753 }
754 r = 0;
755out:
756 mutex_unlock(&kvm->slots_lock);
757 return r;
758}
759
760static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
761{
762 unsigned long i;
763 struct kvm_vcpu *vcpu;
764
765 kvm_for_each_vcpu(i, vcpu, kvm) {
766 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
767 }
768}
769
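/*
 * VM capabilities are turned on through the KVM_ENABLE_CAP ioctl on the VM
 * file descriptor. A minimal userspace sketch (vm_fd is assumed to be an
 * already created VM fd, it is not defined in this file):
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_USER_SIGP,
 *	};
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
 *		perror("KVM_ENABLE_CAP");
 */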
770int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
771{
772 int r;
773
774 if (cap->flags)
775 return -EINVAL;
776
777 switch (cap->cap) {
778 case KVM_CAP_S390_IRQCHIP:
779 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
780 kvm->arch.use_irqchip = 1;
781 r = 0;
782 break;
783 case KVM_CAP_S390_USER_SIGP:
784 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
785 kvm->arch.user_sigp = 1;
786 r = 0;
787 break;
788 case KVM_CAP_S390_VECTOR_REGISTERS:
789 mutex_lock(&kvm->lock);
790 if (kvm->created_vcpus) {
791 r = -EBUSY;
792 } else if (cpu_has_vx()) {
793 set_kvm_facility(kvm->arch.model.fac_mask, 129);
794 set_kvm_facility(kvm->arch.model.fac_list, 129);
795 if (test_facility(134)) {
796 set_kvm_facility(kvm->arch.model.fac_mask, 134);
797 set_kvm_facility(kvm->arch.model.fac_list, 134);
798 }
799 if (test_facility(135)) {
800 set_kvm_facility(kvm->arch.model.fac_mask, 135);
801 set_kvm_facility(kvm->arch.model.fac_list, 135);
802 }
803 if (test_facility(148)) {
804 set_kvm_facility(kvm->arch.model.fac_mask, 148);
805 set_kvm_facility(kvm->arch.model.fac_list, 148);
806 }
807 if (test_facility(152)) {
808 set_kvm_facility(kvm->arch.model.fac_mask, 152);
809 set_kvm_facility(kvm->arch.model.fac_list, 152);
810 }
811 if (test_facility(192)) {
812 set_kvm_facility(kvm->arch.model.fac_mask, 192);
813 set_kvm_facility(kvm->arch.model.fac_list, 192);
814 }
815 if (test_facility(198)) {
816 set_kvm_facility(kvm->arch.model.fac_mask, 198);
817 set_kvm_facility(kvm->arch.model.fac_list, 198);
818 }
819 if (test_facility(199)) {
820 set_kvm_facility(kvm->arch.model.fac_mask, 199);
821 set_kvm_facility(kvm->arch.model.fac_list, 199);
822 }
823 r = 0;
824 } else
825 r = -EINVAL;
826 mutex_unlock(&kvm->lock);
827 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
828 r ? "(not available)" : "(success)");
829 break;
830 case KVM_CAP_S390_RI:
831 r = -EINVAL;
832 mutex_lock(&kvm->lock);
833 if (kvm->created_vcpus) {
834 r = -EBUSY;
835 } else if (test_facility(64)) {
836 set_kvm_facility(kvm->arch.model.fac_mask, 64);
837 set_kvm_facility(kvm->arch.model.fac_list, 64);
838 r = 0;
839 }
840 mutex_unlock(&kvm->lock);
841 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
842 r ? "(not available)" : "(success)");
843 break;
844 case KVM_CAP_S390_AIS:
845 mutex_lock(&kvm->lock);
846 if (kvm->created_vcpus) {
847 r = -EBUSY;
848 } else {
849 set_kvm_facility(kvm->arch.model.fac_mask, 72);
850 set_kvm_facility(kvm->arch.model.fac_list, 72);
851 r = 0;
852 }
853 mutex_unlock(&kvm->lock);
854 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
855 r ? "(not available)" : "(success)");
856 break;
857 case KVM_CAP_S390_GS:
858 r = -EINVAL;
859 mutex_lock(&kvm->lock);
860 if (kvm->created_vcpus) {
861 r = -EBUSY;
862 } else if (test_facility(133)) {
863 set_kvm_facility(kvm->arch.model.fac_mask, 133);
864 set_kvm_facility(kvm->arch.model.fac_list, 133);
865 r = 0;
866 }
867 mutex_unlock(&kvm->lock);
868 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
869 r ? "(not available)" : "(success)");
870 break;
871 case KVM_CAP_S390_HPAGE_1M:
872 mutex_lock(&kvm->lock);
873 if (kvm->created_vcpus)
874 r = -EBUSY;
875 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
876 r = -EINVAL;
877 else {
878 r = 0;
879 mmap_write_lock(kvm->mm);
880 kvm->mm->context.allow_gmap_hpage_1m = 1;
881 mmap_write_unlock(kvm->mm);
882 /*
883 * We might have to create fake 4k page
 884 * tables. To prevent the hardware from working
 885 * on stale PGSTEs, we emulate these instructions.
886 */
887 kvm->arch.use_skf = 0;
888 kvm->arch.use_pfmfi = 0;
889 }
890 mutex_unlock(&kvm->lock);
891 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
892 r ? "(not available)" : "(success)");
893 break;
894 case KVM_CAP_S390_USER_STSI:
895 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
896 kvm->arch.user_stsi = 1;
897 r = 0;
898 break;
899 case KVM_CAP_S390_USER_INSTR0:
900 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
901 kvm->arch.user_instr0 = 1;
902 icpt_operexc_on_all_vcpus(kvm);
903 r = 0;
904 break;
905 case KVM_CAP_S390_CPU_TOPOLOGY:
906 r = -EINVAL;
907 mutex_lock(&kvm->lock);
908 if (kvm->created_vcpus) {
909 r = -EBUSY;
910 } else if (test_facility(11)) {
911 set_kvm_facility(kvm->arch.model.fac_mask, 11);
912 set_kvm_facility(kvm->arch.model.fac_list, 11);
913 r = 0;
914 }
915 mutex_unlock(&kvm->lock);
916 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
917 r ? "(not available)" : "(success)");
918 break;
919 default:
920 r = -EINVAL;
921 break;
922 }
923 return r;
924}
925
926static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
927{
928 int ret;
929
930 switch (attr->attr) {
931 case KVM_S390_VM_MEM_LIMIT_SIZE:
932 ret = 0;
933 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
934 kvm->arch.mem_limit);
935 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
936 ret = -EFAULT;
937 break;
938 default:
939 ret = -ENXIO;
940 break;
941 }
942 return ret;
943}
944
945static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
946{
947 int ret;
948 unsigned int idx;
949 switch (attr->attr) {
950 case KVM_S390_VM_MEM_ENABLE_CMMA:
951 ret = -ENXIO;
952 if (!sclp.has_cmma)
953 break;
954
955 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
956 mutex_lock(&kvm->lock);
957 if (kvm->created_vcpus)
958 ret = -EBUSY;
959 else if (kvm->mm->context.allow_gmap_hpage_1m)
960 ret = -EINVAL;
961 else {
962 kvm->arch.use_cmma = 1;
963 /* Not compatible with cmma. */
964 kvm->arch.use_pfmfi = 0;
965 ret = 0;
966 }
967 mutex_unlock(&kvm->lock);
968 break;
969 case KVM_S390_VM_MEM_CLR_CMMA:
970 ret = -ENXIO;
971 if (!sclp.has_cmma)
972 break;
973 ret = -EINVAL;
974 if (!kvm->arch.use_cmma)
975 break;
976
977 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
978 mutex_lock(&kvm->lock);
979 idx = srcu_read_lock(&kvm->srcu);
980 s390_reset_cmma(kvm->arch.gmap->mm);
981 srcu_read_unlock(&kvm->srcu, idx);
982 mutex_unlock(&kvm->lock);
983 ret = 0;
984 break;
985 case KVM_S390_VM_MEM_LIMIT_SIZE: {
986 unsigned long new_limit;
987
988 if (kvm_is_ucontrol(kvm))
989 return -EINVAL;
990
991 if (get_user(new_limit, (u64 __user *)attr->addr))
992 return -EFAULT;
993
994 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
995 new_limit > kvm->arch.mem_limit)
996 return -E2BIG;
997
998 if (!new_limit)
999 return -EINVAL;
1000
1001 /* gmap_create takes last usable address */
1002 if (new_limit != KVM_S390_NO_MEM_LIMIT)
1003 new_limit -= 1;
1004
1005 ret = -EBUSY;
1006 mutex_lock(&kvm->lock);
1007 if (!kvm->created_vcpus) {
1008 /* gmap_create will round the limit up */
1009 struct gmap *new = gmap_create(current->mm, new_limit);
1010
1011 if (!new) {
1012 ret = -ENOMEM;
1013 } else {
1014 gmap_remove(kvm->arch.gmap);
1015 new->private = kvm;
1016 kvm->arch.gmap = new;
1017 ret = 0;
1018 }
1019 }
1020 mutex_unlock(&kvm->lock);
1021 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
1022 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
1023 (void *) kvm->arch.gmap->asce);
1024 break;
1025 }
1026 default:
1027 ret = -ENXIO;
1028 break;
1029 }
1030 return ret;
1031}
1032
1033static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
1034
1035void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
1036{
1037 struct kvm_vcpu *vcpu;
1038 unsigned long i;
1039
1040 kvm_s390_vcpu_block_all(kvm);
1041
1042 kvm_for_each_vcpu(i, vcpu, kvm) {
1043 kvm_s390_vcpu_crypto_setup(vcpu);
1044 /* recreate the shadow crycb by leaving the VSIE handler */
1045 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1046 }
1047
1048 kvm_s390_vcpu_unblock_all(kvm);
1049}
1050
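/*
 * KVM_S390_VM_CRYPTO attribute handling: the wrapping key masks live in the
 * CRYCB that is referenced by the SIE control block, so after any change all
 * vcpus are blocked and their crypto setup (including any vsie shadow crycb)
 * is refreshed via kvm_s390_vcpu_crypto_reset_all().
 */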
1051static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
1052{
1053 mutex_lock(&kvm->lock);
1054 switch (attr->attr) {
1055 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1056 if (!test_kvm_facility(kvm, 76)) {
1057 mutex_unlock(&kvm->lock);
1058 return -EINVAL;
1059 }
1060 get_random_bytes(
1061 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1062 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1063 kvm->arch.crypto.aes_kw = 1;
1064 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
1065 break;
1066 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1067 if (!test_kvm_facility(kvm, 76)) {
1068 mutex_unlock(&kvm->lock);
1069 return -EINVAL;
1070 }
1071 get_random_bytes(
1072 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1073 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1074 kvm->arch.crypto.dea_kw = 1;
1075 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
1076 break;
1077 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1078 if (!test_kvm_facility(kvm, 76)) {
1079 mutex_unlock(&kvm->lock);
1080 return -EINVAL;
1081 }
1082 kvm->arch.crypto.aes_kw = 0;
1083 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
1084 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1085 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
1086 break;
1087 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1088 if (!test_kvm_facility(kvm, 76)) {
1089 mutex_unlock(&kvm->lock);
1090 return -EINVAL;
1091 }
1092 kvm->arch.crypto.dea_kw = 0;
1093 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
1094 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1095 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
1096 break;
1097 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1098 if (!ap_instructions_available()) {
1099 mutex_unlock(&kvm->lock);
1100 return -EOPNOTSUPP;
1101 }
1102 kvm->arch.crypto.apie = 1;
1103 break;
1104 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1105 if (!ap_instructions_available()) {
1106 mutex_unlock(&kvm->lock);
1107 return -EOPNOTSUPP;
1108 }
1109 kvm->arch.crypto.apie = 0;
1110 break;
1111 default:
1112 mutex_unlock(&kvm->lock);
1113 return -ENXIO;
1114 }
1115
1116 kvm_s390_vcpu_crypto_reset_all(kvm);
1117 mutex_unlock(&kvm->lock);
1118 return 0;
1119}
1120
1121static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
1122{
1123 /* Only set the ECB bits after guest requests zPCI interpretation */
1124 if (!vcpu->kvm->arch.use_zpci_interp)
1125 return;
1126
1127 vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
1128 vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
1129}
1130
1131void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
1132{
1133 struct kvm_vcpu *vcpu;
1134 unsigned long i;
1135
1136 lockdep_assert_held(&kvm->lock);
1137
1138 if (!kvm_s390_pci_interp_allowed())
1139 return;
1140
1141 /*
 1142 * If the host is configured for PCI and the necessary facilities are
 1143 * available, turn on interpretation for the life of this guest.
1144 */
1145 kvm->arch.use_zpci_interp = 1;
1146
1147 kvm_s390_vcpu_block_all(kvm);
1148
1149 kvm_for_each_vcpu(i, vcpu, kvm) {
1150 kvm_s390_vcpu_pci_setup(vcpu);
1151 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1152 }
1153
1154 kvm_s390_vcpu_unblock_all(kvm);
1155}
1156
1157static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1158{
1159 unsigned long cx;
1160 struct kvm_vcpu *vcpu;
1161
1162 kvm_for_each_vcpu(cx, vcpu, kvm)
1163 kvm_s390_sync_request(req, vcpu);
1164}
1165
1166/*
1167 * Must be called with kvm->srcu held to avoid races on memslots, and with
1168 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1169 */
1170static int kvm_s390_vm_start_migration(struct kvm *kvm)
1171{
1172 struct kvm_memory_slot *ms;
1173 struct kvm_memslots *slots;
1174 unsigned long ram_pages = 0;
1175 int bkt;
1176
1177 /* migration mode already enabled */
1178 if (kvm->arch.migration_mode)
1179 return 0;
1180 slots = kvm_memslots(kvm);
1181 if (!slots || kvm_memslots_empty(slots))
1182 return -EINVAL;
1183
1184 if (!kvm->arch.use_cmma) {
1185 kvm->arch.migration_mode = 1;
1186 return 0;
1187 }
1188 /* mark all the pages in active slots as dirty */
1189 kvm_for_each_memslot(ms, bkt, slots) {
1190 if (!ms->dirty_bitmap)
1191 return -EINVAL;
1192 /*
1193 * The second half of the bitmap is only used on x86,
1194 * and would be wasted otherwise, so we put it to good
1195 * use here to keep track of the state of the storage
1196 * attributes.
1197 */
1198 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1199 ram_pages += ms->npages;
1200 }
1201 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1202 kvm->arch.migration_mode = 1;
1203 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1204 return 0;
1205}
1206
1207/*
1208 * Must be called with kvm->slots_lock to avoid races with ourselves and
1209 * kvm_s390_vm_start_migration.
1210 */
1211static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1212{
1213 /* migration mode already disabled */
1214 if (!kvm->arch.migration_mode)
1215 return 0;
1216 kvm->arch.migration_mode = 0;
1217 if (kvm->arch.use_cmma)
1218 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1219 return 0;
1220}
1221
1222static int kvm_s390_vm_set_migration(struct kvm *kvm,
1223 struct kvm_device_attr *attr)
1224{
1225 int res = -ENXIO;
1226
1227 mutex_lock(&kvm->slots_lock);
1228 switch (attr->attr) {
1229 case KVM_S390_VM_MIGRATION_START:
1230 res = kvm_s390_vm_start_migration(kvm);
1231 break;
1232 case KVM_S390_VM_MIGRATION_STOP:
1233 res = kvm_s390_vm_stop_migration(kvm);
1234 break;
1235 default:
1236 break;
1237 }
1238 mutex_unlock(&kvm->slots_lock);
1239
1240 return res;
1241}
1242
1243static int kvm_s390_vm_get_migration(struct kvm *kvm,
1244 struct kvm_device_attr *attr)
1245{
1246 u64 mig = kvm->arch.migration_mode;
1247
1248 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1249 return -ENXIO;
1250
1251 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1252 return -EFAULT;
1253 return 0;
1254}
1255
1256static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1257
1258static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1259{
1260 struct kvm_s390_vm_tod_clock gtod;
1261
 1262 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1263 return -EFAULT;
1264
1265 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1266 return -EINVAL;
 1267 __kvm_s390_set_tod_clock(kvm, &gtod);
1268
1269 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1270 gtod.epoch_idx, gtod.tod);
1271
1272 return 0;
1273}
1274
1275static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1276{
1277 u8 gtod_high;
1278
 1279 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1280 sizeof(gtod_high)))
1281 return -EFAULT;
1282
1283 if (gtod_high != 0)
1284 return -EINVAL;
1285 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1286
1287 return 0;
1288}
1289
1290static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1291{
1292 struct kvm_s390_vm_tod_clock gtod = { 0 };
1293
 1294 if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1295 sizeof(gtod.tod)))
1296 return -EFAULT;
1297
 1298 __kvm_s390_set_tod_clock(kvm, &gtod);
1299 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1300 return 0;
1301}
1302
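/*
 * The TOD attributes are driven through KVM_SET_DEVICE_ATTR on the VM fd.
 * A minimal userspace sketch (vm_fd and the chosen tod value are
 * assumptions, not defined in this file):
 *
 *	__u64 tod = 0;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr = KVM_S390_VM_TOD_LOW,
 *		.addr = (__u64)&tod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */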
1303static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1304{
1305 int ret;
1306
1307 if (attr->flags)
1308 return -EINVAL;
1309
1310 mutex_lock(&kvm->lock);
1311 /*
1312 * For protected guests, the TOD is managed by the ultravisor, so trying
1313 * to change it will never bring the expected results.
1314 */
1315 if (kvm_s390_pv_is_protected(kvm)) {
1316 ret = -EOPNOTSUPP;
1317 goto out_unlock;
1318 }
1319
1320 switch (attr->attr) {
1321 case KVM_S390_VM_TOD_EXT:
1322 ret = kvm_s390_set_tod_ext(kvm, attr);
1323 break;
1324 case KVM_S390_VM_TOD_HIGH:
1325 ret = kvm_s390_set_tod_high(kvm, attr);
1326 break;
1327 case KVM_S390_VM_TOD_LOW:
1328 ret = kvm_s390_set_tod_low(kvm, attr);
1329 break;
1330 default:
1331 ret = -ENXIO;
1332 break;
1333 }
1334
1335out_unlock:
1336 mutex_unlock(&kvm->lock);
1337 return ret;
1338}
1339
1340static void kvm_s390_get_tod_clock(struct kvm *kvm,
1341 struct kvm_s390_vm_tod_clock *gtod)
1342{
1343 union tod_clock clk;
1344
1345 preempt_disable();
1346
1347 store_tod_clock_ext(&clk);
1348
1349 gtod->tod = clk.tod + kvm->arch.epoch;
1350 gtod->epoch_idx = 0;
1351 if (test_kvm_facility(kvm, 139)) {
1352 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1353 if (gtod->tod < clk.tod)
1354 gtod->epoch_idx += 1;
1355 }
1356
1357 preempt_enable();
1358}
1359
1360static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1361{
1362 struct kvm_s390_vm_tod_clock gtod;
1363
 1364 memset(&gtod, 0, sizeof(gtod));
 1365 kvm_s390_get_tod_clock(kvm, &gtod);
 1366 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1367 return -EFAULT;
1368
1369 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1370 gtod.epoch_idx, gtod.tod);
1371 return 0;
1372}
1373
1374static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1375{
1376 u8 gtod_high = 0;
1377
 1378 if (copy_to_user((void __user *)attr->addr, &gtod_high,
1379 sizeof(gtod_high)))
1380 return -EFAULT;
1381 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1382
1383 return 0;
1384}
1385
1386static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1387{
1388 u64 gtod;
1389
1390 gtod = kvm_s390_get_tod_clock_fast(kvm);
 1391 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1392 return -EFAULT;
1393 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1394
1395 return 0;
1396}
1397
1398static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1399{
1400 int ret;
1401
1402 if (attr->flags)
1403 return -EINVAL;
1404
1405 switch (attr->attr) {
1406 case KVM_S390_VM_TOD_EXT:
1407 ret = kvm_s390_get_tod_ext(kvm, attr);
1408 break;
1409 case KVM_S390_VM_TOD_HIGH:
1410 ret = kvm_s390_get_tod_high(kvm, attr);
1411 break;
1412 case KVM_S390_VM_TOD_LOW:
1413 ret = kvm_s390_get_tod_low(kvm, attr);
1414 break;
1415 default:
1416 ret = -ENXIO;
1417 break;
1418 }
1419 return ret;
1420}
1421
1422static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1423{
1424 struct kvm_s390_vm_cpu_processor *proc;
1425 u16 lowest_ibc, unblocked_ibc;
1426 int ret = 0;
1427
1428 mutex_lock(&kvm->lock);
1429 if (kvm->created_vcpus) {
1430 ret = -EBUSY;
1431 goto out;
1432 }
1433 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1434 if (!proc) {
1435 ret = -ENOMEM;
1436 goto out;
1437 }
1438 if (!copy_from_user(proc, (void __user *)attr->addr,
1439 sizeof(*proc))) {
1440 kvm->arch.model.cpuid = proc->cpuid;
1441 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1442 unblocked_ibc = sclp.ibc & 0xfff;
1443 if (lowest_ibc && proc->ibc) {
1444 if (proc->ibc > unblocked_ibc)
1445 kvm->arch.model.ibc = unblocked_ibc;
1446 else if (proc->ibc < lowest_ibc)
1447 kvm->arch.model.ibc = lowest_ibc;
1448 else
1449 kvm->arch.model.ibc = proc->ibc;
1450 }
1451 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1452 S390_ARCH_FAC_LIST_SIZE_BYTE);
1453 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1454 kvm->arch.model.ibc,
1455 kvm->arch.model.cpuid);
1456 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1457 kvm->arch.model.fac_list[0],
1458 kvm->arch.model.fac_list[1],
1459 kvm->arch.model.fac_list[2]);
1460 } else
1461 ret = -EFAULT;
1462 kfree(proc);
1463out:
1464 mutex_unlock(&kvm->lock);
1465 return ret;
1466}
1467
1468static int kvm_s390_set_processor_feat(struct kvm *kvm,
1469 struct kvm_device_attr *attr)
1470{
1471 struct kvm_s390_vm_cpu_feat data;
1472
1473 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1474 return -EFAULT;
1475 if (!bitmap_subset((unsigned long *) data.feat,
1476 kvm_s390_available_cpu_feat,
1477 KVM_S390_VM_CPU_FEAT_NR_BITS))
1478 return -EINVAL;
1479
1480 mutex_lock(&kvm->lock);
1481 if (kvm->created_vcpus) {
1482 mutex_unlock(&kvm->lock);
1483 return -EBUSY;
1484 }
1485 bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1486 mutex_unlock(&kvm->lock);
1487 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1488 data.feat[0],
1489 data.feat[1],
1490 data.feat[2]);
1491 return 0;
1492}
1493
1494static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1495 struct kvm_device_attr *attr)
1496{
1497 mutex_lock(&kvm->lock);
1498 if (kvm->created_vcpus) {
1499 mutex_unlock(&kvm->lock);
1500 return -EBUSY;
1501 }
1502
1503 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1504 sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1505 mutex_unlock(&kvm->lock);
1506 return -EFAULT;
1507 }
1508 mutex_unlock(&kvm->lock);
1509
1510 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1511 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1512 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1513 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1514 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1515 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1516 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1517 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1518 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1519 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1520 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1521 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1522 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1523 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1524 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1525 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1526 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1527 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1528 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1529 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1530 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1531 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1532 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1533 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1534 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1535 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1536 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1537 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1538 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1539 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1540 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1541 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1542 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1543 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1544 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1545 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1546 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1547 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1548 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1549 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1550 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1551 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1552 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1553 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1554 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1555 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1556 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1557 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1558 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1559 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1560 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1561 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1562 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1563 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1564 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1565 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1566 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
 1567 VM_EVENT(kvm, 3, "SET: guest PFCR subfunc 0x%16.16lx.%16.16lx",
 1568 ((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[0],
 1569 ((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[1]);
1570
1571 return 0;
1572}
1573
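/*
 * Mask of UV feature indications that may be passed through to the guest:
 * currently only the AP and AP-interrupt related bits. The compound literal
 * keeps the bit positions in sync with struct kvm_s390_vm_cpu_uv_feat.
 */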
1574#define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK \
1575( \
1576 ((struct kvm_s390_vm_cpu_uv_feat){ \
1577 .ap = 1, \
1578 .ap_intr = 1, \
1579 }) \
1580 .feat \
1581)
1582
1583static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
1584{
1585 struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr;
1586 unsigned long data, filter;
1587
1588 filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
1589 if (get_user(data, &ptr->feat))
1590 return -EFAULT;
1591 if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS))
1592 return -EINVAL;
1593
1594 mutex_lock(&kvm->lock);
1595 if (kvm->created_vcpus) {
1596 mutex_unlock(&kvm->lock);
1597 return -EBUSY;
1598 }
1599 kvm->arch.model.uv_feat_guest.feat = data;
1600 mutex_unlock(&kvm->lock);
1601
1602 VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data);
1603
1604 return 0;
1605}
1606
1607static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1608{
1609 int ret = -ENXIO;
1610
1611 switch (attr->attr) {
1612 case KVM_S390_VM_CPU_PROCESSOR:
1613 ret = kvm_s390_set_processor(kvm, attr);
1614 break;
1615 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1616 ret = kvm_s390_set_processor_feat(kvm, attr);
1617 break;
1618 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1619 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1620 break;
1621 case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
1622 ret = kvm_s390_set_uv_feat(kvm, attr);
1623 break;
1624 }
1625 return ret;
1626}
1627
1628static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1629{
1630 struct kvm_s390_vm_cpu_processor *proc;
1631 int ret = 0;
1632
1633 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1634 if (!proc) {
1635 ret = -ENOMEM;
1636 goto out;
1637 }
1638 proc->cpuid = kvm->arch.model.cpuid;
1639 proc->ibc = kvm->arch.model.ibc;
1640 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1641 S390_ARCH_FAC_LIST_SIZE_BYTE);
1642 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1643 kvm->arch.model.ibc,
1644 kvm->arch.model.cpuid);
1645 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1646 kvm->arch.model.fac_list[0],
1647 kvm->arch.model.fac_list[1],
1648 kvm->arch.model.fac_list[2]);
1649 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1650 ret = -EFAULT;
1651 kfree(proc);
1652out:
1653 return ret;
1654}
1655
1656static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1657{
1658 struct kvm_s390_vm_cpu_machine *mach;
1659 int ret = 0;
1660
1661 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1662 if (!mach) {
1663 ret = -ENOMEM;
1664 goto out;
1665 }
1666 get_cpu_id((struct cpuid *) &mach->cpuid);
1667 mach->ibc = sclp.ibc;
1668 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1669 S390_ARCH_FAC_LIST_SIZE_BYTE);
1670 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1671 sizeof(stfle_fac_list));
1672 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1673 kvm->arch.model.ibc,
1674 kvm->arch.model.cpuid);
1675 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1676 mach->fac_mask[0],
1677 mach->fac_mask[1],
1678 mach->fac_mask[2]);
1679 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1680 mach->fac_list[0],
1681 mach->fac_list[1],
1682 mach->fac_list[2]);
1683 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1684 ret = -EFAULT;
1685 kfree(mach);
1686out:
1687 return ret;
1688}
1689
1690static int kvm_s390_get_processor_feat(struct kvm *kvm,
1691 struct kvm_device_attr *attr)
1692{
1693 struct kvm_s390_vm_cpu_feat data;
1694
1695 bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1696 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1697 return -EFAULT;
1698 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1699 data.feat[0],
1700 data.feat[1],
1701 data.feat[2]);
1702 return 0;
1703}
1704
1705static int kvm_s390_get_machine_feat(struct kvm *kvm,
1706 struct kvm_device_attr *attr)
1707{
1708 struct kvm_s390_vm_cpu_feat data;
1709
1710 bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1711 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1712 return -EFAULT;
1713 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1714 data.feat[0],
1715 data.feat[1],
1716 data.feat[2]);
1717 return 0;
1718}
1719
1720static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1721 struct kvm_device_attr *attr)
1722{
1723 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1724 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1725 return -EFAULT;
1726
1727 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1728 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1729 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1730 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1731 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1732 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1733 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1734 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1735 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1736 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1737 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1738 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1739 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1740 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1741 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1742 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1743 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1744 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1745 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1746 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1747 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1748 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1749 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1750 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1751 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1752 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1753 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1754 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1755 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1756 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1757 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1758 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1759 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1760 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1761 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1762 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1763 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1764 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1765 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1766 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1767 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1768 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1769 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1770 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1771 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1772 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1773 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1774 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1775 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1776 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1777 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1778 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1779 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1780 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1781 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1782 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1783 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
 1784 VM_EVENT(kvm, 3, "GET: guest PFCR subfunc 0x%16.16lx.%16.16lx",
 1785 ((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[0],
 1786 ((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[1]);
1787
1788 return 0;
1789}
1790
1791static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1792 struct kvm_device_attr *attr)
1793{
1794 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1795 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1796 return -EFAULT;
1797
1798 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1799 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1800 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1801 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1802 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1803 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1804 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1805 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1806 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1807 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1808 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1809 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1810 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1811 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1812 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1813 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1814 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1815 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1816 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1817 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1818 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1819 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1820 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1821 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1822 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1823 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1824 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1825 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1826 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1827 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1828 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1829 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1830 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1831 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1832 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1833 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1834 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1835 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1836 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1837 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1838 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1839 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1840 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1841 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1842 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1843 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1844 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1845 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1846 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1847 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1848 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1849 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1850 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1851 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1852 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1853 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1854 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1855 VM_EVENT(kvm, 3, "GET: host PFCR subfunc 0x%16.16lx.%16.16lx",
1856 ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
1857 ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
1858
1859 return 0;
1860}
1861
1862static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
1863{
1864 struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
1865 unsigned long feat = kvm->arch.model.uv_feat_guest.feat;
1866
1867 if (put_user(feat, &dst->feat))
1868 return -EFAULT;
1869 VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
1870
1871 return 0;
1872}
1873
1874static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
1875{
1876 struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
1877 unsigned long feat;
1878
1879 BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications));
1880
1881 feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
1882 if (put_user(feat, &dst->feat))
1883 return -EFAULT;
 1884 VM_EVENT(kvm, 3, "GET: host UV-feat: 0x%16.16lx", feat);
1885
1886 return 0;
1887}
1888
1889static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1890{
1891 int ret = -ENXIO;
1892
1893 switch (attr->attr) {
1894 case KVM_S390_VM_CPU_PROCESSOR:
1895 ret = kvm_s390_get_processor(kvm, attr);
1896 break;
1897 case KVM_S390_VM_CPU_MACHINE:
1898 ret = kvm_s390_get_machine(kvm, attr);
1899 break;
1900 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1901 ret = kvm_s390_get_processor_feat(kvm, attr);
1902 break;
1903 case KVM_S390_VM_CPU_MACHINE_FEAT:
1904 ret = kvm_s390_get_machine_feat(kvm, attr);
1905 break;
1906 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1907 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1908 break;
1909 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1910 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1911 break;
1912 case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
1913 ret = kvm_s390_get_processor_uv_feat(kvm, attr);
1914 break;
1915 case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
1916 ret = kvm_s390_get_machine_uv_feat(kvm, attr);
1917 break;
1918 }
1919 return ret;
1920}
1921
1922/**
1923 * kvm_s390_update_topology_change_report - update CPU topology change report
1924 * @kvm: guest KVM description
1925 * @val: set or clear the MTCR bit
1926 *
 1927 * Updates the Multiprocessor Topology-Change-Report bit to signal
 1928 * a topology change to the guest.
1929 * This is only relevant if the topology facility is present.
1930 *
1931 * The SCA version, bsca or esca, doesn't matter as offset is the same.
1932 */
1933static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
1934{
1935 union sca_utility new, old;
1936 struct bsca_block *sca;
1937
1938 read_lock(&kvm->arch.sca_lock);
1939 sca = kvm->arch.sca;
1940 old = READ_ONCE(sca->utility);
1941 do {
1942 new = old;
1943 new.mtcr = val;
1944 } while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
1945 read_unlock(&kvm->arch.sca_lock);
1946}
1947
1948static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
1949 struct kvm_device_attr *attr)
1950{
1951 if (!test_kvm_facility(kvm, 11))
1952 return -ENXIO;
1953
1954 kvm_s390_update_topology_change_report(kvm, !!attr->attr);
1955 return 0;
1956}
1957
1958static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
1959 struct kvm_device_attr *attr)
1960{
1961 u8 topo;
1962
1963 if (!test_kvm_facility(kvm, 11))
1964 return -ENXIO;
1965
1966 read_lock(&kvm->arch.sca_lock);
1967 topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
1968 read_unlock(&kvm->arch.sca_lock);
1969
1970 return put_user(topo, (u8 __user *)attr->addr);
1971}
1972
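/*
 * Dispatchers for the KVM_SET/GET/HAS_DEVICE_ATTR VM ioctls. A userspace
 * sketch that probes for CPU topology support (vm_fd is an assumption, not
 * defined in this file):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_TOPOLOGY,
 *	};
 *	int supported = (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0);
 */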
1973static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1974{
1975 int ret;
1976
1977 switch (attr->group) {
1978 case KVM_S390_VM_MEM_CTRL:
1979 ret = kvm_s390_set_mem_control(kvm, attr);
1980 break;
1981 case KVM_S390_VM_TOD:
1982 ret = kvm_s390_set_tod(kvm, attr);
1983 break;
1984 case KVM_S390_VM_CPU_MODEL:
1985 ret = kvm_s390_set_cpu_model(kvm, attr);
1986 break;
1987 case KVM_S390_VM_CRYPTO:
1988 ret = kvm_s390_vm_set_crypto(kvm, attr);
1989 break;
1990 case KVM_S390_VM_MIGRATION:
1991 ret = kvm_s390_vm_set_migration(kvm, attr);
1992 break;
1993 case KVM_S390_VM_CPU_TOPOLOGY:
1994 ret = kvm_s390_set_topo_change_indication(kvm, attr);
1995 break;
1996 default:
1997 ret = -ENXIO;
1998 break;
1999 }
2000
2001 return ret;
2002}
2003
2004static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
2005{
2006 int ret;
2007
2008 switch (attr->group) {
2009 case KVM_S390_VM_MEM_CTRL:
2010 ret = kvm_s390_get_mem_control(kvm, attr);
2011 break;
2012 case KVM_S390_VM_TOD:
2013 ret = kvm_s390_get_tod(kvm, attr);
2014 break;
2015 case KVM_S390_VM_CPU_MODEL:
2016 ret = kvm_s390_get_cpu_model(kvm, attr);
2017 break;
2018 case KVM_S390_VM_MIGRATION:
2019 ret = kvm_s390_vm_get_migration(kvm, attr);
2020 break;
2021 case KVM_S390_VM_CPU_TOPOLOGY:
2022 ret = kvm_s390_get_topo_change_indication(kvm, attr);
2023 break;
2024 default:
2025 ret = -ENXIO;
2026 break;
2027 }
2028
2029 return ret;
2030}
2031
2032static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
2033{
2034 int ret;
2035
2036 switch (attr->group) {
2037 case KVM_S390_VM_MEM_CTRL:
2038 switch (attr->attr) {
2039 case KVM_S390_VM_MEM_ENABLE_CMMA:
2040 case KVM_S390_VM_MEM_CLR_CMMA:
2041 ret = sclp.has_cmma ? 0 : -ENXIO;
2042 break;
2043 case KVM_S390_VM_MEM_LIMIT_SIZE:
2044 ret = 0;
2045 break;
2046 default:
2047 ret = -ENXIO;
2048 break;
2049 }
2050 break;
2051 case KVM_S390_VM_TOD:
2052 switch (attr->attr) {
2053 case KVM_S390_VM_TOD_LOW:
2054 case KVM_S390_VM_TOD_HIGH:
2055 ret = 0;
2056 break;
2057 default:
2058 ret = -ENXIO;
2059 break;
2060 }
2061 break;
2062 case KVM_S390_VM_CPU_MODEL:
2063 switch (attr->attr) {
2064 case KVM_S390_VM_CPU_PROCESSOR:
2065 case KVM_S390_VM_CPU_MACHINE:
2066 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
2067 case KVM_S390_VM_CPU_MACHINE_FEAT:
2068 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
2069 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
2070 case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
2071 case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
2072 ret = 0;
2073 break;
2074 default:
2075 ret = -ENXIO;
2076 break;
2077 }
2078 break;
2079 case KVM_S390_VM_CRYPTO:
2080 switch (attr->attr) {
2081 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
2082 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
2083 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
2084 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
2085 ret = 0;
2086 break;
2087 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
2088 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
2089 ret = ap_instructions_available() ? 0 : -ENXIO;
2090 break;
2091 default:
2092 ret = -ENXIO;
2093 break;
2094 }
2095 break;
2096 case KVM_S390_VM_MIGRATION:
2097 ret = 0;
2098 break;
2099 case KVM_S390_VM_CPU_TOPOLOGY:
2100 ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
2101 break;
2102 default:
2103 ret = -ENXIO;
2104 break;
2105 }
2106
2107 return ret;
2108}
2109
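/*
 * KVM_S390_GET_SKEYS / KVM_S390_SET_SKEYS: userspace passes a guest frame
 * range (start_gfn, count) and a buffer address (skeydata_addr) holding one
 * storage key byte per frame. GET returns KVM_S390_GET_SKEYS_NONE when the
 * guest does not use storage keys at all.
 */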
2110static int kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2111{
2112 uint8_t *keys;
2113 uint64_t hva;
2114 int srcu_idx, i, r = 0;
2115
2116 if (args->flags != 0)
2117 return -EINVAL;
2118
2119 /* Is this guest using storage keys? */
2120 if (!mm_uses_skeys(current->mm))
2121 return KVM_S390_GET_SKEYS_NONE;
2122
2123 /* Enforce sane limit on memory allocation */
2124 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2125 return -EINVAL;
2126
2127 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2128 if (!keys)
2129 return -ENOMEM;
2130
2131 mmap_read_lock(current->mm);
2132 srcu_idx = srcu_read_lock(&kvm->srcu);
2133 for (i = 0; i < args->count; i++) {
2134 hva = gfn_to_hva(kvm, args->start_gfn + i);
2135 if (kvm_is_error_hva(hva)) {
2136 r = -EFAULT;
2137 break;
2138 }
2139
2140 r = get_guest_storage_key(current->mm, hva, &keys[i]);
2141 if (r)
2142 break;
2143 }
2144 srcu_read_unlock(&kvm->srcu, srcu_idx);
2145 mmap_read_unlock(current->mm);
2146
2147 if (!r) {
2148 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
2149 sizeof(uint8_t) * args->count);
2150 if (r)
2151 r = -EFAULT;
2152 }
2153
2154 kvfree(keys);
2155 return r;
2156}
2157
2158static int kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2159{
2160 uint8_t *keys;
2161 uint64_t hva;
2162 int srcu_idx, i, r = 0;
2163 bool unlocked;
2164
2165 if (args->flags != 0)
2166 return -EINVAL;
2167
2168 /* Enforce sane limit on memory allocation */
2169 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2170 return -EINVAL;
2171
2172 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2173 if (!keys)
2174 return -ENOMEM;
2175
2176 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
2177 sizeof(uint8_t) * args->count);
2178 if (r) {
2179 r = -EFAULT;
2180 goto out;
2181 }
2182
2183 /* Enable storage key handling for the guest */
2184 r = s390_enable_skey();
2185 if (r)
2186 goto out;
2187
2188 i = 0;
2189 mmap_read_lock(current->mm);
2190 srcu_idx = srcu_read_lock(&kvm->srcu);
2191 while (i < args->count) {
2192 unlocked = false;
2193 hva = gfn_to_hva(kvm, args->start_gfn + i);
2194 if (kvm_is_error_hva(hva)) {
2195 r = -EFAULT;
2196 break;
2197 }
2198
2199 /* Lowest order bit is reserved */
2200 if (keys[i] & 0x01) {
2201 r = -EINVAL;
2202 break;
2203 }
2204
2205 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
2206 if (r) {
2207 r = fixup_user_fault(current->mm, hva,
2208 FAULT_FLAG_WRITE, &unlocked);
2209 if (r)
2210 break;
2211 }
2212 if (!r)
2213 i++;
2214 }
2215 srcu_read_unlock(&kvm->srcu, srcu_idx);
2216 mmap_read_unlock(current->mm);
2217out:
2218 kvfree(keys);
2219 return r;
2220}
2221
2222/*
 2223 * Base address and length must be sent at the start of each block, so it is
 2224 * cheaper to send some clean data, as long as it is less than the size of
 2225 * two longs.
2226 */
2227#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
2228/* for consistency */
2229#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
2230
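/*
 * Used for the peek variant of KVM_S390_GET_CMMA_BITS: walk the guest frames
 * starting at start_gfn and report one state byte per page, taken from the
 * PGSTE of the backing host page. The 0x43 mask keeps only the bits that are
 * meaningful to userspace for CMMA migration; the rest of the PGSTE is not
 * exposed.
 */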
2231static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2232 u8 *res, unsigned long bufsize)
2233{
2234 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
2235
2236 args->count = 0;
2237 while (args->count < bufsize) {
2238 hva = gfn_to_hva(kvm, cur_gfn);
2239 /*
2240 * We return an error if the first value was invalid, but we
2241 * return successfully if at least one value was copied.
2242 */
2243 if (kvm_is_error_hva(hva))
2244 return args->count ? 0 : -EFAULT;
2245 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2246 pgstev = 0;
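 /* Keep only the usage state and NODAT bits, i.e. the same PGSTE bits
 * that kvm_s390_set_cmma_bits() accepts on the set side.
 */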
2247 res[args->count++] = (pgstev >> 24) & 0x43;
2248 cur_gfn++;
2249 }
2250
2251 return 0;
2252}
2253
2254static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
2255 gfn_t gfn)
2256{
2257 return ____gfn_to_memslot(slots, gfn, true);
2258}
2259
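/*
 * Return the guest frame number of the next page whose CMMA dirty bit is
 * set, searching from cur_gfn across the gfn-ordered memslots. If no dirty
 * page is found, the returned gfn lies at or beyond the end of guest memory.
 */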
2260static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2261 unsigned long cur_gfn)
2262{
2263 struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
2264 unsigned long ofs = cur_gfn - ms->base_gfn;
2265 struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
2266
2267 if (ms->base_gfn + ms->npages <= cur_gfn) {
2268 mnode = rb_next(mnode);
2269 /* If we are above the highest slot, wrap around */
2270 if (!mnode)
2271 mnode = rb_first(&slots->gfn_tree);
2272
2273 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2274 ofs = 0;
2275 }
2276
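 /*
 * cur_gfn may fall into a gap below the approximated slot; in that
 * case start scanning at the first page of that slot.
 */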
2277 if (cur_gfn < ms->base_gfn)
2278 ofs = 0;
2279
2280 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2281 while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
2282 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2283 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
2284 }
2285 return ms->base_gfn + ofs;
2286}
2287
2288static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2289 u8 *res, unsigned long bufsize)
2290{
2291 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2292 struct kvm_memslots *slots = kvm_memslots(kvm);
2293 struct kvm_memory_slot *ms;
2294
2295 if (unlikely(kvm_memslots_empty(slots)))
2296 return 0;
2297
2298 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2299 ms = gfn_to_memslot(kvm, cur_gfn);
2300 args->count = 0;
2301 args->start_gfn = cur_gfn;
2302 if (!ms)
2303 return 0;
2304 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2305 mem_end = kvm_s390_get_gfn_end(slots);
2306
2307 while (args->count < bufsize) {
2308 hva = gfn_to_hva(kvm, cur_gfn);
2309 if (kvm_is_error_hva(hva))
2310 return 0;
2311 /* Decrement only if we actually flipped the bit to 0 */
2312 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2313 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2314 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2315 pgstev = 0;
2316 /* Save the value */
2317 res[args->count++] = (pgstev >> 24) & 0x43;
2318 /* If the next bit is too far away, stop. */
2319 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2320 return 0;
2321 /* If we reached the previous "next", find the next one */
2322 if (cur_gfn == next_gfn)
2323 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2324 /* Reached the end of memory or of the buffer, stop */
2325 if ((next_gfn >= mem_end) ||
2326 (next_gfn - args->start_gfn >= bufsize))
2327 return 0;
2328 cur_gfn++;
2329 /* Reached the end of the current memslot, take the next one. */
2330 if (cur_gfn - ms->base_gfn >= ms->npages) {
2331 ms = gfn_to_memslot(kvm, cur_gfn);
2332 if (!ms)
2333 return 0;
2334 }
2335 }
2336 return 0;
2337}
2338
2339/*
2340 * This function searches for the next page with dirty CMMA attributes, and
2341 * saves the attributes in the buffer up to either the end of the buffer or
2342 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2343 * no trailing clean bytes are saved.
2344 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2345 * output buffer will indicate 0 as length.
2346 */
2347static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2348 struct kvm_s390_cmma_log *args)
2349{
2350 unsigned long bufsize;
2351 int srcu_idx, peek, ret;
2352 u8 *values;
2353
2354 if (!kvm->arch.use_cmma)
2355 return -ENXIO;
2356 /* Invalid/unsupported flags were specified */
2357 if (args->flags & ~KVM_S390_CMMA_PEEK)
2358 return -EINVAL;
2359 /* Migration mode query, and we are not doing a migration */
2360 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2361 if (!peek && !kvm->arch.migration_mode)
2362 return -EINVAL;
2363 /* CMMA is disabled or was not used, or the buffer has length zero */
2364 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2365 if (!bufsize || !kvm->mm->context.uses_cmm) {
2366 memset(args, 0, sizeof(*args));
2367 return 0;
2368 }
2369 /* We are not peeking, and there are no dirty pages */
2370 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2371 memset(args, 0, sizeof(*args));
2372 return 0;
2373 }
2374
2375 values = vmalloc(bufsize);
2376 if (!values)
2377 return -ENOMEM;
2378
2379 mmap_read_lock(kvm->mm);
2380 srcu_idx = srcu_read_lock(&kvm->srcu);
2381 if (peek)
2382 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2383 else
2384 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2385 srcu_read_unlock(&kvm->srcu, srcu_idx);
2386 mmap_read_unlock(kvm->mm);
2387
2388 if (kvm->arch.migration_mode)
2389 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2390 else
2391 args->remaining = 0;
2392
2393 if (copy_to_user((void __user *)args->values, values, args->count))
2394 ret = -EFAULT;
2395
2396 vfree(values);
2397 return ret;
2398}
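/*
 * Illustrative sketch only: one way userspace might drive this path via the
 * KVM_S390_GET_CMMA_BITS vm ioctl, e.g. during migration. "vm_fd" and the
 * handle_cmma_values() consumer are assumptions of the example, not kernel
 * symbols.
 *
 *	__u8 buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.flags = KVM_S390_CMMA_PEEK,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *	if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) == 0)
 *		handle_cmma_values(log.start_gfn, buf, log.count);
 */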
2399
2400/*
2401 * This function sets the CMMA attributes for the given pages. If the input
2402 * buffer has zero length, no action is taken, otherwise the attributes are
2403 * set and the mm->context.uses_cmm flag is set.
2404 */
2405static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2406 const struct kvm_s390_cmma_log *args)
2407{
2408 unsigned long hva, mask, pgstev, i;
2409 uint8_t *bits;
2410 int srcu_idx, r = 0;
2411
2412 mask = args->mask;
2413
2414 if (!kvm->arch.use_cmma)
2415 return -ENXIO;
2416 /* invalid/unsupported flags */
2417 if (args->flags != 0)
2418 return -EINVAL;
2419 /* Enforce sane limit on memory allocation */
2420 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2421 return -EINVAL;
2422 /* Nothing to do */
2423 if (args->count == 0)
2424 return 0;
2425
2426 bits = vmalloc(array_size(sizeof(*bits), args->count));
2427 if (!bits)
2428 return -ENOMEM;
2429
2430 r = copy_from_user(bits, (void __user *)args->values, args->count);
2431 if (r) {
2432 r = -EFAULT;
2433 goto out;
2434 }
2435
2436 mmap_read_lock(kvm->mm);
2437 srcu_idx = srcu_read_lock(&kvm->srcu);
2438 for (i = 0; i < args->count; i++) {
2439 hva = gfn_to_hva(kvm, args->start_gfn + i);
2440 if (kvm_is_error_hva(hva)) {
2441 r = -EFAULT;
2442 break;
2443 }
2444
2445 pgstev = bits[i];
2446 pgstev = pgstev << 24;
2447 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2448 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2449 }
2450 srcu_read_unlock(&kvm->srcu, srcu_idx);
2451 mmap_read_unlock(kvm->mm);
2452
2453 if (!kvm->mm->context.uses_cmm) {
2454 mmap_write_lock(kvm->mm);
2455 kvm->mm->context.uses_cmm = 1;
2456 mmap_write_unlock(kvm->mm);
2457 }
2458out:
2459 vfree(bits);
2460 return r;
2461}
2462
2463/**
2464 * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
2465 * non-protected.
2466 * @kvm: the VM whose protected vCPUs are to be converted
2467 * @rc: return value for the RC field of the UVC (in case of error)
2468 * @rrc: return value for the RRC field of the UVC (in case of error)
2469 *
2470 * Does not stop in case of error; it tries to convert as many
2471 * CPUs as possible. In case of error, the RC and RRC of the first error
2472 * are returned.
2473 *
2474 * Return: 0 in case of success, otherwise -EIO
2475 */
2476int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2477{
2478 struct kvm_vcpu *vcpu;
2479 unsigned long i;
2480 u16 _rc, _rrc;
2481 int ret = 0;
2482
2483 /*
2484 * We ignore failures and try to destroy as many CPUs as possible.
2485 * At the same time we must not free the assigned resources when
2486 * this fails, as the ultravisor still has access to that memory.
2487 * So kvm_s390_pv_destroy_cpu can leave an intentional ("wanted")
2488 * memory leak behind.
2489 * We want to return the first failure rc and rrc, though.
2490 */
2491 kvm_for_each_vcpu(i, vcpu, kvm) {
2492 mutex_lock(&vcpu->mutex);
2493 if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
2494 *rc = _rc;
2495 *rrc = _rrc;
2496 ret = -EIO;
2497 }
2498 mutex_unlock(&vcpu->mutex);
2499 }
2500 /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2501 if (use_gisa)
2502 kvm_s390_gisa_enable(kvm);
2503 return ret;
2504}
2505
2506/**
2507 * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
2508 * to protected.
2509 * @kvm: the VM whose protected vCPUs are to be converted
2510 * @rc: return value for the RC field of the UVC (in case of error)
2511 * @rrc: return value for the RRC field of the UVC (in case of error)
2512 *
2513 * Tries to undo the conversion in case of error.
2514 *
2515 * Return: 0 in case of success, otherwise -EIO
2516 */
2517static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2518{
2519 unsigned long i;
2520 int r = 0;
2521 u16 dummy;
2522
2523 struct kvm_vcpu *vcpu;
2524
2525 /* Disable the GISA if the ultravisor does not support AIV. */
2526 if (!uv_has_feature(BIT_UV_FEAT_AIV))
2527 kvm_s390_gisa_disable(kvm);
2528
2529 kvm_for_each_vcpu(i, vcpu, kvm) {
2530 mutex_lock(&vcpu->mutex);
2531 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2532 mutex_unlock(&vcpu->mutex);
2533 if (r)
2534 break;
2535 }
2536 if (r)
2537 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2538 return r;
2539}
2540
2541/*
2542 * Here we provide user space with a direct interface to query UV
2543 * related data like UV maxima and available features as well as
2544 * feature specific data.
2545 *
2546 * To facilitate future extension of the data structures we'll try to
2547 * write data up to the maximum requested length.
2548 */
2549static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
2550{
2551 ssize_t len_min;
2552
2553 switch (info->header.id) {
2554 case KVM_PV_INFO_VM: {
2555 len_min = sizeof(info->header) + sizeof(info->vm);
2556
2557 if (info->header.len_max < len_min)
2558 return -EINVAL;
2559
2560 memcpy(info->vm.inst_calls_list,
2561 uv_info.inst_calls_list,
2562 sizeof(uv_info.inst_calls_list));
2563
2564 /* It's the max cpu id, not the max number of cpus, so it's off by one */
2565 info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
2566 info->vm.max_guests = uv_info.max_num_sec_conf;
2567 info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
2568 info->vm.feature_indication = uv_info.uv_feature_indications;
2569
2570 return len_min;
2571 }
2572 case KVM_PV_INFO_DUMP: {
2573 len_min = sizeof(info->header) + sizeof(info->dump);
2574
2575 if (info->header.len_max < len_min)
2576 return -EINVAL;
2577
2578 info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
2579 info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
2580 info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
2581 return len_min;
2582 }
2583 default:
2584 return -EINVAL;
2585 }
2586}
2587
2588static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
2589 struct kvm_s390_pv_dmp dmp)
2590{
2591 int r = -EINVAL;
2592 void __user *result_buff = (void __user *)dmp.buff_addr;
2593
2594 switch (dmp.subcmd) {
2595 case KVM_PV_DUMP_INIT: {
2596 if (kvm->arch.pv.dumping)
2597 break;
2598
2599 /*
2600 * Block SIE entry, as concurrent dump UVCs could lead
2601 * to validity intercepts.
2602 */
2603 kvm_s390_vcpu_block_all(kvm);
2604
2605 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2606 UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
2607 KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
2608 cmd->rc, cmd->rrc);
2609 if (!r) {
2610 kvm->arch.pv.dumping = true;
2611 } else {
2612 kvm_s390_vcpu_unblock_all(kvm);
2613 r = -EINVAL;
2614 }
2615 break;
2616 }
2617 case KVM_PV_DUMP_CONFIG_STOR_STATE: {
2618 if (!kvm->arch.pv.dumping)
2619 break;
2620
2621 /*
2622 * gaddr is an output parameter since we might stop
2623 * early. As dmp will be copied back in our caller, we
2624 * don't need to do it ourselves.
2625 */
2626 r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
2627 &cmd->rc, &cmd->rrc);
2628 break;
2629 }
2630 case KVM_PV_DUMP_COMPLETE: {
2631 if (!kvm->arch.pv.dumping)
2632 break;
2633
2634 r = -EINVAL;
2635 if (dmp.buff_len < uv_info.conf_dump_finalize_len)
2636 break;
2637
2638 r = kvm_s390_pv_dump_complete(kvm, result_buff,
2639 &cmd->rc, &cmd->rrc);
2640 break;
2641 }
2642 default:
2643 r = -ENOTTY;
2644 break;
2645 }
2646
2647 return r;
2648}
2649
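/*
 * Dispatcher for the KVM_S390_PV_COMMAND ioctl. All subcommands run under
 * kvm->lock, except KVM_PV_ASYNC_CLEANUP_PERFORM, which must be callable
 * without it (see the comment at that case below).
 */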
2650static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2651{
2652 const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
2653 void __user *argp = (void __user *)cmd->data;
2654 int r = 0;
2655 u16 dummy;
2656
2657 if (need_lock)
2658 mutex_lock(&kvm->lock);
2659
2660 switch (cmd->cmd) {
2661 case KVM_PV_ENABLE: {
2662 r = -EINVAL;
2663 if (kvm_s390_pv_is_protected(kvm))
2664 break;
2665
2666 /*
2667 * FMT 4 SIE needs esca. As we never switch back to bsca from
2668 * esca, we need no cleanup in the error cases below.
2669 */
2670 r = sca_switch_to_extended(kvm);
2671 if (r)
2672 break;
2673
2674 r = s390_disable_cow_sharing();
2675 if (r)
2676 break;
2677
2678 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2679 if (r)
2680 break;
2681
2682 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2683 if (r)
2684 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2685
2686 /* we need to block service interrupts from now on */
2687 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2688 break;
2689 }
2690 case KVM_PV_ASYNC_CLEANUP_PREPARE:
2691 r = -EINVAL;
2692 if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
2693 break;
2694
2695 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2696 /*
2697 * If a CPU could not be destroyed, destroying the VM will also fail.
2698 * There is no point in trying that; instead return
2699 * the rc and rrc from the first CPU that failed to be destroyed.
2700 */
2701 if (r)
2702 break;
2703 r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
2704
2705 /* no need to block service interrupts any more */
2706 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2707 break;
2708 case KVM_PV_ASYNC_CLEANUP_PERFORM:
2709 r = -EINVAL;
2710 if (!async_destroy)
2711 break;
2712 /* kvm->lock must not be held; this is asserted inside the function. */
2713 r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
2714 break;
2715 case KVM_PV_DISABLE: {
2716 r = -EINVAL;
2717 if (!kvm_s390_pv_is_protected(kvm))
2718 break;
2719
2720 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2721 /*
2722 * If a CPU could not be destroyed, destroying the VM will also fail.
2723 * There is no point in trying that; instead return
2724 * the rc and rrc from the first CPU that failed to be destroyed.
2725 */
2726 if (r)
2727 break;
2728 r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
2729
2730 /* no need to block service interrupts any more */
2731 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2732 break;
2733 }
2734 case KVM_PV_SET_SEC_PARMS: {
2735 struct kvm_s390_pv_sec_parm parms = {};
2736 void *hdr;
2737
2738 r = -EINVAL;
2739 if (!kvm_s390_pv_is_protected(kvm))
2740 break;
2741
2742 r = -EFAULT;
2743 if (copy_from_user(&parms, argp, sizeof(parms)))
2744 break;
2745
2746 /* Currently restricted to 8KB */
2747 r = -EINVAL;
2748 if (parms.length > PAGE_SIZE * 2)
2749 break;
2750
2751 r = -ENOMEM;
2752 hdr = vmalloc(parms.length);
2753 if (!hdr)
2754 break;
2755
2756 r = -EFAULT;
2757 if (!copy_from_user(hdr, (void __user *)parms.origin,
2758 parms.length))
2759 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2760 &cmd->rc, &cmd->rrc);
2761
2762 vfree(hdr);
2763 break;
2764 }
2765 case KVM_PV_UNPACK: {
2766 struct kvm_s390_pv_unp unp = {};
2767
2768 r = -EINVAL;
2769 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2770 break;
2771
2772 r = -EFAULT;
2773 if (copy_from_user(&unp, argp, sizeof(unp)))
2774 break;
2775
2776 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2777 &cmd->rc, &cmd->rrc);
2778 break;
2779 }
2780 case KVM_PV_VERIFY: {
2781 r = -EINVAL;
2782 if (!kvm_s390_pv_is_protected(kvm))
2783 break;
2784
2785 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2786 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2787 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2788 cmd->rrc);
2789 break;
2790 }
2791 case KVM_PV_PREP_RESET: {
2792 r = -EINVAL;
2793 if (!kvm_s390_pv_is_protected(kvm))
2794 break;
2795
2796 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2797 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2798 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2799 cmd->rc, cmd->rrc);
2800 break;
2801 }
2802 case KVM_PV_UNSHARE_ALL: {
2803 r = -EINVAL;
2804 if (!kvm_s390_pv_is_protected(kvm))
2805 break;
2806
2807 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2808 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2809 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2810 cmd->rc, cmd->rrc);
2811 break;
2812 }
2813 case KVM_PV_INFO: {
2814 struct kvm_s390_pv_info info = {};
2815 ssize_t data_len;
2816
2817 /*
2818 * No need to check the VM protection here.
2819 *
2820 * Maybe user space wants to query some of the data
2821 * when the VM is still unprotected. If we see the
2822 * need to fence a new data command we can still
2823 * return an error in the info handler.
2824 */
2825
2826 r = -EFAULT;
2827 if (copy_from_user(&info, argp, sizeof(info.header)))
2828 break;
2829
2830 r = -EINVAL;
2831 if (info.header.len_max < sizeof(info.header))
2832 break;
2833
2834 data_len = kvm_s390_handle_pv_info(&info);
2835 if (data_len < 0) {
2836 r = data_len;
2837 break;
2838 }
2839 /*
2840 * If a data command struct is extended (multiple
2841 * times) this can be used to determine how much of it
2842 * is valid.
2843 */
2844 info.header.len_written = data_len;
2845
2846 r = -EFAULT;
2847 if (copy_to_user(argp, &info, data_len))
2848 break;
2849
2850 r = 0;
2851 break;
2852 }
2853 case KVM_PV_DUMP: {
2854 struct kvm_s390_pv_dmp dmp;
2855
2856 r = -EINVAL;
2857 if (!kvm_s390_pv_is_protected(kvm))
2858 break;
2859
2860 r = -EFAULT;
2861 if (copy_from_user(&dmp, argp, sizeof(dmp)))
2862 break;
2863
2864 r = kvm_s390_pv_dmp(kvm, cmd, dmp);
2865 if (r)
2866 break;
2867
2868 if (copy_to_user(argp, &dmp, sizeof(dmp))) {
2869 r = -EFAULT;
2870 break;
2871 }
2872
2873 break;
2874 }
2875 default:
2876 r = -ENOTTY;
2877 }
2878 if (need_lock)
2879 mutex_unlock(&kvm->lock);
2880
2881 return r;
2882}
2883
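/*
 * Validation shared by all vm memops: reject flags outside @supported_flags,
 * a zero size or a size above MEM_OP_MAX_SIZE, and an access key above 0xf
 * when key protection is requested; without key protection the key is cleared.
 */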
2884static int mem_op_validate_common(struct kvm_s390_mem_op *mop, u64 supported_flags)
2885{
2886 if (mop->flags & ~supported_flags || !mop->size)
2887 return -EINVAL;
2888 if (mop->size > MEM_OP_MAX_SIZE)
2889 return -E2BIG;
2890 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2891 if (mop->key > 0xf)
2892 return -EINVAL;
2893 } else {
2894 mop->key = 0;
2895 }
2896 return 0;
2897}
2898
2899static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2900{
2901 void __user *uaddr = (void __user *)mop->buf;
2902 enum gacc_mode acc_mode;
2903 void *tmpbuf = NULL;
2904 int r, srcu_idx;
2905
2906 r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION |
2907 KVM_S390_MEMOP_F_CHECK_ONLY);
2908 if (r)
2909 return r;
2910
2911 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2912 tmpbuf = vmalloc(mop->size);
2913 if (!tmpbuf)
2914 return -ENOMEM;
2915 }
2916
2917 srcu_idx = srcu_read_lock(&kvm->srcu);
2918
2919 if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
2920 r = PGM_ADDRESSING;
2921 goto out_unlock;
2922 }
2923
2924 acc_mode = mop->op == KVM_S390_MEMOP_ABSOLUTE_READ ? GACC_FETCH : GACC_STORE;
2925 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2926 r = check_gpa_range(kvm, mop->gaddr, mop->size, acc_mode, mop->key);
2927 goto out_unlock;
2928 }
2929 if (acc_mode == GACC_FETCH) {
2930 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2931 mop->size, GACC_FETCH, mop->key);
2932 if (r)
2933 goto out_unlock;
2934 if (copy_to_user(uaddr, tmpbuf, mop->size))
2935 r = -EFAULT;
2936 } else {
2937 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2938 r = -EFAULT;
2939 goto out_unlock;
2940 }
2941 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2942 mop->size, GACC_STORE, mop->key);
2943 }
2944
2945out_unlock:
2946 srcu_read_unlock(&kvm->srcu, srcu_idx);
2947
2948 vfree(tmpbuf);
2949 return r;
2950}
2951
2952static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2953{
2954 void __user *uaddr = (void __user *)mop->buf;
2955 void __user *old_addr = (void __user *)mop->old_addr;
2956 union {
2957 __uint128_t quad;
2958 char raw[sizeof(__uint128_t)];
2959 } old = { .quad = 0}, new = { .quad = 0 };
2960 unsigned int off_in_quad = sizeof(new) - mop->size;
2961 int r, srcu_idx;
2962 bool success;
2963
2964 r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION);
2965 if (r)
2966 return r;
2967 /*
2968 * This validates off_in_quad; the operand occupies the low-order bytes
2969 * of the big-endian 16-byte quad. Checking that size is a power of two
2970 * is not necessary, as cmpxchg_guest_abs_with_key takes care of that.
2971 */
2972 if (mop->size > sizeof(new))
2973 return -EINVAL;
2974 if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size))
2975 return -EFAULT;
2976 if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size))
2977 return -EFAULT;
2978
2979 srcu_idx = srcu_read_lock(&kvm->srcu);
2980
2981 if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
2982 r = PGM_ADDRESSING;
2983 goto out_unlock;
2984 }
2985
2986 r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad,
2987 new.quad, mop->key, &success);
2988 if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size))
2989 r = -EFAULT;
2990
2991out_unlock:
2992 srcu_read_unlock(&kvm->srcu, srcu_idx);
2993 return r;
2994}
2995
2996static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2997{
2998 /*
2999 * This is technically a heuristic only: since kvm->lock is not
3000 * taken, it is not guaranteed that the vm is/remains non-protected.
3001 * This is ok from a kernel perspective; wrongdoing is detected
3002 * on the access, -EFAULT is returned and the vm may crash the
3003 * next time it accesses the memory in question.
3004 * There is no sane use case for switching protection and doing a
3005 * memop on two different CPUs at the same time.
3006 */
3007 if (kvm_s390_pv_get_handle(kvm))
3008 return -EINVAL;
3009
3010 switch (mop->op) {
3011 case KVM_S390_MEMOP_ABSOLUTE_READ:
3012 case KVM_S390_MEMOP_ABSOLUTE_WRITE:
3013 return kvm_s390_vm_mem_op_abs(kvm, mop);
3014 case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
3015 return kvm_s390_vm_mem_op_cmpxchg(kvm, mop);
3016 default:
3017 return -EINVAL;
3018 }
3019}
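/*
 * Illustrative sketch only: a minimal userspace read of guest absolute
 * memory through the KVM_S390_MEM_OP vm ioctl handled above; "vm_fd" and
 * the chosen address and size are assumptions of the example.
 *
 *	__u8 data[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size = sizeof(data),
 *		.op = KVM_S390_MEMOP_ABSOLUTE_READ,
 *		.buf = (__u64)(unsigned long)data,
 *	};
 *	int rc = ioctl(vm_fd, KVM_S390_MEM_OP, &op);
 */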
3020
3021int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
3022{
3023 struct kvm *kvm = filp->private_data;
3024 void __user *argp = (void __user *)arg;
3025 struct kvm_device_attr attr;
3026 int r;
3027
3028 switch (ioctl) {
3029 case KVM_S390_INTERRUPT: {
3030 struct kvm_s390_interrupt s390int;
3031
3032 r = -EFAULT;
3033 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3034 break;
3035 r = kvm_s390_inject_vm(kvm, &s390int);
3036 break;
3037 }
3038 case KVM_CREATE_IRQCHIP: {
3039 r = -EINVAL;
3040 if (kvm->arch.use_irqchip)
3041 r = 0;
3042 break;
3043 }
3044 case KVM_SET_DEVICE_ATTR: {
3045 r = -EFAULT;
3046 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
3047 break;
3048 r = kvm_s390_vm_set_attr(kvm, &attr);
3049 break;
3050 }
3051 case KVM_GET_DEVICE_ATTR: {
3052 r = -EFAULT;
3053 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
3054 break;
3055 r = kvm_s390_vm_get_attr(kvm, &attr);
3056 break;
3057 }
3058 case KVM_HAS_DEVICE_ATTR: {
3059 r = -EFAULT;
3060 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
3061 break;
3062 r = kvm_s390_vm_has_attr(kvm, &attr);
3063 break;
3064 }
3065 case KVM_S390_GET_SKEYS: {
3066 struct kvm_s390_skeys args;
3067
3068 r = -EFAULT;
3069 if (copy_from_user(&args, argp,
3070 sizeof(struct kvm_s390_skeys)))
3071 break;
3072 r = kvm_s390_get_skeys(kvm, &args);
3073 break;
3074 }
3075 case KVM_S390_SET_SKEYS: {
3076 struct kvm_s390_skeys args;
3077
3078 r = -EFAULT;
3079 if (copy_from_user(&args, argp,
3080 sizeof(struct kvm_s390_skeys)))
3081 break;
3082 r = kvm_s390_set_skeys(kvm, &args);
3083 break;
3084 }
3085 case KVM_S390_GET_CMMA_BITS: {
3086 struct kvm_s390_cmma_log args;
3087
3088 r = -EFAULT;
3089 if (copy_from_user(&args, argp, sizeof(args)))
3090 break;
3091 mutex_lock(&kvm->slots_lock);
3092 r = kvm_s390_get_cmma_bits(kvm, &args);
3093 mutex_unlock(&kvm->slots_lock);
3094 if (!r) {
3095 r = copy_to_user(argp, &args, sizeof(args));
3096 if (r)
3097 r = -EFAULT;
3098 }
3099 break;
3100 }
3101 case KVM_S390_SET_CMMA_BITS: {
3102 struct kvm_s390_cmma_log args;
3103
3104 r = -EFAULT;
3105 if (copy_from_user(&args, argp, sizeof(args)))
3106 break;
3107 mutex_lock(&kvm->slots_lock);
3108 r = kvm_s390_set_cmma_bits(kvm, &args);
3109 mutex_unlock(&kvm->slots_lock);
3110 break;
3111 }
3112 case KVM_S390_PV_COMMAND: {
3113 struct kvm_pv_cmd args;
3114
3115 /* protvirt means user cpu state */
3116 kvm_s390_set_user_cpu_state_ctrl(kvm);
3117 r = 0;
3118 if (!is_prot_virt_host()) {
3119 r = -EINVAL;
3120 break;
3121 }
3122 if (copy_from_user(&args, argp, sizeof(args))) {
3123 r = -EFAULT;
3124 break;
3125 }
3126 if (args.flags) {
3127 r = -EINVAL;
3128 break;
3129 }
3130 /* must be called without kvm->lock */
3131 r = kvm_s390_handle_pv(kvm, &args);
3132 if (copy_to_user(argp, &args, sizeof(args))) {
3133 r = -EFAULT;
3134 break;
3135 }
3136 break;
3137 }
3138 case KVM_S390_MEM_OP: {
3139 struct kvm_s390_mem_op mem_op;
3140
3141 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3142 r = kvm_s390_vm_mem_op(kvm, &mem_op);
3143 else
3144 r = -EFAULT;
3145 break;
3146 }
3147 case KVM_S390_ZPCI_OP: {
3148 struct kvm_s390_zpci_op args;
3149
3150 r = -EINVAL;
3151 if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3152 break;
3153 if (copy_from_user(&args, argp, sizeof(args))) {
3154 r = -EFAULT;
3155 break;
3156 }
3157 r = kvm_s390_pci_zpci_op(kvm, &args);
3158 break;
3159 }
3160 default:
3161 r = -ENOTTY;
3162 }
3163
3164 return r;
3165}
3166
3167static int kvm_s390_apxa_installed(void)
3168{
3169 struct ap_config_info info;
3170
3171 if (ap_instructions_available()) {
3172 if (ap_qci(&info) == 0)
3173 return info.apxa;
3174 }
3175
3176 return 0;
3177}
3178
3179/*
3180 * The format of the crypto control block (CRYCB) is specified in the 3 low
3181 * order bits of the CRYCB designation (CRYCBD) field as follows:
3182 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
3183 * AP extended addressing (APXA) facility are installed.
3184 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
3185 * Format 2: Both the APXA and MSAX3 facilities are installed.
3186 */
3187static void kvm_s390_set_crycb_format(struct kvm *kvm)
3188{
3189 kvm->arch.crypto.crycbd = virt_to_phys(kvm->arch.crypto.crycb);
3190
3191 /* Clear the CRYCB format bits - i.e., set format 0 by default */
3192 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
3193
3194 /* Check whether MSAX3 is installed */
3195 if (!test_kvm_facility(kvm, 76))
3196 return;
3197
3198 if (kvm_s390_apxa_installed())
3199 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
3200 else
3201 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
3202}
3203
3204/*
3205 * kvm_arch_crypto_set_masks
3206 *
3207 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3208 * to be set.
3209 * @apm: the mask identifying the accessible AP adapters
3210 * @aqm: the mask identifying the accessible AP domains
3211 * @adm: the mask identifying the accessible AP control domains
3212 *
3213 * Set the masks that identify the adapters, domains and control domains to
3214 * which the KVM guest is granted access.
3215 *
3216 * Note: The kvm->lock mutex must be locked by the caller before invoking this
3217 * function.
3218 */
3219void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
3220 unsigned long *aqm, unsigned long *adm)
3221{
3222 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
3223
3224 kvm_s390_vcpu_block_all(kvm);
3225
3226 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
3227 case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
3228 memcpy(crycb->apcb1.apm, apm, 32);
3229 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
3230 apm[0], apm[1], apm[2], apm[3]);
3231 memcpy(crycb->apcb1.aqm, aqm, 32);
3232 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
3233 aqm[0], aqm[1], aqm[2], aqm[3]);
3234 memcpy(crycb->apcb1.adm, adm, 32);
3235 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
3236 adm[0], adm[1], adm[2], adm[3]);
3237 break;
3238 case CRYCB_FORMAT1:
3239 case CRYCB_FORMAT0: /* Fall through both use APCB0 */
3240 memcpy(crycb->apcb0.apm, apm, 8);
3241 memcpy(crycb->apcb0.aqm, aqm, 2);
3242 memcpy(crycb->apcb0.adm, adm, 2);
3243 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
3244 apm[0], *((unsigned short *)aqm),
3245 *((unsigned short *)adm));
3246 break;
3247 default: /* Cannot happen */
3248 break;
3249 }
3250
3251 /* recreate the shadow crycb for each vcpu */
3252 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3253 kvm_s390_vcpu_unblock_all(kvm);
3254}
3255EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
3256
3257/*
3258 * kvm_arch_crypto_clear_masks
3259 *
3260 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3261 * to be cleared.
3262 *
3263 * Clear the masks that identify the adapters, domains and control domains to
3264 * which the KVM guest is granted access.
3265 *
3266 * Note: The kvm->lock mutex must be locked by the caller before invoking this
3267 * function.
3268 */
3269void kvm_arch_crypto_clear_masks(struct kvm *kvm)
3270{
3271 kvm_s390_vcpu_block_all(kvm);
3272
3273 memset(&kvm->arch.crypto.crycb->apcb0, 0,
3274 sizeof(kvm->arch.crypto.crycb->apcb0));
3275 memset(&kvm->arch.crypto.crycb->apcb1, 0,
3276 sizeof(kvm->arch.crypto.crycb->apcb1));
3277
3278 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
3279 /* recreate the shadow crycb for each vcpu */
3280 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3281 kvm_s390_vcpu_unblock_all(kvm);
3282}
3283EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
3284
3285static u64 kvm_s390_get_initial_cpuid(void)
3286{
3287 struct cpuid cpuid;
3288
3289 get_cpu_id(&cpuid);
3290 cpuid.version = 0xff;
3291 return *((u64 *) &cpuid);
3292}
3293
3294static void kvm_s390_crypto_init(struct kvm *kvm)
3295{
3296 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
3297 kvm_s390_set_crycb_format(kvm);
3298 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
3299
3300 if (!test_kvm_facility(kvm, 76))
3301 return;
3302
3303 /* Enable AES/DEA protected key functions by default */
3304 kvm->arch.crypto.aes_kw = 1;
3305 kvm->arch.crypto.dea_kw = 1;
3306 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
3307 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
3308 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
3309 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
3310}
3311
3312static void sca_dispose(struct kvm *kvm)
3313{
3314 if (kvm->arch.use_esca)
3315 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
3316 else
3317 free_page((unsigned long)(kvm->arch.sca));
3318 kvm->arch.sca = NULL;
3319}
3320
3321void kvm_arch_free_vm(struct kvm *kvm)
3322{
3323 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3324 kvm_s390_pci_clear_list(kvm);
3325
3326 __kvm_arch_free_vm(kvm);
3327}
3328
3329int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
3330{
3331 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
3332 int i, rc;
3333 char debug_name[16];
3334 static unsigned long sca_offset;
3335
3336 rc = -EINVAL;
3337#ifdef CONFIG_KVM_S390_UCONTROL
3338 if (type & ~KVM_VM_S390_UCONTROL)
3339 goto out_err;
3340 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
3341 goto out_err;
3342#else
3343 if (type)
3344 goto out_err;
3345#endif
3346
3347 rc = s390_enable_sie();
3348 if (rc)
3349 goto out_err;
3350
3351 rc = -ENOMEM;
3352
3353 if (!sclp.has_64bscao)
3354 alloc_flags |= GFP_DMA;
3355 rwlock_init(&kvm->arch.sca_lock);
3356 /* start with basic SCA */
3357 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
3358 if (!kvm->arch.sca)
3359 goto out_err;
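 /*
 * Stagger each new VM's basic SCA by 16 bytes within its page; the offset
 * is shared across VM creations and wraps once a bsca_block would no longer
 * fit below the page boundary.
 */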
3360 mutex_lock(&kvm_lock);
3361 sca_offset += 16;
3362 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
3363 sca_offset = 0;
3364 kvm->arch.sca = (struct bsca_block *)
3365 ((char *) kvm->arch.sca + sca_offset);
3366 mutex_unlock(&kvm_lock);
3367
3368 sprintf(debug_name, "kvm-%u", current->pid);
3369
3370 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
3371 if (!kvm->arch.dbf)
3372 goto out_err;
3373
3374 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
3375 kvm->arch.sie_page2 =
3376 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
3377 if (!kvm->arch.sie_page2)
3378 goto out_err;
3379
3380 kvm->arch.sie_page2->kvm = kvm;
3381 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
3382
3383 for (i = 0; i < kvm_s390_fac_size(); i++) {
3384 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
3385 (kvm_s390_fac_base[i] |
3386 kvm_s390_fac_ext[i]);
3387 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
3388 kvm_s390_fac_base[i];
3389 }
3390 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
3391
3392 /* we are always in czam mode - even on pre z14 machines */
3393 set_kvm_facility(kvm->arch.model.fac_mask, 138);
3394 set_kvm_facility(kvm->arch.model.fac_list, 138);
3395 /* we emulate STHYI in kvm */
3396 set_kvm_facility(kvm->arch.model.fac_mask, 74);
3397 set_kvm_facility(kvm->arch.model.fac_list, 74);
3398 if (MACHINE_HAS_TLB_GUEST) {
3399 set_kvm_facility(kvm->arch.model.fac_mask, 147);
3400 set_kvm_facility(kvm->arch.model.fac_list, 147);
3401 }
3402
3403 if (css_general_characteristics.aiv && test_facility(65))
3404 set_kvm_facility(kvm->arch.model.fac_mask, 65);
3405
3406 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
3407 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
3408
3409 kvm->arch.model.uv_feat_guest.feat = 0;
3410
3411 kvm_s390_crypto_init(kvm);
3412
3413 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
3414 mutex_lock(&kvm->lock);
3415 kvm_s390_pci_init_list(kvm);
3416 kvm_s390_vcpu_pci_enable_interp(kvm);
3417 mutex_unlock(&kvm->lock);
3418 }
3419
3420 mutex_init(&kvm->arch.float_int.ais_lock);
3421 spin_lock_init(&kvm->arch.float_int.lock);
3422 for (i = 0; i < FIRQ_LIST_COUNT; i++)
3423 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
3424 init_waitqueue_head(&kvm->arch.ipte_wq);
3425 mutex_init(&kvm->arch.ipte_mutex);
3426
3427 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
3428 VM_EVENT(kvm, 3, "vm created with type %lu", type);
3429
3430 if (type & KVM_VM_S390_UCONTROL) {
3431 kvm->arch.gmap = NULL;
3432 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
3433 } else {
3434 if (sclp.hamax == U64_MAX)
3435 kvm->arch.mem_limit = TASK_SIZE_MAX;
3436 else
3437 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
3438 sclp.hamax + 1);
3439 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
3440 if (!kvm->arch.gmap)
3441 goto out_err;
3442 kvm->arch.gmap->private = kvm;
3443 kvm->arch.gmap->pfault_enabled = 0;
3444 }
3445
3446 kvm->arch.use_pfmfi = sclp.has_pfmfi;
3447 kvm->arch.use_skf = sclp.has_skey;
3448 spin_lock_init(&kvm->arch.start_stop_lock);
3449 kvm_s390_vsie_init(kvm);
3450 if (use_gisa)
3451 kvm_s390_gisa_init(kvm);
3452 INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
3453 kvm->arch.pv.set_aside = NULL;
3454 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
3455
3456 return 0;
3457out_err:
3458 free_page((unsigned long)kvm->arch.sie_page2);
3459 debug_unregister(kvm->arch.dbf);
3460 sca_dispose(kvm);
3461 KVM_EVENT(3, "creation of vm failed: %d", rc);
3462 return rc;
3463}
3464
3465void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3466{
3467 u16 rc, rrc;
3468
3469 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
3470 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
3471 kvm_s390_clear_local_irqs(vcpu);
3472 kvm_clear_async_pf_completion_queue(vcpu);
3473 if (!kvm_is_ucontrol(vcpu->kvm))
3474 sca_del_vcpu(vcpu);
3475 kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3476
3477 if (kvm_is_ucontrol(vcpu->kvm))
3478 gmap_remove(vcpu->arch.gmap);
3479
3480 if (vcpu->kvm->arch.use_cmma)
3481 kvm_s390_vcpu_unsetup_cmma(vcpu);
3482 /* We cannot hold the vcpu mutex here; we are already dying */
3483 if (kvm_s390_pv_cpu_get_handle(vcpu))
3484 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
3485 free_page((unsigned long)(vcpu->arch.sie_block));
3486}
3487
3488void kvm_arch_destroy_vm(struct kvm *kvm)
3489{
3490 u16 rc, rrc;
3491
3492 kvm_destroy_vcpus(kvm);
3493 sca_dispose(kvm);
3494 kvm_s390_gisa_destroy(kvm);
3495 /*
3496 * We are already at the end of life and kvm->lock is not taken.
3497 * This is ok as the file descriptor is closed by now and nobody
3498 * can mess with the pv state.
3499 */
3500 kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
3501 /*
3502 * Remove the mmu notifier only when the whole KVM VM is torn down,
3503 * and only if one was registered to begin with. If the VM is
3504 * currently not protected but was previously protected,
3505 * then it's possible that the notifier is still registered.
3506 */
3507 if (kvm->arch.pv.mmu_notifier.ops)
3508 mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
3509
3510 debug_unregister(kvm->arch.dbf);
3511 free_page((unsigned long)kvm->arch.sie_page2);
3512 if (!kvm_is_ucontrol(kvm))
3513 gmap_remove(kvm->arch.gmap);
3514 kvm_s390_destroy_adapters(kvm);
3515 kvm_s390_clear_float_irqs(kvm);
3516 kvm_s390_vsie_destroy(kvm);
3517 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
3518}
3519
3520/* Section: vcpu related */
3521static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
3522{
3523 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
3524 if (!vcpu->arch.gmap)
3525 return -ENOMEM;
3526 vcpu->arch.gmap->private = vcpu->kvm;
3527
3528 return 0;
3529}
3530
3531static void sca_del_vcpu(struct kvm_vcpu *vcpu)
3532{
3533 if (!kvm_s390_use_sca_entries())
3534 return;
3535 read_lock(&vcpu->kvm->arch.sca_lock);
3536 if (vcpu->kvm->arch.use_esca) {
3537 struct esca_block *sca = vcpu->kvm->arch.sca;
3538
3539 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3540 sca->cpu[vcpu->vcpu_id].sda = 0;
3541 } else {
3542 struct bsca_block *sca = vcpu->kvm->arch.sca;
3543
3544 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3545 sca->cpu[vcpu->vcpu_id].sda = 0;
3546 }
3547 read_unlock(&vcpu->kvm->arch.sca_lock);
3548}
3549
3550static void sca_add_vcpu(struct kvm_vcpu *vcpu)
3551{
3552 if (!kvm_s390_use_sca_entries()) {
3553 phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
3554
3555 /* we still need the basic sca for the ipte control */
3556 vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3557 vcpu->arch.sie_block->scaol = sca_phys;
3558 return;
3559 }
3560 read_lock(&vcpu->kvm->arch.sca_lock);
3561 if (vcpu->kvm->arch.use_esca) {
3562 struct esca_block *sca = vcpu->kvm->arch.sca;
3563 phys_addr_t sca_phys = virt_to_phys(sca);
3564
3565 sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3566 vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3567 vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
3568 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3569 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3570 } else {
3571 struct bsca_block *sca = vcpu->kvm->arch.sca;
3572 phys_addr_t sca_phys = virt_to_phys(sca);
3573
3574 sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3575 vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3576 vcpu->arch.sie_block->scaol = sca_phys;
3577 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3578 }
3579 read_unlock(&vcpu->kvm->arch.sca_lock);
3580}
3581
3582/* Basic SCA to Extended SCA data copy routines */
3583static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
3584{
3585 d->sda = s->sda;
3586 d->sigp_ctrl.c = s->sigp_ctrl.c;
3587 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
3588}
3589
3590static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
3591{
3592 int i;
3593
3594 d->ipte_control = s->ipte_control;
3595 d->mcn[0] = s->mcn;
3596 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
3597 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
3598}
3599
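/*
 * Replace the basic SCA with an extended SCA. All vCPUs are blocked while
 * the entries are copied and every SIE block is repointed to the new SCA;
 * only then is the old basic SCA freed.
 */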
3600static int sca_switch_to_extended(struct kvm *kvm)
3601{
3602 struct bsca_block *old_sca = kvm->arch.sca;
3603 struct esca_block *new_sca;
3604 struct kvm_vcpu *vcpu;
3605 unsigned long vcpu_idx;
3606 u32 scaol, scaoh;
3607 phys_addr_t new_sca_phys;
3608
3609 if (kvm->arch.use_esca)
3610 return 0;
3611
3612 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3613 if (!new_sca)
3614 return -ENOMEM;
3615
3616 new_sca_phys = virt_to_phys(new_sca);
3617 scaoh = new_sca_phys >> 32;
3618 scaol = new_sca_phys & ESCA_SCAOL_MASK;
3619
3620 kvm_s390_vcpu_block_all(kvm);
3621 write_lock(&kvm->arch.sca_lock);
3622
3623 sca_copy_b_to_e(new_sca, old_sca);
3624
3625 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3626 vcpu->arch.sie_block->scaoh = scaoh;
3627 vcpu->arch.sie_block->scaol = scaol;
3628 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3629 }
3630 kvm->arch.sca = new_sca;
3631 kvm->arch.use_esca = 1;
3632
3633 write_unlock(&kvm->arch.sca_lock);
3634 kvm_s390_vcpu_unblock_all(kvm);
3635
3636 free_page((unsigned long)old_sca);
3637
3638 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3639 old_sca, kvm->arch.sca);
3640 return 0;
3641}
3642
3643static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3644{
3645 int rc;
3646
3647 if (!kvm_s390_use_sca_entries()) {
3648 if (id < KVM_MAX_VCPUS)
3649 return true;
3650 return false;
3651 }
3652 if (id < KVM_S390_BSCA_CPU_SLOTS)
3653 return true;
3654 if (!sclp.has_esca || !sclp.has_64bscao)
3655 return false;
3656
3657 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3658
3659 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3660}
3661
3662/* must be called with preemption disabled to protect against TOD sync and vcpu_load/put */
3663static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3664{
3665 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3666 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3667 vcpu->arch.cputm_start = get_tod_clock_fast();
3668 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3669}
3670
3671/* must be called with preemption disabled to protect against TOD sync and vcpu_load/put */
3672static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3673{
3674 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3675 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3676 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3677 vcpu->arch.cputm_start = 0;
3678 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3679}
3680
3681/* must be called with preemption disabled to protect against TOD sync and vcpu_load/put */
3682static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3683{
3684 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3685 vcpu->arch.cputm_enabled = true;
3686 __start_cpu_timer_accounting(vcpu);
3687}
3688
3689/* must be called with preemption disabled to protect against TOD sync and vcpu_load/put */
3690static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3691{
3692 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3693 __stop_cpu_timer_accounting(vcpu);
3694 vcpu->arch.cputm_enabled = false;
3695}
3696
3697static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3698{
3699 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3700 __enable_cpu_timer_accounting(vcpu);
3701 preempt_enable();
3702}
3703
3704static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3705{
3706 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3707 __disable_cpu_timer_accounting(vcpu);
3708 preempt_enable();
3709}
3710
3711/* set the cpu timer - may only be called from the VCPU thread itself */
3712void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3713{
3714 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3715 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3716 if (vcpu->arch.cputm_enabled)
3717 vcpu->arch.cputm_start = get_tod_clock_fast();
3718 vcpu->arch.sie_block->cputm = cputm;
3719 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3720 preempt_enable();
3721}
3722
3723/* update and get the cpu timer - can also be called from other VCPU threads */
3724__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3725{
3726 unsigned int seq;
3727 __u64 value;
3728
3729 if (unlikely(!vcpu->arch.cputm_enabled))
3730 return vcpu->arch.sie_block->cputm;
3731
3732 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3733 do {
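 /*
 * raw_read_seqcount() can return an odd value while a writer is
 * active; clearing the low bit in the retry check below forces
 * another pass in that case instead of using a torn value.
 */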
3734 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3735 /*
3736 * If the writer would ever execute a read in the critical
3737 * section, e.g. in irq context, we have a deadlock.
3738 */
3739 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3740 value = vcpu->arch.sie_block->cputm;
3741 /* if cputm_start is 0, accounting is being started/stopped */
3742 if (likely(vcpu->arch.cputm_start))
3743 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3744 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3745 preempt_enable();
3746 return value;
3747}
3748
3749void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3750{
3751
3752 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3753 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3754 __start_cpu_timer_accounting(vcpu);
3755 vcpu->cpu = cpu;
3756}
3757
3758void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3759{
3760 vcpu->cpu = -1;
3761 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3762 __stop_cpu_timer_accounting(vcpu);
3763 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3764
3765}
3766
3767void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3768{
3769 mutex_lock(&vcpu->kvm->lock);
3770 preempt_disable();
3771 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3772 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3773 preempt_enable();
3774 mutex_unlock(&vcpu->kvm->lock);
3775 if (!kvm_is_ucontrol(vcpu->kvm)) {
3776 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3777 sca_add_vcpu(vcpu);
3778 }
3779 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3780 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3781}
3782
3783static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3784{
3785 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3786 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3787 return true;
3788 return false;
3789}
3790
3791static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3792{
3793 /* At least one ECC subfunction must be present */
3794 return kvm_has_pckmo_subfunc(kvm, 32) ||
3795 kvm_has_pckmo_subfunc(kvm, 33) ||
3796 kvm_has_pckmo_subfunc(kvm, 34) ||
3797 kvm_has_pckmo_subfunc(kvm, 40) ||
3798 kvm_has_pckmo_subfunc(kvm, 41);
3799
3800}
3801
3802static bool kvm_has_pckmo_hmac(struct kvm *kvm)
3803{
3804 /* At least one HMAC subfunction must be present */
3805 return kvm_has_pckmo_subfunc(kvm, 118) ||
3806 kvm_has_pckmo_subfunc(kvm, 122);
3807}
3808
3809static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3810{
3811 /*
3812 * If the AP instructions are not being interpreted and the MSAX3
3813 * facility is not configured for the guest, there is nothing to set up.
3814 */
3815 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3816 return;
3817
3818 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3819 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3820 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3821 vcpu->arch.sie_block->ecd &= ~(ECD_ECC | ECD_HMAC);
3822
3823 if (vcpu->kvm->arch.crypto.apie)
3824 vcpu->arch.sie_block->eca |= ECA_APIE;
3825
3826 /* Set up protected key support */
3827 if (vcpu->kvm->arch.crypto.aes_kw) {
3828 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3829 /* ECC/HMAC protected keys are also wrapped with the AES wrapping key */
3830 if (kvm_has_pckmo_ecc(vcpu->kvm))
3831 vcpu->arch.sie_block->ecd |= ECD_ECC;
3832 if (kvm_has_pckmo_hmac(vcpu->kvm))
3833 vcpu->arch.sie_block->ecd |= ECD_HMAC;
3834 }
3835
3836 if (vcpu->kvm->arch.crypto.dea_kw)
3837 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3838}
3839
3840void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3841{
3842 free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo));
3843 vcpu->arch.sie_block->cbrlo = 0;
3844}
3845
3846int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3847{
3848 void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
3849
3850 if (!cbrlo_page)
3851 return -ENOMEM;
3852
3853 vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page);
3854 return 0;
3855}
3856
3857static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3858{
3859 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3860
3861 vcpu->arch.sie_block->ibc = model->ibc;
3862 if (test_kvm_facility(vcpu->kvm, 7))
3863 vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list);
3864}
3865
3866static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3867{
3868 int rc = 0;
3869 u16 uvrc, uvrrc;
3870
3871 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3872 CPUSTAT_SM |
3873 CPUSTAT_STOPPED);
3874
3875 if (test_kvm_facility(vcpu->kvm, 78))
3876 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3877 else if (test_kvm_facility(vcpu->kvm, 8))
3878 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3879
3880 kvm_s390_vcpu_setup_model(vcpu);
3881
3882 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3883 if (MACHINE_HAS_ESOP)
3884 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3885 if (test_kvm_facility(vcpu->kvm, 9))
3886 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3887 if (test_kvm_facility(vcpu->kvm, 11))
3888 vcpu->arch.sie_block->ecb |= ECB_PTF;
3889 if (test_kvm_facility(vcpu->kvm, 73))
3890 vcpu->arch.sie_block->ecb |= ECB_TE;
3891 if (!kvm_is_ucontrol(vcpu->kvm))
3892 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3893
3894 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3895 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3896 if (test_kvm_facility(vcpu->kvm, 130))
3897 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3898 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3899 if (sclp.has_cei)
3900 vcpu->arch.sie_block->eca |= ECA_CEI;
3901 if (sclp.has_ib)
3902 vcpu->arch.sie_block->eca |= ECA_IB;
3903 if (sclp.has_siif)
3904 vcpu->arch.sie_block->eca |= ECA_SII;
3905 if (sclp.has_sigpif)
3906 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3907 if (test_kvm_facility(vcpu->kvm, 129)) {
3908 vcpu->arch.sie_block->eca |= ECA_VX;
3909 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3910 }
3911 if (test_kvm_facility(vcpu->kvm, 139))
3912 vcpu->arch.sie_block->ecd |= ECD_MEF;
3913 if (test_kvm_facility(vcpu->kvm, 156))
3914 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3915 if (vcpu->arch.sie_block->gd) {
3916 vcpu->arch.sie_block->eca |= ECA_AIV;
3917 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3918 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3919 }
3920 vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC;
3921 vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb);
3922
3923 if (sclp.has_kss)
3924 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3925 else
3926 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3927
3928 if (vcpu->kvm->arch.use_cmma) {
3929 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3930 if (rc)
3931 return rc;
3932 }
3933 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3934 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3935
3936 vcpu->arch.sie_block->hpid = HPID_KVM;
3937
3938 kvm_s390_vcpu_crypto_setup(vcpu);
3939
3940 kvm_s390_vcpu_pci_setup(vcpu);
3941
3942 mutex_lock(&vcpu->kvm->lock);
3943 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3944 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3945 if (rc)
3946 kvm_s390_vcpu_unsetup_cmma(vcpu);
3947 }
3948 mutex_unlock(&vcpu->kvm->lock);
3949
3950 return rc;
3951}
3952
3953int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3954{
3955 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3956 return -EINVAL;
3957 return 0;
3958}
3959
3960int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3961{
3962 struct sie_page *sie_page;
3963 int rc;
3964
3965 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3966 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3967 if (!sie_page)
3968 return -ENOMEM;
3969
3970 vcpu->arch.sie_block = &sie_page->sie_block;
3971 vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb);
3972
3973 /* the real guest size will always be smaller than msl */
3974 vcpu->arch.sie_block->mso = 0;
3975 vcpu->arch.sie_block->msl = sclp.hamax;
3976
3977 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3978 spin_lock_init(&vcpu->arch.local_int.lock);
3979 vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3980 seqcount_init(&vcpu->arch.cputm_seqcount);
3981
3982 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3983 kvm_clear_async_pf_completion_queue(vcpu);
3984 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3985 KVM_SYNC_GPRS |
3986 KVM_SYNC_ACRS |
3987 KVM_SYNC_CRS |
3988 KVM_SYNC_ARCH0 |
3989 KVM_SYNC_PFAULT |
3990 KVM_SYNC_DIAG318;
3991 vcpu->arch.acrs_loaded = false;
3992 kvm_s390_set_prefix(vcpu, 0);
3993 if (test_kvm_facility(vcpu->kvm, 64))
3994 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3995 if (test_kvm_facility(vcpu->kvm, 82))
3996 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3997 if (test_kvm_facility(vcpu->kvm, 133))
3998 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3999 if (test_kvm_facility(vcpu->kvm, 156))
4000 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
4001 /* fprs can be synchronized via vrs, even if the guest has no vx. With
4002 * cpu_has_vx(), (load|store)_fpu_regs() will work with vrs format.
4003 */
4004 if (cpu_has_vx())
4005 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
4006 else
4007 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
4008
4009 if (kvm_is_ucontrol(vcpu->kvm)) {
4010 rc = __kvm_ucontrol_vcpu_init(vcpu);
4011 if (rc)
4012 goto out_free_sie_block;
4013 }
4014
4015 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
4016 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
4017 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
4018
4019 rc = kvm_s390_vcpu_setup(vcpu);
4020 if (rc)
4021 goto out_ucontrol_uninit;
4022
4023 kvm_s390_update_topology_change_report(vcpu->kvm, 1);
4024 return 0;
4025
4026out_ucontrol_uninit:
4027 if (kvm_is_ucontrol(vcpu->kvm))
4028 gmap_remove(vcpu->arch.gmap);
4029out_free_sie_block:
4030 free_page((unsigned long)(vcpu->arch.sie_block));
4031 return rc;
4032}
4033
4034int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
4035{
4036 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4037 return kvm_s390_vcpu_has_irq(vcpu, 0);
4038}
4039
4040bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
4041{
4042 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
4043}
4044
4045void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
4046{
4047 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
4048 exit_sie(vcpu);
4049}
4050
4051void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
4052{
4053 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
4054}
4055
4056static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
4057{
4058 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
4059 exit_sie(vcpu);
4060}
4061
4062bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
4063{
4064 return atomic_read(&vcpu->arch.sie_block->prog20) &
4065 (PROG_BLOCK_SIE | PROG_REQUEST);
4066}
4067
4068static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
4069{
4070 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
4071}
4072
4073/*
4074 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
4075 * If the CPU is not running (e.g. waiting as idle) the function will
4076 * return immediately. */
4077void exit_sie(struct kvm_vcpu *vcpu)
4078{
4079 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
4080 kvm_s390_vsie_kick(vcpu);
4081 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
4082 cpu_relax();
4083}
4084
4085/* Kick a guest cpu out of SIE to process a request synchronously */
4086void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
4087{
4088 __kvm_make_request(req, vcpu);
4089 kvm_s390_vcpu_request(vcpu);
4090}
4091
4092static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
4093 unsigned long end)
4094{
4095 struct kvm *kvm = gmap->private;
4096 struct kvm_vcpu *vcpu;
4097 unsigned long prefix;
4098 unsigned long i;
4099
4100 trace_kvm_s390_gmap_notifier(start, end, gmap_is_shadow(gmap));
4101
4102 if (gmap_is_shadow(gmap))
4103 return;
4104 if (start >= 1UL << 31)
4105 /* We are only interested in prefix pages */
4106 return;
4107 kvm_for_each_vcpu(i, vcpu, kvm) {
4108 /* match against both prefix pages */
4109 prefix = kvm_s390_get_prefix(vcpu);
4110 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
4111 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
4112 start, end);
4113 kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4114 }
4115 }
4116}
4117
4118bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
4119{
4120 /* do not poll with more than halt_poll_max_steal percent of steal time */
4121 if (get_lowcore()->avg_steal_timer * 100 / (TICK_USEC << 12) >=
4122 READ_ONCE(halt_poll_max_steal)) {
4123 vcpu->stat.halt_no_poll_steal++;
4124 return true;
4125 }
4126 return false;
4127}
4128
4129int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
4130{
4131 /* kvm common code refers to this, but never calls it */
4132 BUG();
4133 return 0;
4134}
4135
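/* KVM_GET_ONE_REG / KVM_SET_ONE_REG handling for the s390 specific register set */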
4136static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
4137 struct kvm_one_reg *reg)
4138{
4139 int r = -EINVAL;
4140
4141 switch (reg->id) {
4142 case KVM_REG_S390_TODPR:
4143 r = put_user(vcpu->arch.sie_block->todpr,
4144 (u32 __user *)reg->addr);
4145 break;
4146 case KVM_REG_S390_EPOCHDIFF:
4147 r = put_user(vcpu->arch.sie_block->epoch,
4148 (u64 __user *)reg->addr);
4149 break;
4150 case KVM_REG_S390_CPU_TIMER:
4151 r = put_user(kvm_s390_get_cpu_timer(vcpu),
4152 (u64 __user *)reg->addr);
4153 break;
4154 case KVM_REG_S390_CLOCK_COMP:
4155 r = put_user(vcpu->arch.sie_block->ckc,
4156 (u64 __user *)reg->addr);
4157 break;
4158 case KVM_REG_S390_PFTOKEN:
4159 r = put_user(vcpu->arch.pfault_token,
4160 (u64 __user *)reg->addr);
4161 break;
4162 case KVM_REG_S390_PFCOMPARE:
4163 r = put_user(vcpu->arch.pfault_compare,
4164 (u64 __user *)reg->addr);
4165 break;
4166 case KVM_REG_S390_PFSELECT:
4167 r = put_user(vcpu->arch.pfault_select,
4168 (u64 __user *)reg->addr);
4169 break;
4170 case KVM_REG_S390_PP:
4171 r = put_user(vcpu->arch.sie_block->pp,
4172 (u64 __user *)reg->addr);
4173 break;
4174 case KVM_REG_S390_GBEA:
4175 r = put_user(vcpu->arch.sie_block->gbea,
4176 (u64 __user *)reg->addr);
4177 break;
4178 default:
4179 break;
4180 }
4181
4182 return r;
4183}
4184
4185static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
4186 struct kvm_one_reg *reg)
4187{
4188 int r = -EINVAL;
4189 __u64 val;
4190
4191 switch (reg->id) {
4192 case KVM_REG_S390_TODPR:
4193 r = get_user(vcpu->arch.sie_block->todpr,
4194 (u32 __user *)reg->addr);
4195 break;
4196 case KVM_REG_S390_EPOCHDIFF:
4197 r = get_user(vcpu->arch.sie_block->epoch,
4198 (u64 __user *)reg->addr);
4199 break;
4200 case KVM_REG_S390_CPU_TIMER:
4201 r = get_user(val, (u64 __user *)reg->addr);
4202 if (!r)
4203 kvm_s390_set_cpu_timer(vcpu, val);
4204 break;
4205 case KVM_REG_S390_CLOCK_COMP:
4206 r = get_user(vcpu->arch.sie_block->ckc,
4207 (u64 __user *)reg->addr);
4208 break;
4209 case KVM_REG_S390_PFTOKEN:
4210 r = get_user(vcpu->arch.pfault_token,
4211 (u64 __user *)reg->addr);
4212 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4213 kvm_clear_async_pf_completion_queue(vcpu);
4214 break;
4215 case KVM_REG_S390_PFCOMPARE:
4216 r = get_user(vcpu->arch.pfault_compare,
4217 (u64 __user *)reg->addr);
4218 break;
4219 case KVM_REG_S390_PFSELECT:
4220 r = get_user(vcpu->arch.pfault_select,
4221 (u64 __user *)reg->addr);
4222 break;
4223 case KVM_REG_S390_PP:
4224 r = get_user(vcpu->arch.sie_block->pp,
4225 (u64 __user *)reg->addr);
4226 break;
4227 case KVM_REG_S390_GBEA:
4228 r = get_user(vcpu->arch.sie_block->gbea,
4229 (u64 __user *)reg->addr);
4230 break;
4231 default:
4232 break;
4233 }
4234
4235 return r;
4236}
4237
4238static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
4239{
4240 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
4241 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4242 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
4243
4244 kvm_clear_async_pf_completion_queue(vcpu);
4245 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
4246 kvm_s390_vcpu_stop(vcpu);
4247 kvm_s390_clear_local_irqs(vcpu);
4248}
4249
4250static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
4251{
4252 /* Initial reset is a superset of the normal reset */
4253 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4254
4255 /*
4256	 * This equals the initial cpu reset in the POP, but we don't switch to ESA.
4257 * We do not only reset the internal data, but also ...
4258 */
4259 vcpu->arch.sie_block->gpsw.mask = 0;
4260 vcpu->arch.sie_block->gpsw.addr = 0;
4261 kvm_s390_set_prefix(vcpu, 0);
4262 kvm_s390_set_cpu_timer(vcpu, 0);
4263 vcpu->arch.sie_block->ckc = 0;
4264 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
4265 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
4266 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
4267
4268 /* ... the data in sync regs */
4269 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
4270 vcpu->run->s.regs.ckc = 0;
4271 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
4272 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
4273 vcpu->run->psw_addr = 0;
4274 vcpu->run->psw_mask = 0;
4275 vcpu->run->s.regs.todpr = 0;
4276 vcpu->run->s.regs.cputm = 0;
4277 vcpu->run->s.regs.ckc = 0;
4278 vcpu->run->s.regs.pp = 0;
4279 vcpu->run->s.regs.gbea = 1;
4280 vcpu->run->s.regs.fpc = 0;
4281 /*
4282 * Do not reset these registers in the protected case, as some of
4283 * them are overlaid and they are not accessible in this case
4284 * anyway.
4285 */
4286 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4287 vcpu->arch.sie_block->gbea = 1;
4288 vcpu->arch.sie_block->pp = 0;
4289 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4290 vcpu->arch.sie_block->todpr = 0;
4291 }
4292}
4293
4294static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
4295{
4296 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
4297
4298 /* Clear reset is a superset of the initial reset */
4299 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4300
4301	memset(&regs->gprs, 0, sizeof(regs->gprs));
4302	memset(&regs->vrs, 0, sizeof(regs->vrs));
4303	memset(&regs->acrs, 0, sizeof(regs->acrs));
4304	memset(&regs->gscb, 0, sizeof(regs->gscb));
4305
4306 regs->etoken = 0;
4307 regs->etoken_extension = 0;
4308}
4309
4310int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4311{
4312 vcpu_load(vcpu);
4313	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
4314 vcpu_put(vcpu);
4315 return 0;
4316}
4317
4318int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4319{
4320 vcpu_load(vcpu);
4321	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
4322 vcpu_put(vcpu);
4323 return 0;
4324}
4325
4326int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4327 struct kvm_sregs *sregs)
4328{
4329 vcpu_load(vcpu);
4330
4331 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
4332 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
4333
4334 vcpu_put(vcpu);
4335 return 0;
4336}
4337
4338int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4339 struct kvm_sregs *sregs)
4340{
4341 vcpu_load(vcpu);
4342
4343 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
4344 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
4345
4346 vcpu_put(vcpu);
4347 return 0;
4348}
4349
4350int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4351{
4352 int ret = 0;
4353
4354 vcpu_load(vcpu);
4355
4356 vcpu->run->s.regs.fpc = fpu->fpc;
4357 if (cpu_has_vx())
4358 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
4359 (freg_t *) fpu->fprs);
4360 else
4361 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
4362
4363 vcpu_put(vcpu);
4364 return ret;
4365}
4366
4367int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4368{
4369 vcpu_load(vcpu);
4370
4371 if (cpu_has_vx())
4372 convert_vx_to_fp((freg_t *) fpu->fprs,
4373 (__vector128 *) vcpu->run->s.regs.vrs);
4374 else
4375 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
4376 fpu->fpc = vcpu->run->s.regs.fpc;
4377
4378 vcpu_put(vcpu);
4379 return 0;
4380}
4381
4382static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
4383{
4384 int rc = 0;
4385
4386 if (!is_vcpu_stopped(vcpu))
4387 rc = -EBUSY;
4388 else {
4389 vcpu->run->psw_mask = psw.mask;
4390 vcpu->run->psw_addr = psw.addr;
4391 }
4392 return rc;
4393}
4394
4395int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4396 struct kvm_translation *tr)
4397{
4398 return -EINVAL; /* not implemented yet */
4399}
4400
4401#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
4402 KVM_GUESTDBG_USE_HW_BP | \
4403 KVM_GUESTDBG_ENABLE)
4404
4405int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4406 struct kvm_guest_debug *dbg)
4407{
4408 int rc = 0;
4409
4410 vcpu_load(vcpu);
4411
4412 vcpu->guest_debug = 0;
4413 kvm_s390_clear_bp_data(vcpu);
4414
4415 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
4416 rc = -EINVAL;
4417 goto out;
4418 }
4419 if (!sclp.has_gpere) {
4420 rc = -EINVAL;
4421 goto out;
4422 }
4423
4424 if (dbg->control & KVM_GUESTDBG_ENABLE) {
4425 vcpu->guest_debug = dbg->control;
4426 /* enforce guest PER */
4427 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
4428
4429 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
4430 rc = kvm_s390_import_bp_data(vcpu, dbg);
4431 } else {
4432 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4433 vcpu->arch.guestdbg.last_bp = 0;
4434 }
4435
4436 if (rc) {
4437 vcpu->guest_debug = 0;
4438 kvm_s390_clear_bp_data(vcpu);
4439 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4440 }
4441
4442out:
4443 vcpu_put(vcpu);
4444 return rc;
4445}
4446
4447int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4448 struct kvm_mp_state *mp_state)
4449{
4450 int ret;
4451
4452 vcpu_load(vcpu);
4453
4454 /* CHECK_STOP and LOAD are not supported yet */
4455 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
4456 KVM_MP_STATE_OPERATING;
4457
4458 vcpu_put(vcpu);
4459 return ret;
4460}
4461
4462int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4463 struct kvm_mp_state *mp_state)
4464{
4465 int rc = 0;
4466
4467 vcpu_load(vcpu);
4468
4469 /* user space knows about this interface - let it control the state */
4470 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
4471
4472 switch (mp_state->mp_state) {
4473 case KVM_MP_STATE_STOPPED:
4474 rc = kvm_s390_vcpu_stop(vcpu);
4475 break;
4476 case KVM_MP_STATE_OPERATING:
4477 rc = kvm_s390_vcpu_start(vcpu);
4478 break;
4479 case KVM_MP_STATE_LOAD:
4480 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4481 rc = -ENXIO;
4482 break;
4483 }
4484 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
4485 break;
4486 case KVM_MP_STATE_CHECK_STOP:
4487 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
4488 default:
4489 rc = -ENXIO;
4490 }
4491
4492 vcpu_put(vcpu);
4493 return rc;
4494}
4495
4496static bool ibs_enabled(struct kvm_vcpu *vcpu)
4497{
4498 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
4499}
4500
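/*
 * Process all pending vcpu requests (prefix refresh, TLB flush, IBS
 * enable/disable, migration start/stop, ...) before (re-)entering SIE.
 */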
4501static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
4502{
4503retry:
4504 kvm_s390_vcpu_request_handled(vcpu);
4505 if (!kvm_request_pending(vcpu))
4506 return 0;
4507 /*
4508 * If the guest prefix changed, re-arm the ipte notifier for the
4509 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
4510 * This ensures that the ipte instruction for this request has
4511 * already finished. We might race against a second unmapper that
4512	 * wants to set the blocking bit. Let's just retry the request loop.
4513 */
4514 if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
4515 int rc;
4516 rc = gmap_mprotect_notify(vcpu->arch.gmap,
4517 kvm_s390_get_prefix(vcpu),
4518 PAGE_SIZE * 2, PROT_WRITE);
4519 if (rc) {
4520 kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4521 return rc;
4522 }
4523 goto retry;
4524 }
4525
4526 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
4527 vcpu->arch.sie_block->ihcpu = 0xffff;
4528 goto retry;
4529 }
4530
4531 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
4532 if (!ibs_enabled(vcpu)) {
4533 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
4534 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
4535 }
4536 goto retry;
4537 }
4538
4539 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
4540 if (ibs_enabled(vcpu)) {
4541 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
4542 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
4543 }
4544 goto retry;
4545 }
4546
4547 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
4548 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
4549 goto retry;
4550 }
4551
4552 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
4553 /*
4554 * Disable CMM virtualization; we will emulate the ESSA
4555 * instruction manually, in order to provide additional
4556 * functionalities needed for live migration.
4557 */
4558 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
4559 goto retry;
4560 }
4561
4562 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
4563 /*
4564 * Re-enable CMM virtualization if CMMA is available and
4565 * CMM has been used.
4566 */
4567 if ((vcpu->kvm->arch.use_cmma) &&
4568 (vcpu->kvm->mm->context.uses_cmm))
4569 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
4570 goto retry;
4571 }
4572
4573 /* we left the vsie handler, nothing to do, just clear the request */
4574 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
4575
4576 return 0;
4577}
4578
4579static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4580{
4581 struct kvm_vcpu *vcpu;
4582 union tod_clock clk;
4583 unsigned long i;
4584
4585 preempt_disable();
4586
4587 store_tod_clock_ext(&clk);
4588
4589 kvm->arch.epoch = gtod->tod - clk.tod;
4590 kvm->arch.epdx = 0;
4591 if (test_kvm_facility(kvm, 139)) {
4592 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
4593 if (kvm->arch.epoch > gtod->tod)
4594 kvm->arch.epdx -= 1;
4595 }
4596
4597 kvm_s390_vcpu_block_all(kvm);
4598 kvm_for_each_vcpu(i, vcpu, kvm) {
4599 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
4600 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
4601 }
4602
4603 kvm_s390_vcpu_unblock_all(kvm);
4604 preempt_enable();
4605}
4606
4607int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4608{
4609 if (!mutex_trylock(&kvm->lock))
4610 return 0;
4611 __kvm_s390_set_tod_clock(kvm, gtod);
4612 mutex_unlock(&kvm->lock);
4613 return 1;
4614}
4615
4616static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4617 unsigned long token)
4618{
4619 struct kvm_s390_interrupt inti;
4620 struct kvm_s390_irq irq;
4621
4622 if (start_token) {
4623 irq.u.ext.ext_params2 = token;
4624 irq.type = KVM_S390_INT_PFAULT_INIT;
4625 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4626 } else {
4627 inti.type = KVM_S390_INT_PFAULT_DONE;
4628 inti.parm64 = token;
4629 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4630 }
4631}
4632
4633bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4634 struct kvm_async_pf *work)
4635{
4636 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4637 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4638
4639 return true;
4640}
4641
4642void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4643 struct kvm_async_pf *work)
4644{
4645 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4646 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4647}
4648
4649void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4650 struct kvm_async_pf *work)
4651{
4652 /* s390 will always inject the page directly */
4653}
4654
4655bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4656{
4657 /*
4658 * s390 will always inject the page directly,
4659	 * but we still want check_async_completion to clean up
4660 */
4661 return true;
4662}
4663
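/*
 * Check whether the conditions for an asynchronous page fault (pfault)
 * are fulfilled and, if so, queue the async pf work for the faulting
 * guest address.
 */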
4664static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4665{
4666 hva_t hva;
4667 struct kvm_arch_async_pf arch;
4668
4669 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4670 return false;
4671 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4672 vcpu->arch.pfault_compare)
4673 return false;
4674 if (psw_extint_disabled(vcpu))
4675 return false;
4676 if (kvm_s390_vcpu_has_irq(vcpu, 0))
4677 return false;
4678 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4679 return false;
4680 if (!vcpu->arch.gmap->pfault_enabled)
4681 return false;
4682
4683 hva = gfn_to_hva(vcpu->kvm, current->thread.gmap_teid.addr);
4684 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4685 return false;
4686
4687 return kvm_setup_async_pf(vcpu, current->thread.gmap_teid.addr * PAGE_SIZE, hva, &arch);
4688}
4689
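/*
 * Prepare the next SIE entry: handle async pf completion, deliver pending
 * interrupts, process vcpu requests and set up guest debugging (PER) if
 * requested.
 */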
4690static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4691{
4692 int rc, cpuflags;
4693
4694 /*
4695 * On s390 notifications for arriving pages will be delivered directly
4696	 * to the guest, but the housekeeping for completed pfaults is
4697 * handled outside the worker.
4698 */
4699 kvm_check_async_pf_completion(vcpu);
4700
4701 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4702 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4703
4704 if (need_resched())
4705 schedule();
4706
4707 if (!kvm_is_ucontrol(vcpu->kvm)) {
4708 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4709 if (rc || guestdbg_exit_pending(vcpu))
4710 return rc;
4711 }
4712
4713 rc = kvm_s390_handle_requests(vcpu);
4714 if (rc)
4715 return rc;
4716
4717 if (guestdbg_enabled(vcpu)) {
4718 kvm_s390_backup_guest_per_regs(vcpu);
4719 kvm_s390_patch_guest_per_regs(vcpu);
4720 }
4721
4722 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4723
4724 vcpu->arch.sie_block->icptcode = 0;
4725 current->thread.gmap_int_code = 0;
4726 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4727 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4728 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4729
4730 return 0;
4731}
4732
4733static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu)
4734{
4735 struct kvm_s390_pgm_info pgm_info = {
4736 .code = PGM_ADDRESSING,
4737 };
4738 u8 opcode, ilen;
4739 int rc;
4740
4741 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4742 trace_kvm_s390_sie_fault(vcpu);
4743
4744 /*
4745 * We want to inject an addressing exception, which is defined as a
4746 * suppressing or terminating exception. However, since we came here
4747 * by a DAT access exception, the PSW still points to the faulting
4748 * instruction since DAT exceptions are nullifying. So we've got
4749 * to look up the current opcode to get the length of the instruction
4750 * to be able to forward the PSW.
4751 */
4752 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4753 ilen = insn_length(opcode);
4754 if (rc < 0) {
4755 return rc;
4756 } else if (rc) {
4757 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4758 * Forward by arbitrary ilc, injection will take care of
4759 * nullification if necessary.
4760 */
4761 pgm_info = vcpu->arch.pgm;
4762 ilen = 4;
4763 }
4764 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4765 kvm_s390_forward_psw(vcpu, ilen);
4766 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4767}
4768
4769static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
4770{
4771 unsigned int flags = 0;
4772 unsigned long gaddr;
4773 int rc = 0;
4774
4775 gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
4776 if (kvm_s390_cur_gmap_fault_is_write())
4777 flags = FAULT_FLAG_WRITE;
4778
4779 switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) {
4780 case 0:
4781 vcpu->stat.exit_null++;
4782 break;
4783 case PGM_NON_SECURE_STORAGE_ACCESS:
4784 KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
4785 "Unexpected program interrupt 0x%x, TEID 0x%016lx",
4786 current->thread.gmap_int_code, current->thread.gmap_teid.val);
4787 /*
4788 * This is normal operation; a page belonging to a protected
4789 * guest has not been imported yet. Try to import the page into
4790 * the protected guest.
4791 */
4792 if (gmap_convert_to_secure(vcpu->arch.gmap, gaddr) == -EINVAL)
4793 send_sig(SIGSEGV, current, 0);
4794 break;
4795 case PGM_SECURE_STORAGE_ACCESS:
4796 case PGM_SECURE_STORAGE_VIOLATION:
4797 KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
4798 "Unexpected program interrupt 0x%x, TEID 0x%016lx",
4799 current->thread.gmap_int_code, current->thread.gmap_teid.val);
4800 /*
4801 * This can happen after a reboot with asynchronous teardown;
4802 * the new guest (normal or protected) will run on top of the
4803 * previous protected guest. The old pages need to be destroyed
4804 * so the new guest can use them.
4805 */
4806 if (gmap_destroy_page(vcpu->arch.gmap, gaddr)) {
4807 /*
4808 * Either KVM messed up the secure guest mapping or the
4809 * same page is mapped into multiple secure guests.
4810 *
4811 * This exception is only triggered when a guest 2 is
4812 * running and can therefore never occur in kernel
4813 * context.
4814 */
4815 pr_warn_ratelimited("Secure storage violation (%x) in task: %s, pid %d\n",
4816 current->thread.gmap_int_code, current->comm,
4817 current->pid);
4818 send_sig(SIGSEGV, current, 0);
4819 }
4820 break;
4821 case PGM_PROTECTION:
4822 case PGM_SEGMENT_TRANSLATION:
4823 case PGM_PAGE_TRANSLATION:
4824 case PGM_ASCE_TYPE:
4825 case PGM_REGION_FIRST_TRANS:
4826 case PGM_REGION_SECOND_TRANS:
4827 case PGM_REGION_THIRD_TRANS:
4828 KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
4829 "Unexpected program interrupt 0x%x, TEID 0x%016lx",
4830 current->thread.gmap_int_code, current->thread.gmap_teid.val);
4831 if (vcpu->arch.gmap->pfault_enabled) {
4832 rc = gmap_fault(vcpu->arch.gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT);
4833 if (rc == -EFAULT)
4834 return vcpu_post_run_addressing_exception(vcpu);
4835 if (rc == -EAGAIN) {
4836 trace_kvm_s390_major_guest_pfault(vcpu);
4837 if (kvm_arch_setup_async_pf(vcpu))
4838 return 0;
4839 vcpu->stat.pfault_sync++;
4840 } else {
4841 return rc;
4842 }
4843 }
4844 rc = gmap_fault(vcpu->arch.gmap, gaddr, flags);
4845 if (rc == -EFAULT) {
4846 if (kvm_is_ucontrol(vcpu->kvm)) {
4847 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4848 vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
4849 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4850 return -EREMOTE;
4851 }
4852 return vcpu_post_run_addressing_exception(vcpu);
4853 }
4854 break;
4855 default:
4856 KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
4857 current->thread.gmap_int_code, current->thread.gmap_teid.val);
4858 send_sig(SIGSEGV, current, 0);
4859 break;
4860 }
4861 return rc;
4862}
4863
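/*
 * Post-process a SIE exit: reinject machine checks that were delivered
 * while running the guest, handle intercepts, or resolve the host fault
 * that caused the exit.
 */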
4864static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4865{
4866 struct mcck_volatile_info *mcck_info;
4867 struct sie_page *sie_page;
4868 int rc;
4869
4870 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4871 vcpu->arch.sie_block->icptcode);
4872 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4873
4874 if (guestdbg_enabled(vcpu))
4875 kvm_s390_restore_guest_per_regs(vcpu);
4876
4877 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4878 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4879
4880 if (exit_reason == -EINTR) {
4881 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4882 sie_page = container_of(vcpu->arch.sie_block,
4883 struct sie_page, sie_block);
4884 mcck_info = &sie_page->mcck_info;
4885 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4886 return 0;
4887 }
4888
4889 if (vcpu->arch.sie_block->icptcode > 0) {
4890 rc = kvm_handle_sie_intercept(vcpu);
4891
4892 if (rc != -EOPNOTSUPP)
4893 return rc;
4894 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4895 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4896 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4897 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4898 return -EREMOTE;
4899 }
4900
4901 return vcpu_post_run_handle_fault(vcpu);
4902}
4903
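/*
 * The inner run loop: alternate between vcpu_pre_run(), the low-level
 * sie64a() entry and vcpu_post_run() until an exit to userspace is
 * required, a signal is pending or an error occurs.
 */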
4904#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4905static int __vcpu_run(struct kvm_vcpu *vcpu)
4906{
4907 int rc, exit_reason;
4908 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4909
4910 /*
4911	 * We try to hold kvm->srcu during most of vcpu_run (except while
4912	 * running the guest), so that memslots (and other stuff) are protected.
4913 */
4914 kvm_vcpu_srcu_read_lock(vcpu);
4915
4916 do {
4917 rc = vcpu_pre_run(vcpu);
4918 if (rc || guestdbg_exit_pending(vcpu))
4919 break;
4920
4921 kvm_vcpu_srcu_read_unlock(vcpu);
4922 /*
4923		 * Since PF_VCPU is used in the fault handler, there must be no
4924		 * uaccess between guest_enter and guest_exit.
4925 */
4926 local_irq_disable();
4927 guest_enter_irqoff();
4928 __disable_cpu_timer_accounting(vcpu);
4929 local_irq_enable();
4930 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4931 memcpy(sie_page->pv_grregs,
4932 vcpu->run->s.regs.gprs,
4933 sizeof(sie_page->pv_grregs));
4934 }
4935 exit_reason = sie64a(vcpu->arch.sie_block,
4936 vcpu->run->s.regs.gprs,
4937 vcpu->arch.gmap->asce);
4938 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4939 memcpy(vcpu->run->s.regs.gprs,
4940 sie_page->pv_grregs,
4941 sizeof(sie_page->pv_grregs));
4942 /*
4943 * We're not allowed to inject interrupts on intercepts
4944 * that leave the guest state in an "in-between" state
4945 * where the next SIE entry will do a continuation.
4946 * Fence interrupts in our "internal" PSW.
4947 */
4948 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4949 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4950 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4951 }
4952 }
4953 local_irq_disable();
4954 __enable_cpu_timer_accounting(vcpu);
4955 guest_exit_irqoff();
4956 local_irq_enable();
4957 kvm_vcpu_srcu_read_lock(vcpu);
4958
4959 rc = vcpu_post_run(vcpu, exit_reason);
4960 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4961
4962 kvm_vcpu_srcu_read_unlock(vcpu);
4963 return rc;
4964}
4965
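/* Sync the register state that is only accessible for non-protected guests */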
4966static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4967{
4968 struct kvm_run *kvm_run = vcpu->run;
4969 struct runtime_instr_cb *riccb;
4970 struct gs_cb *gscb;
4971
4972 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4973 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4974 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4975 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4976 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4977 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4978 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4979 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4980 }
4981 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4982 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4983 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4984 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4985 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4986 kvm_clear_async_pf_completion_queue(vcpu);
4987 }
4988 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4989 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4990 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4991 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4992 }
4993 /*
4994 * If userspace sets the riccb (e.g. after migration) to a valid state,
4995 * we should enable RI here instead of doing the lazy enablement.
4996 */
4997 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4998 test_kvm_facility(vcpu->kvm, 64) &&
4999 riccb->v &&
5000 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
5001 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
5002 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
5003 }
5004 /*
5005 * If userspace sets the gscb (e.g. after migration) to non-zero,
5006 * we should enable GS here instead of doing the lazy enablement.
5007 */
5008 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
5009 test_kvm_facility(vcpu->kvm, 133) &&
5010 gscb->gssm &&
5011 !vcpu->arch.gs_enabled) {
5012 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
5013 vcpu->arch.sie_block->ecb |= ECB_GS;
5014 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
5015 vcpu->arch.gs_enabled = 1;
5016 }
5017 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
5018 test_kvm_facility(vcpu->kvm, 82)) {
5019 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
5020 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
5021 }
5022 if (MACHINE_HAS_GS) {
5023 preempt_disable();
5024 local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
5025 if (current->thread.gs_cb) {
5026 vcpu->arch.host_gscb = current->thread.gs_cb;
5027 save_gs_cb(vcpu->arch.host_gscb);
5028 }
5029 if (vcpu->arch.gs_enabled) {
5030 current->thread.gs_cb = (struct gs_cb *)
5031 &vcpu->run->s.regs.gscb;
5032 restore_gs_cb(current->thread.gs_cb);
5033 }
5034 preempt_enable();
5035 }
5036 /* SIE will load etoken directly from SDNX and therefore kvm_run */
5037}
5038
5039static void sync_regs(struct kvm_vcpu *vcpu)
5040{
5041 struct kvm_run *kvm_run = vcpu->run;
5042
5043 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
5044 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
5045 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
5046 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
5047 /* some control register changes require a tlb flush */
5048 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5049 }
5050 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
5051 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
5052 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
5053 }
5054 save_access_regs(vcpu->arch.host_acrs);
5055 restore_access_regs(vcpu->run->s.regs.acrs);
5056 vcpu->arch.acrs_loaded = true;
5057 kvm_s390_fpu_load(vcpu->run);
5058 /* Sync fmt2 only data */
5059 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
5060 sync_regs_fmt2(vcpu);
5061 } else {
5062 /*
5063 * In several places we have to modify our internal view to
5064 * not do things that are disallowed by the ultravisor. For
5065 * example we must not inject interrupts after specific exits
5066 * (e.g. 112 prefix page not secure). We do this by turning
5067 * off the machine check, external and I/O interrupt bits
5068 * of our PSW copy. To avoid getting validity intercepts, we
5069 * do only accept the condition code from userspace.
5070 */
5071 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
5072 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
5073 PSW_MASK_CC;
5074 }
5075
5076 kvm_run->kvm_dirty_regs = 0;
5077}
5078
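/* Store the register state that is only accessible for non-protected guests */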
5079static void store_regs_fmt2(struct kvm_vcpu *vcpu)
5080{
5081 struct kvm_run *kvm_run = vcpu->run;
5082
5083 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
5084 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
5085 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
5086 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
5087 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
5088 if (MACHINE_HAS_GS) {
5089 preempt_disable();
5090 local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
5091 if (vcpu->arch.gs_enabled)
5092 save_gs_cb(current->thread.gs_cb);
5093 current->thread.gs_cb = vcpu->arch.host_gscb;
5094 restore_gs_cb(vcpu->arch.host_gscb);
5095 if (!vcpu->arch.host_gscb)
5096 local_ctl_clear_bit(2, CR2_GUARDED_STORAGE_BIT);
5097 vcpu->arch.host_gscb = NULL;
5098 preempt_enable();
5099 }
5100 /* SIE will save etoken directly into SDNX and therefore kvm_run */
5101}
5102
5103static void store_regs(struct kvm_vcpu *vcpu)
5104{
5105 struct kvm_run *kvm_run = vcpu->run;
5106
5107 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
5108 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
5109 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
5110 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
5111 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
5112 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
5113 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
5114 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
5115 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
5116 save_access_regs(vcpu->run->s.regs.acrs);
5117 restore_access_regs(vcpu->arch.host_acrs);
5118 vcpu->arch.acrs_loaded = false;
5119 kvm_s390_fpu_store(vcpu->run);
5120 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
5121 store_regs_fmt2(vcpu);
5122}
5123
5124int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
5125{
5126 struct kvm_run *kvm_run = vcpu->run;
5127 DECLARE_KERNEL_FPU_ONSTACK32(fpu);
5128 int rc;
5129
5130 /*
5131 * Running a VM while dumping always has the potential to
5132 * produce inconsistent dump data. But for PV vcpus a SIE
5133 * entry while dumping could also lead to a fatal validity
5134 * intercept which we absolutely want to avoid.
5135 */
5136 if (vcpu->kvm->arch.pv.dumping)
5137 return -EINVAL;
5138
5139 if (!vcpu->wants_to_run)
5140 return -EINTR;
5141
5142 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
5143 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
5144 return -EINVAL;
5145
5146 vcpu_load(vcpu);
5147
5148 if (guestdbg_exit_pending(vcpu)) {
5149 kvm_s390_prepare_debug_exit(vcpu);
5150 rc = 0;
5151 goto out;
5152 }
5153
5154 kvm_sigset_activate(vcpu);
5155
5156 /*
5157 * no need to check the return value of vcpu_start as it can only have
5158 * an error for protvirt, but protvirt means user cpu state
5159 */
5160 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
5161 kvm_s390_vcpu_start(vcpu);
5162 } else if (is_vcpu_stopped(vcpu)) {
5163 pr_err_ratelimited("can't run stopped vcpu %d\n",
5164 vcpu->vcpu_id);
5165 rc = -EINVAL;
5166 goto out;
5167 }
5168
5169 kernel_fpu_begin(&fpu, KERNEL_FPC | KERNEL_VXR);
5170 sync_regs(vcpu);
5171 enable_cpu_timer_accounting(vcpu);
5172
5173 might_fault();
5174 rc = __vcpu_run(vcpu);
5175
5176 if (signal_pending(current) && !rc) {
5177 kvm_run->exit_reason = KVM_EXIT_INTR;
5178 rc = -EINTR;
5179 }
5180
5181 if (guestdbg_exit_pending(vcpu) && !rc) {
5182 kvm_s390_prepare_debug_exit(vcpu);
5183 rc = 0;
5184 }
5185
5186 if (rc == -EREMOTE) {
5187 /* userspace support is needed, kvm_run has been prepared */
5188 rc = 0;
5189 }
5190
5191 disable_cpu_timer_accounting(vcpu);
5192 store_regs(vcpu);
5193 kernel_fpu_end(&fpu, KERNEL_FPC | KERNEL_VXR);
5194
5195 kvm_sigset_deactivate(vcpu);
5196
5197 vcpu->stat.exit_userspace++;
5198out:
5199 vcpu_put(vcpu);
5200 return rc;
5201}
5202
5203/*
5204 * store status at address
5205 * we have two special cases:
5206 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
5207 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
5208 */
5209int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
5210{
5211 unsigned char archmode = 1;
5212 freg_t fprs[NUM_FPRS];
5213 unsigned int px;
5214 u64 clkcomp, cputm;
5215 int rc;
5216
5217 px = kvm_s390_get_prefix(vcpu);
5218 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
5219 if (write_guest_abs(vcpu, 163, &archmode, 1))
5220 return -EFAULT;
5221 gpa = 0;
5222 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
5223 if (write_guest_real(vcpu, 163, &archmode, 1))
5224 return -EFAULT;
5225 gpa = px;
5226 } else
5227 gpa -= __LC_FPREGS_SAVE_AREA;
5228
5229 /* manually convert vector registers if necessary */
5230 if (cpu_has_vx()) {
5231 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
5232 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5233 fprs, 128);
5234 } else {
5235 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5236 vcpu->run->s.regs.fprs, 128);
5237 }
5238 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
5239 vcpu->run->s.regs.gprs, 128);
5240 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
5241 &vcpu->arch.sie_block->gpsw, 16);
5242 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
5243 &px, 4);
5244 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
5245 &vcpu->run->s.regs.fpc, 4);
5246 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
5247 &vcpu->arch.sie_block->todpr, 4);
5248 cputm = kvm_s390_get_cpu_timer(vcpu);
5249 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
5250 &cputm, 8);
5251 clkcomp = vcpu->arch.sie_block->ckc >> 8;
5252 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
5253 &clkcomp, 8);
5254 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
5255 &vcpu->run->s.regs.acrs, 64);
5256 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
5257 &vcpu->arch.sie_block->gcr, 128);
5258 return rc ? -EFAULT : 0;
5259}
5260
5261int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
5262{
5263 /*
5264 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
5265 * switch in the run ioctl. Let's update our copies before we save
5266	 * them into the save area.
5267 */
5268 kvm_s390_fpu_store(vcpu->run);
5269 save_access_regs(vcpu->run->s.regs.acrs);
5270
5271 return kvm_s390_store_status_unloaded(vcpu, addr);
5272}
5273
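/*
 * IBS is only used while a single vcpu is running; the helpers below
 * request enabling/disabling it synchronously on the target vcpu(s).
 */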
5274static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5275{
5276 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
5277 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
5278}
5279
5280static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
5281{
5282 unsigned long i;
5283 struct kvm_vcpu *vcpu;
5284
5285 kvm_for_each_vcpu(i, vcpu, kvm) {
5286 __disable_ibs_on_vcpu(vcpu);
5287 }
5288}
5289
5290static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5291{
5292 if (!sclp.has_ibs)
5293 return;
5294 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
5295 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
5296}
5297
5298int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
5299{
5300 int i, online_vcpus, r = 0, started_vcpus = 0;
5301
5302 if (!is_vcpu_stopped(vcpu))
5303 return 0;
5304
5305 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
5306 /* Only one cpu at a time may enter/leave the STOPPED state. */
5307 spin_lock(&vcpu->kvm->arch.start_stop_lock);
5308 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5309
5310 /* Let's tell the UV that we want to change into the operating state */
5311 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5312 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
5313 if (r) {
5314 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5315 return r;
5316 }
5317 }
5318
5319 for (i = 0; i < online_vcpus; i++) {
5320 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
5321 started_vcpus++;
5322 }
5323
5324 if (started_vcpus == 0) {
5325 /* we're the only active VCPU -> speed it up */
5326 __enable_ibs_on_vcpu(vcpu);
5327 } else if (started_vcpus == 1) {
5328 /*
5329 * As we are starting a second VCPU, we have to disable
5330 * the IBS facility on all VCPUs to remove potentially
5331 * outstanding ENABLE requests.
5332 */
5333 __disable_ibs_on_all_vcpus(vcpu->kvm);
5334 }
5335
5336 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
5337 /*
5338 * The real PSW might have changed due to a RESTART interpreted by the
5339 * ultravisor. We block all interrupts and let the next sie exit
5340 * refresh our view.
5341 */
5342 if (kvm_s390_pv_cpu_is_protected(vcpu))
5343 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
5344 /*
5345 * Another VCPU might have used IBS while we were offline.
5346 * Let's play safe and flush the VCPU at startup.
5347 */
5348 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5349 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5350 return 0;
5351}
5352
5353int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
5354{
5355 int i, online_vcpus, r = 0, started_vcpus = 0;
5356 struct kvm_vcpu *started_vcpu = NULL;
5357
5358 if (is_vcpu_stopped(vcpu))
5359 return 0;
5360
5361 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
5362 /* Only one cpu at a time may enter/leave the STOPPED state. */
5363 spin_lock(&vcpu->kvm->arch.start_stop_lock);
5364 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5365
5366 /* Let's tell the UV that we want to change into the stopped state */
5367 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5368 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
5369 if (r) {
5370 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5371 return r;
5372 }
5373 }
5374
5375 /*
5376 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
5377 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
5378 * have been fully processed. This will ensure that the VCPU
5379 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
5380 */
5381 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
5382 kvm_s390_clear_stop_irq(vcpu);
5383
5384 __disable_ibs_on_vcpu(vcpu);
5385
5386 for (i = 0; i < online_vcpus; i++) {
5387 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
5388
5389 if (!is_vcpu_stopped(tmp)) {
5390 started_vcpus++;
5391 started_vcpu = tmp;
5392 }
5393 }
5394
5395 if (started_vcpus == 1) {
5396 /*
5397 * As we only have one VCPU left, we want to enable the
5398 * IBS facility for that VCPU to speed it up.
5399 */
5400 __enable_ibs_on_vcpu(started_vcpu);
5401 }
5402
5403 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5404 return 0;
5405}
5406
5407static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
5408 struct kvm_enable_cap *cap)
5409{
5410 int r;
5411
5412 if (cap->flags)
5413 return -EINVAL;
5414
5415 switch (cap->cap) {
5416 case KVM_CAP_S390_CSS_SUPPORT:
5417 if (!vcpu->kvm->arch.css_support) {
5418 vcpu->kvm->arch.css_support = 1;
5419 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
5420 trace_kvm_s390_enable_css(vcpu->kvm);
5421 }
5422 r = 0;
5423 break;
5424 default:
5425 r = -EINVAL;
5426 break;
5427 }
5428 return r;
5429}
5430
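/* KVM_S390_MEM_OP: access the SIDA of a protected vcpu */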
5431static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
5432 struct kvm_s390_mem_op *mop)
5433{
5434 void __user *uaddr = (void __user *)mop->buf;
5435 void *sida_addr;
5436 int r = 0;
5437
5438 if (mop->flags || !mop->size)
5439 return -EINVAL;
5440 if (mop->size + mop->sida_offset < mop->size)
5441 return -EINVAL;
5442 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
5443 return -E2BIG;
5444 if (!kvm_s390_pv_cpu_is_protected(vcpu))
5445 return -EINVAL;
5446
5447 sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset;
5448
5449 switch (mop->op) {
5450 case KVM_S390_MEMOP_SIDA_READ:
5451 if (copy_to_user(uaddr, sida_addr, mop->size))
5452 r = -EFAULT;
5453
5454 break;
5455 case KVM_S390_MEMOP_SIDA_WRITE:
5456 if (copy_from_user(sida_addr, uaddr, mop->size))
5457 r = -EFAULT;
5458 break;
5459 }
5460 return r;
5461}
5462
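/*
 * KVM_S390_MEM_OP: access guest logical addresses through a bounce buffer,
 * honouring storage keys and the optional check-only and exception
 * injection flags.
 */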
5463static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
5464 struct kvm_s390_mem_op *mop)
5465{
5466 void __user *uaddr = (void __user *)mop->buf;
5467 enum gacc_mode acc_mode;
5468 void *tmpbuf = NULL;
5469 int r;
5470
5471 r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_INJECT_EXCEPTION |
5472 KVM_S390_MEMOP_F_CHECK_ONLY |
5473 KVM_S390_MEMOP_F_SKEY_PROTECTION);
5474 if (r)
5475 return r;
5476 if (mop->ar >= NUM_ACRS)
5477 return -EINVAL;
5478 if (kvm_s390_pv_cpu_is_protected(vcpu))
5479 return -EINVAL;
5480 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
5481 tmpbuf = vmalloc(mop->size);
5482 if (!tmpbuf)
5483 return -ENOMEM;
5484 }
5485
5486 acc_mode = mop->op == KVM_S390_MEMOP_LOGICAL_READ ? GACC_FETCH : GACC_STORE;
5487 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5488 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5489 acc_mode, mop->key);
5490 goto out_inject;
5491 }
5492 if (acc_mode == GACC_FETCH) {
5493 r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5494 mop->size, mop->key);
5495 if (r)
5496 goto out_inject;
5497 if (copy_to_user(uaddr, tmpbuf, mop->size)) {
5498 r = -EFAULT;
5499 goto out_free;
5500 }
5501 } else {
5502 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
5503 r = -EFAULT;
5504 goto out_free;
5505 }
5506 r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5507 mop->size, mop->key);
5508 }
5509
5510out_inject:
5511 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
5512 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
5513
5514out_free:
5515 vfree(tmpbuf);
5516 return r;
5517}
5518
5519static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
5520 struct kvm_s390_mem_op *mop)
5521{
5522 int r, srcu_idx;
5523
5524 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5525
5526 switch (mop->op) {
5527 case KVM_S390_MEMOP_LOGICAL_READ:
5528 case KVM_S390_MEMOP_LOGICAL_WRITE:
5529 r = kvm_s390_vcpu_mem_op(vcpu, mop);
5530 break;
5531 case KVM_S390_MEMOP_SIDA_READ:
5532 case KVM_S390_MEMOP_SIDA_WRITE:
5533 /* we are locked against sida going away by the vcpu->mutex */
5534 r = kvm_s390_vcpu_sida_op(vcpu, mop);
5535 break;
5536 default:
5537 r = -EINVAL;
5538 }
5539
5540 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
5541 return r;
5542}
5543
5544long kvm_arch_vcpu_async_ioctl(struct file *filp,
5545 unsigned int ioctl, unsigned long arg)
5546{
5547 struct kvm_vcpu *vcpu = filp->private_data;
5548 void __user *argp = (void __user *)arg;
5549 int rc;
5550
5551 switch (ioctl) {
5552 case KVM_S390_IRQ: {
5553 struct kvm_s390_irq s390irq;
5554
5555 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
5556 return -EFAULT;
5557 rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
5558 break;
5559 }
5560 case KVM_S390_INTERRUPT: {
5561 struct kvm_s390_interrupt s390int;
5562 struct kvm_s390_irq s390irq = {};
5563
5564 if (copy_from_user(&s390int, argp, sizeof(s390int)))
5565 return -EFAULT;
5566 if (s390int_to_s390irq(&s390int, &s390irq))
5567 return -EINVAL;
5568 rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
5569 break;
5570 }
5571 default:
5572 rc = -ENOIOCTLCMD;
5573 break;
5574 }
5575
5576 /*
5577 * To simplify single stepping of userspace-emulated instructions,
5578 * KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see
5579 * should_handle_per_ifetch()). However, if userspace emulation injects
5580 * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens
5581 * after (and not before) the interrupt delivery.
5582 */
5583 if (!rc)
5584 vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
5585
5586 return rc;
5587}
5588
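/* KVM_PV_DUMP on vcpu level: dump the state of a protected vcpu via the ultravisor */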
5589static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
5590 struct kvm_pv_cmd *cmd)
5591{
5592 struct kvm_s390_pv_dmp dmp;
5593 void *data;
5594 int ret;
5595
5596 /* Dump initialization is a prerequisite */
5597 if (!vcpu->kvm->arch.pv.dumping)
5598 return -EINVAL;
5599
5600 if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
5601 return -EFAULT;
5602
5603 /* We only handle this subcmd right now */
5604 if (dmp.subcmd != KVM_PV_DUMP_CPU)
5605 return -EINVAL;
5606
5607 /* CPU dump length is the same as create cpu storage donation. */
5608 if (dmp.buff_len != uv_info.guest_cpu_stor_len)
5609 return -EINVAL;
5610
5611 data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
5612 if (!data)
5613 return -ENOMEM;
5614
5615 ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
5616
5617 VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
5618 vcpu->vcpu_id, cmd->rc, cmd->rrc);
5619
5620 if (ret)
5621 ret = -EINVAL;
5622
5623 /* On success copy over the dump data */
5624 if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
5625 ret = -EFAULT;
5626
5627 kvfree(data);
5628 return ret;
5629}
5630
5631long kvm_arch_vcpu_ioctl(struct file *filp,
5632 unsigned int ioctl, unsigned long arg)
5633{
5634 struct kvm_vcpu *vcpu = filp->private_data;
5635 void __user *argp = (void __user *)arg;
5636 int idx;
5637 long r;
5638 u16 rc, rrc;
5639
5640 vcpu_load(vcpu);
5641
5642 switch (ioctl) {
5643 case KVM_S390_STORE_STATUS:
5644 idx = srcu_read_lock(&vcpu->kvm->srcu);
5645 r = kvm_s390_store_status_unloaded(vcpu, arg);
5646 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5647 break;
5648 case KVM_S390_SET_INITIAL_PSW: {
5649 psw_t psw;
5650
5651 r = -EFAULT;
5652 if (copy_from_user(&psw, argp, sizeof(psw)))
5653 break;
5654 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
5655 break;
5656 }
5657 case KVM_S390_CLEAR_RESET:
5658 r = 0;
5659 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
5660 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5661 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5662 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
5663 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
5664 rc, rrc);
5665 }
5666 break;
5667 case KVM_S390_INITIAL_RESET:
5668 r = 0;
5669 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
5670 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5671 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5672 UVC_CMD_CPU_RESET_INITIAL,
5673 &rc, &rrc);
5674 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
5675 rc, rrc);
5676 }
5677 break;
5678 case KVM_S390_NORMAL_RESET:
5679 r = 0;
5680 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
5681 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5682 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5683 UVC_CMD_CPU_RESET, &rc, &rrc);
5684 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
5685 rc, rrc);
5686 }
5687 break;
5688 case KVM_SET_ONE_REG:
5689 case KVM_GET_ONE_REG: {
5690 struct kvm_one_reg reg;
5691 r = -EINVAL;
5692 if (kvm_s390_pv_cpu_is_protected(vcpu))
5693 break;
5694 r = -EFAULT;
5695		if (copy_from_user(&reg, argp, sizeof(reg)))
5696			break;
5697		if (ioctl == KVM_SET_ONE_REG)
5698			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
5699		else
5700			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
5701 break;
5702 }
5703#ifdef CONFIG_KVM_S390_UCONTROL
5704 case KVM_S390_UCAS_MAP: {
5705 struct kvm_s390_ucas_mapping ucasmap;
5706
5707 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5708 r = -EFAULT;
5709 break;
5710 }
5711
5712 if (!kvm_is_ucontrol(vcpu->kvm)) {
5713 r = -EINVAL;
5714 break;
5715 }
5716
5717 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
5718 ucasmap.vcpu_addr, ucasmap.length);
5719 break;
5720 }
5721 case KVM_S390_UCAS_UNMAP: {
5722 struct kvm_s390_ucas_mapping ucasmap;
5723
5724 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5725 r = -EFAULT;
5726 break;
5727 }
5728
5729 if (!kvm_is_ucontrol(vcpu->kvm)) {
5730 r = -EINVAL;
5731 break;
5732 }
5733
5734 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5735 ucasmap.length);
5736 break;
5737 }
5738#endif
5739 case KVM_S390_VCPU_FAULT: {
5740 r = gmap_fault(vcpu->arch.gmap, arg, 0);
5741 break;
5742 }
5743 case KVM_ENABLE_CAP:
5744 {
5745 struct kvm_enable_cap cap;
5746 r = -EFAULT;
5747 if (copy_from_user(&cap, argp, sizeof(cap)))
5748 break;
5749 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5750 break;
5751 }
5752 case KVM_S390_MEM_OP: {
5753 struct kvm_s390_mem_op mem_op;
5754
5755 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5756 r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5757 else
5758 r = -EFAULT;
5759 break;
5760 }
5761 case KVM_S390_SET_IRQ_STATE: {
5762 struct kvm_s390_irq_state irq_state;
5763
5764 r = -EFAULT;
5765 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5766 break;
5767 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5768 irq_state.len == 0 ||
5769 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5770 r = -EINVAL;
5771 break;
5772 }
5773 /* do not use irq_state.flags, it will break old QEMUs */
5774 r = kvm_s390_set_irq_state(vcpu,
5775 (void __user *) irq_state.buf,
5776 irq_state.len);
5777 break;
5778 }
5779 case KVM_S390_GET_IRQ_STATE: {
5780 struct kvm_s390_irq_state irq_state;
5781
5782 r = -EFAULT;
5783 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5784 break;
5785 if (irq_state.len == 0) {
5786 r = -EINVAL;
5787 break;
5788 }
5789 /* do not use irq_state.flags, it will break old QEMUs */
5790 r = kvm_s390_get_irq_state(vcpu,
5791 (__u8 __user *) irq_state.buf,
5792 irq_state.len);
5793 break;
5794 }
5795 case KVM_S390_PV_CPU_COMMAND: {
5796 struct kvm_pv_cmd cmd;
5797
5798 r = -EINVAL;
5799 if (!is_prot_virt_host())
5800 break;
5801
5802 r = -EFAULT;
5803 if (copy_from_user(&cmd, argp, sizeof(cmd)))
5804 break;
5805
5806 r = -EINVAL;
5807 if (cmd.flags)
5808 break;
5809
5810 /* We only handle this cmd right now */
5811 if (cmd.cmd != KVM_PV_DUMP)
5812 break;
5813
5814 r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
5815
5816 /* Always copy over UV rc / rrc data */
5817 if (copy_to_user((__u8 __user *)argp, &cmd.rc,
5818 sizeof(cmd.rc) + sizeof(cmd.rrc)))
5819 r = -EFAULT;
5820 break;
5821 }
5822 default:
5823 r = -ENOTTY;
5824 }
5825
5826 vcpu_put(vcpu);
5827 return r;
5828}
5829
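/* Allow userspace of ucontrol VMs to mmap the SIE control block of a vcpu */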
5830vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5831{
5832#ifdef CONFIG_KVM_S390_UCONTROL
5833 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5834 && (kvm_is_ucontrol(vcpu->kvm))) {
5835 vmf->page = virt_to_page(vcpu->arch.sie_block);
5836 get_page(vmf->page);
5837 return 0;
5838 }
5839#endif
5840 return VM_FAULT_SIGBUS;
5841}
5842
5843bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
5844{
5845 return true;
5846}
5847
5848/* Section: memory related */
5849int kvm_arch_prepare_memory_region(struct kvm *kvm,
5850 const struct kvm_memory_slot *old,
5851 struct kvm_memory_slot *new,
5852 enum kvm_mr_change change)
5853{
5854 gpa_t size;
5855
5856 if (kvm_is_ucontrol(kvm))
5857 return -EINVAL;
5858
5859 /* When we are protected, we should not change the memory slots */
5860 if (kvm_s390_pv_get_handle(kvm))
5861 return -EINVAL;
5862
5863 if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) {
5864 /*
5865		 * A few sanity checks: memory slots have to start and end on a
5866		 * segment boundary (1 MB). The memory in userland may be
5867		 * fragmented into various different vmas. It is okay to mmap()
5868		 * and munmap() stuff in this slot after doing this call, at any time.
5869 */
5870
5871 if (new->userspace_addr & 0xffffful)
5872 return -EINVAL;
5873
5874 size = new->npages * PAGE_SIZE;
5875 if (size & 0xffffful)
5876 return -EINVAL;
5877
5878 if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5879 return -EINVAL;
5880 }
5881
5882 if (!kvm->arch.migration_mode)
5883 return 0;
5884
5885 /*
5886 * Turn off migration mode when:
5887 * - userspace creates a new memslot with dirty logging off,
5888 * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
5889 * dirty logging is turned off.
5890 * Migration mode expects dirty page logging being enabled to store
5891 * its dirty bitmap.
5892 */
5893 if (change != KVM_MR_DELETE &&
5894 !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
5895 WARN(kvm_s390_vm_stop_migration(kvm),
5896 "Failed to stop migration mode");
5897
5898 return 0;
5899}
5900
5901void kvm_arch_commit_memory_region(struct kvm *kvm,
5902 struct kvm_memory_slot *old,
5903 const struct kvm_memory_slot *new,
5904 enum kvm_mr_change change)
5905{
5906 int rc = 0;
5907
5908 switch (change) {
5909 case KVM_MR_DELETE:
5910 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5911 old->npages * PAGE_SIZE);
5912 break;
5913 case KVM_MR_MOVE:
5914 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5915 old->npages * PAGE_SIZE);
5916 if (rc)
5917 break;
5918 fallthrough;
5919 case KVM_MR_CREATE:
5920 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5921 new->base_gfn * PAGE_SIZE,
5922 new->npages * PAGE_SIZE);
5923 break;
5924 case KVM_MR_FLAGS_ONLY:
5925 break;
5926 default:
5927 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5928 }
5929 if (rc)
5930 pr_warn("failed to commit memory region\n");
5931 return;
5932}
5933
5934static inline unsigned long nonhyp_mask(int i)
5935{
5936 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5937
5938 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5939}
5940
5941static int __init kvm_s390_init(void)
5942{
5943 int i, r;
5944
5945 if (!sclp.has_sief2) {
5946 pr_info("SIE is not available\n");
5947 return -ENODEV;
5948 }
5949
5950 if (nested && hpage) {
5951 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5952 return -EINVAL;
5953 }
5954
5955 for (i = 0; i < 16; i++)
5956 kvm_s390_fac_base[i] |=
5957 stfle_fac_list[i] & nonhyp_mask(i);
5958
5959 r = __kvm_s390_init();
5960 if (r)
5961 return r;
5962
5963 r = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5964 if (r) {
5965 __kvm_s390_exit();
5966 return r;
5967 }
5968 return 0;
5969}
5970
5971static void __exit kvm_s390_exit(void)
5972{
5973 kvm_exit();
5974
5975 __kvm_s390_exit();
5976}
5977
5978module_init(kvm_s390_init);
5979module_exit(kvm_s390_exit);
5980
5981/*
5982 * Enable autoloading of the kvm module.
5983 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5984 * since x86 takes a different approach.
5985 */
5986#include <linux/miscdevice.h>
5987MODULE_ALIAS_MISCDEV(KVM_MINOR);
5988MODULE_ALIAS("devname:kvm");